1 // Copyright 2021 The Abseil Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_
16 #define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_
17 
18 #include <cassert>
19 #include <cstdint>
20 #include <iosfwd>
21 
22 #include "absl/base/config.h"
23 #include "absl/base/internal/raw_logging.h"
24 #include "absl/base/optimization.h"
25 #include "absl/strings/internal/cord_data_edge.h"
26 #include "absl/strings/internal/cord_internal.h"
27 #include "absl/strings/internal/cord_rep_flat.h"
28 #include "absl/strings/string_view.h"
29 #include "absl/types/span.h"
30 
31 namespace absl {
32 ABSL_NAMESPACE_BEGIN
33 namespace cord_internal {
34 
35 class CordRepBtreeNavigator;
36 
37 // CordRepBtree is as the name implies a btree implementation of a Cordrep tree.
38 // Data is stored at the leaf level only, non leaf nodes contain down pointers
39 // only. Allowed types of data edges are FLAT, EXTERNAL and SUBSTRINGs of FLAT
40 // or EXTERNAL nodes. The implementation allows for data to be added to either
41 // end of the tree only, it does not provide any 'insert' logic. This has the
42 // benefit that we can expect good fill ratios: all nodes except the outer
43 // 'legs' will have 100% fill ratios for trees built using Append/Prepend
44 // methods. Merged trees will typically have a fill ratio well above 50% as in a
45 // similar fashion, one side of the merged tree will typically have a 100% fill
46 // ratio, and the 'open' end will average 50%. All operations are O(log(n)) or
47 // better, and the tree never needs balancing.
48 //
49 // All methods accepting a CordRep* or CordRepBtree* adopt a reference on that
50 // input unless explicitly stated otherwise. All functions returning a CordRep*
51 // or CordRepBtree* instance transfer a reference back to the caller.
52 // Simplified, callers both 'donate' and 'consume' a reference count on each
53 // call, simplifying the API. An example of building a tree:
54 //
55 //   CordRepBtree* tree = CordRepBtree::Create(MakeFlat("Hello"));
56 //   tree = CordRepBtree::Append(tree, MakeFlat("world"));
57 //
58 // In the above example, all inputs are consumed, making each call affecting
59 // `tree` reference count neutral. The returned `tree` value can be different
60 // from the input if the input is shared with other threads, or if the tree
61 // grows in height, but callers typically never have to concern themselves with
62 // that and trust that all methods DTRT at all times.
63 class CordRepBtree : public CordRep {
64  public:
65   // EdgeType identifies `front` and `back` enum values.
66   // Various implementations in CordRepBtree such as `Add` and `Edge` are
67   // generic and templated on operating on either of the boundary edges.
68   // For more information on the possible edges contained in a CordRepBtree
69   // instance see the documentation for `edges_`.
70   enum class EdgeType { kFront, kBack };
71 
72   // Convenience constants into `EdgeType`
73   static constexpr EdgeType kFront = EdgeType::kFront;
74   static constexpr EdgeType kBack = EdgeType::kBack;
75 
76   // Maximum number of edges: based on experiments and performance data, we can
77   // pick suitable values resulting in optimum cacheline aligned values. The
78   // preferred values are based on 64-bit systems where we aim to align this
79   // class onto 64 bytes, i.e.:  6 = 64 bytes, 14 = 128 bytes, etc.
80   // TODO(b/192061034): experiment with alternative sizes.
81   static constexpr size_t kMaxCapacity = 6;
82 
83   // Reasonable maximum height of the btree. We can expect a fill ratio of at
84   // least 50%: trees are always expanded at the front or back. Concatenating
85   // trees will then typically fold at the top most node, where the lower nodes
86   // are at least at capacity on one side of joined inputs. At a lower fill
87   // rate of 4 edges per node, we have capacity for ~16 million leaf nodes.
88   // We will fail / abort if an application ever exceeds this height, which
89   // should be extremely rare (near impossible) and be an indication of an
90   // application error: we do not assume it reasonable for any application to
91   // operate correctly with such monster trees.
92   // Another compelling reason for the number `12` is that any contextual stack
93   // required for navigation or insertion requires 12 words and 12 bytes, which
94   // fits inside 2 cache lines with some room to spare, and is reasonable as a
95   // local stack variable compared to Cord's current near 400 bytes stack use.
96   // The maximum `height` value of a node is then `kMaxDepth - 1` as node height
97   // values start with a value of 0 for leaf nodes.
98   static constexpr size_t kMaxDepth = 12;
99   // See comments on height() for why this is an int and not a size_t.
100   static constexpr int kMaxHeight = static_cast<int>(kMaxDepth - 1);
101 
102   // `Action` defines the action for unwinding changes done at the btree's leaf
103   // level that need to be propagated up to the parent node(s). Each operation
104   // on a node has an effect / action defined as follows:
105   // - kSelf
106   //   The operation (add / update, etc) was performed directly on the node as
107   //   the node is private to the current thread (i.e.: not shared directly or
108   //   indirectly through a refcount > 1). Changes can be propagated directly to
109   //   all parent nodes as all parent nodes are also then private to the current
110   //   thread.
111   // - kCopied
112   //   The operation (add / update, etc) was performed on a copy of the original
113   //   node, as the node is (potentially) directly or indirectly shared with
114   //   other threads. Changes need to be propagated into the parent nodes where
115   //   the old down pointer must be unreffed and replaced with this new copy.
116   //   Such changes to parent nodes may themselves require a copy if the parent
117   //   node is also shared. A kCopied action can propagate all the way to the
118   //   top node where we then must unref the `tree` input provided by the
119   //   caller, and return the new copy.
120   // - kPopped
121   //   The operation (typically add) could not be satisfied due to insufficient
122   //   capacity in the targeted node, and a new 'leg' was created that needs to
123   //   be added into the parent node. For example, adding a FLAT inside a leaf
124   //   node that is at capacity will create a new leaf node containing that
125   //   FLAT, that needs to be 'popped' up the btree. Such 'pop' actions can
126   //   cascade up the tree if parent nodes are also at capacity. A 'Popped'
127   //   action propagating all the way to the top of the tree will result in
128   //   the tree becoming one level higher than the current tree through a final
129   //   `CordRepBtree::New(tree, popped)` call, resulting in a new top node
130   //   referencing the old tree and the new (fully popped upwards) 'leg'.
131   enum Action { kSelf, kCopied, kPopped };
132 
133   // Result of an operation on a node. See the `Action` enum for details.
134   struct OpResult {
135     CordRepBtree* tree;
136     Action action;
137   };
138 
139   // Return value of the CopyPrefix and CopySuffix methods which can
140   // return a node or data edge at any height inside the tree.
141   // A height of 0 defines the lowest (leaf) node, a height of -1 identifies
142   // `edge` as being a plain data node: EXTERNAL / FLAT or SUBSTRING thereof.
143   struct CopyResult {
144     CordRep* edge;
145     int height;
146   };
147 
148   // Logical position inside a node:
149   // - index: index of the edge.
150   // - n: size or offset value depending on context.
151   struct Position {
152     size_t index;
153     size_t n;
154   };
155 
156   // Creates a btree from the given input. Adopts a ref of `rep`.
157   // If the input `rep` is itself a btree, i.e., `IsBtree()`, then this
158   // function immediately returns `rep->btree()`. If the input is a valid data
159   // edge (see IsDataEdge()), then a new leaf node is returned containing `rep`
160   // as the sole data edge. Else, the input is assumed to be a (legacy) concat
161   // tree, and the input is consumed and transformed into a btree().
162   static CordRepBtree* Create(CordRep* rep);
163 
164   // Destroys the provided tree. Should only be called by cord internal API's,
165   // typically after a ref_count.Decrement() on the last reference count.
166   static void Destroy(CordRepBtree* tree);
167 
168   // Destruction
Delete(CordRepBtree * tree)169   static void Delete(CordRepBtree* tree) { delete tree; }
170 
171   // Use CordRep::Unref() as we overload for absl::Span<CordRep* const>.
172   using CordRep::Unref;
173 
174   // Unrefs all edges in `edges` which are assumed to be 'likely one'.
175   static void Unref(absl::Span<CordRep* const> edges);
176 
177   // Appends / Prepends an existing CordRep instance to this tree.
178   // The below methods accept three types of input:
179   // 1) `rep` is a data node (See `IsDataNode` for valid data edges).
180   // `rep` is appended or prepended to this tree 'as is'.
181   // 2) `rep` is a BTREE.
182   // `rep` is merged into `tree` respecting the Append/Prepend order.
183   // 3) `rep` is some other (legacy) type.
184   // `rep` is converted in place and added to `tree`
185   // Requires `tree` and `rep` to be not null.
186   static CordRepBtree* Append(CordRepBtree* tree, CordRep* rep);
187   static CordRepBtree* Prepend(CordRepBtree* tree, CordRep* rep);
188 
189   // Append/Prepend the data in `data` to this tree.
190   // The `extra` parameter defines how much extra capacity should be allocated
191   // for any additional FLAT being allocated. This is an optimization hint from
192   // the caller. For example, a caller may need to add 2 string_views of data
193   // "abc" and "defghi" which are not consecutive. The caller can in this case
194   // invoke `AddData(tree, "abc", 6)`, and any newly added flat is allocated
195   // where possible with at least 6 bytes of extra capacity beyond `length`.
196   // This helps avoiding data getting fragmented over multiple flats.
197   // There is no limit on the size of `data`. If `data` can not be stored inside
198   // a single flat, then the function will iteratively add flats until all data
199   // has been consumed and appended or prepended to the tree.
200   static CordRepBtree* Append(CordRepBtree* tree, string_view data,
201                               size_t extra = 0);
202   static CordRepBtree* Prepend(CordRepBtree* tree, string_view data,
203                                size_t extra = 0);
204 
205   // Returns a new tree, containing `n` bytes of data from this instance
206   // starting at offset `offset`. Where possible, the returned tree shares
207   // (re-uses) data edges and nodes with this instance to minimize the
208   // combined memory footprint of both trees.
209   // Requires `offset + n <= length`. Returns `nullptr` if `n` is zero.
210   CordRep* SubTree(size_t offset, size_t n);
211 
212   // Removes `n` trailing bytes from `tree`, and returns the resulting tree
213   // or data edge. Returns `tree` if n is zero, and nullptr if n == length.
214   // This function is logically identical to:
215   //   result = tree->SubTree(0, tree->length - n);
216   //   Unref(tree);
217   //   return result;
218   // However, the actual implementation will as much as possible perform 'in
219   // place' modifications on the tree on all nodes and edges that are mutable.
220   // For example, in a fully privately owned tree with the last edge being a
221   // flat of length 12, RemoveSuffix(1) will simply set the length of that data
222   // edge to 11, and reduce the length of all nodes on the edge path by 1.
223   static CordRep* RemoveSuffix(CordRepBtree* tree, size_t n);
224 
225   // Returns the character at the given offset.
226   char GetCharacter(size_t offset) const;
227 
228   // Returns true if this node holds a single data edge, and if so, sets
229   // `fragment` to reference the contained data. `fragment` is an optional
230   // output parameter and allowed to be null.
231   bool IsFlat(absl::string_view* fragment) const;
232 
233   // Returns true if the data of `n` bytes starting at offset `offset`
234   // is contained in a single data edge, and if so, sets fragment to reference
235   // the contained data. `fragment` is an optional output parameter and allowed
236   // to be null.
237   bool IsFlat(size_t offset, size_t n, absl::string_view* fragment) const;
238 
239   // Returns a span (mutable range of bytes) of up to `size` bytes into the
240   // last FLAT data edge inside this tree under the following conditions:
241   // - none of the nodes down into the FLAT node are shared.
242   // - the last data edge in this tree is a non-shared FLAT.
243   // - the referenced FLAT has additional capacity available.
244   // If all these conditions are met, a non-empty span is returned, and the
245   // length of the flat node and involved tree nodes have been increased by
246   // `span.length()`. The caller is responsible for immediately assigning values
247   // to all uninitialized data reference by the returned span.
248   // Requires `this->refcount.IsOne()`: this function forces the caller to do
249   // this fast path check on the top level node, as this is the most commonly
250   // shared node of a cord tree.
251   Span<char> GetAppendBuffer(size_t size);
252 
253   // Extracts the right-most data edge from this tree iff:
254   // - the tree and all internal edges to the right-most node are not shared.
255   // - the right-most node is a FLAT node and not shared.
256   // - the right-most node has at least the desired extra capacity.
257   //
258   // Returns {tree, nullptr} if any of the above conditions are not met.
259   // This method effectively removes data from the tree. The intent of this
260   // method is to allow applications appending small string data to use
261   // pre-existing capacity, and add the modified rep back to the tree.
262   //
263   // Simplified such code would look similar to this:
264   //   void MyTreeBuilder::Append(string_view data) {
265   //     ExtractResult result = CordRepBtree::ExtractAppendBuffer(tree_, 1);
266   //     if (CordRep* rep = result.extracted) {
267   //       size_t available = rep->Capacity() - rep->length;
268   //       size_t n = std::min(data.size(), n);
269   //       memcpy(rep->Data(), data.data(), n);
270   //       rep->length += n;
271   //       data.remove_prefix(n);
272   //       if (!result.tree->IsBtree()) {
273   //         tree_ = CordRepBtree::Create(result.tree);
274   //       }
275   //       tree_ = CordRepBtree::Append(tree_, rep);
276   //     }
277   //     ...
278   //     // Remaining edge in `result.tree`.
279   //   }
280   static ExtractResult ExtractAppendBuffer(CordRepBtree* tree,
281                                            size_t extra_capacity = 1);
282 
283   // Returns the `height` of the tree. The height of a tree is limited to
284   // kMaxHeight. `height` is implemented as an `int` as in some places we
285   // use negative (-1) values for 'data edges'.
height()286   int height() const { return static_cast<int>(storage[0]); }
287 
288   // Properties: begin, back, end, front/back boundary indexes.
begin()289   size_t begin() const { return static_cast<size_t>(storage[1]); }
back()290   size_t back() const { return static_cast<size_t>(storage[2]) - 1; }
end()291   size_t end() const { return static_cast<size_t>(storage[2]); }
index(EdgeType edge)292   size_t index(EdgeType edge) const {
293     return edge == kFront ? begin() : back();
294   }
295 
296   // Properties: size and capacity.
297   // `capacity` contains the current capacity of this instance, where
298   // `kMaxCapacity` contains the maximum capacity of a btree node.
299   // For now, `capacity` and `kMaxCapacity` return the same value, but this may
300   // change in the future if we see benefit in dynamically sizing 'small' nodes
301   // to 'large' nodes for large data trees.
size()302   size_t size() const { return end() - begin(); }
capacity()303   size_t capacity() const { return kMaxCapacity; }
304 
305   // Edge access
306   inline CordRep* Edge(size_t index) const;
307   inline CordRep* Edge(EdgeType edge_type) const;
308   inline absl::Span<CordRep* const> Edges() const;
309   inline absl::Span<CordRep* const> Edges(size_t begin, size_t end) const;
310 
311   // Returns reference to the data edge at `index`.
312   // Requires this instance to be a leaf node, and `index` to be valid index.
313   inline absl::string_view Data(size_t index) const;
314 
315   // Diagnostics: returns true if `tree` is valid and internally consistent.
316   // If `shallow` is false, then the provided top level node and all child nodes
317   // below it are recursively checked. If `shallow` is true, only the provided
318   // node in `tree` and the cumulative length, type and height of the direct
319   // child nodes of `tree` are checked. The value of `shallow` is ignored if the
320   // internal `cord_btree_exhaustive_validation` diagnostics variable is true,
321   // in which case the performed validations works as if `shallow` were false.
322   // This function is intended for debugging and testing purposes only.
323   static bool IsValid(const CordRepBtree* tree, bool shallow = false);
324 
325   // Diagnostics: asserts that the provided tree is valid.
326   // `AssertValid()` performs a shallow validation by default. `shallow` can be
327   // set to false in which case an exhaustive validation is performed. This
328   // function is implemented in terms of calling `IsValid()` and asserting the
329   // return value to be true. See `IsValid()` for more information.
330   // This function is intended for debugging and testing purposes only.
331   static CordRepBtree* AssertValid(CordRepBtree* tree, bool shallow = true);
332   static const CordRepBtree* AssertValid(const CordRepBtree* tree,
333                                          bool shallow = true);
334 
335   // Diagnostics: dump the contents of this tree to `stream`.
336   // This function is intended for debugging and testing purposes only.
337   static void Dump(const CordRep* rep, std::ostream& stream);
338   static void Dump(const CordRep* rep, absl::string_view label,
339                    std::ostream& stream);
340   static void Dump(const CordRep* rep, absl::string_view label,
341                    bool include_contents, std::ostream& stream);
342 
343   // Adds the edge `edge` to this node if possible. `owned` indicates if the
344   // current node is potentially shared or not with other threads. Returns:
345   // - {kSelf, <this>}
346   //   The edge was directly added to this node.
347   // - {kCopied, <node>}
348   //   The edge was added to a copy of this node.
349   // - {kPopped, New(edge, height())}
350   //   A new leg with the edge was created as this node has no extra capacity.
351   template <EdgeType edge_type>
352   inline OpResult AddEdge(bool owned, CordRep* edge, size_t delta);
353 
354   // Replaces the front or back edge with the provided new edge. Returns:
355   // - {kSelf, <this>}
356   //   The edge was directly set in this node. The old edge is unreffed.
357   // - {kCopied, <node>}
358   //   A copy of this node was created with the new edge value.
359   // In both cases, the function adopts a reference on `edge`.
360   template <EdgeType edge_type>
361   OpResult SetEdge(bool owned, CordRep* edge, size_t delta);
362 
363   // Creates a new empty node at the specified height.
364   static CordRepBtree* New(int height = 0);
365 
366   // Creates a new node containing `rep`, with the height being computed
367   // automatically based on the type of `rep`.
368   static CordRepBtree* New(CordRep* rep);
369 
370   // Creates a new node containing both `front` and `back` at height
371   // `front.height() + 1`. Requires `back.height() == front.height()`.
372   static CordRepBtree* New(CordRepBtree* front, CordRepBtree* back);
373 
374   // Creates a fully balanced tree from the provided tree by rebuilding a new
375   // tree from all data edges in the input. This function is automatically
376   // invoked internally when the tree exceeds the maximum height.
377   static CordRepBtree* Rebuild(CordRepBtree* tree);
378 
379  private:
380   CordRepBtree() = default;
381   ~CordRepBtree() = default;
382 
383   // Initializes the main properties `tag`, `begin`, `end`, `height`.
384   inline void InitInstance(int height, size_t begin = 0, size_t end = 0);
385 
386   // Direct property access begin / end
set_begin(size_t begin)387   void set_begin(size_t begin) { storage[1] = static_cast<uint8_t>(begin); }
set_end(size_t end)388   void set_end(size_t end) { storage[2] = static_cast<uint8_t>(end); }
389 
390   // Decreases the value of `begin` by `n`, and returns the new value. Notice
391   // how this returns the new value unlike atomic::fetch_add which returns the
392   // old value. This is because this is used to prepend edges at 'begin - 1'.
sub_fetch_begin(size_t n)393   size_t sub_fetch_begin(size_t n) {
394     storage[1] -= static_cast<uint8_t>(n);
395     return storage[1];
396   }
397 
398   // Increases the value of `end` by `n`, and returns the previous value. This
399   // function is typically used to append edges at 'end'.
fetch_add_end(size_t n)400   size_t fetch_add_end(size_t n) {
401     const uint8_t current = storage[2];
402     storage[2] = static_cast<uint8_t>(current + n);
403     return current;
404   }
405 
406   // Returns the index of the last edge starting on, or before `offset`, with
407   // `n` containing the relative offset of `offset` inside that edge.
408   // Requires `offset` < length.
409   Position IndexOf(size_t offset) const;
410 
411   // Returns the index of the last edge starting before `offset`, with `n`
412   // containing the relative offset of `offset` inside that edge.
413   // This function is useful to find the edges for some span of bytes ending at
414   // `offset` (i.e., `n` bytes). For example:
415   //
416   //   Position pos = IndexBefore(n)
417   //   edges = Edges(begin(), pos.index)     // All full edges (may be empty)
418   //   last = Sub(Edge(pos.index), 0, pos.n) // Last partial edge (may be empty)
419   //
420   // Requires 0 < `offset` <= length.
421   Position IndexBefore(size_t offset) const;
422 
423   // Returns the index of the edge ending at (or on) length `length`, and the
424   // number of bytes inside that edge up to `length`. For example, if we have a
425   // Node with 2 edges, one of 10 and one of 20 long, then IndexOfLength(27)
426   // will return {1, 17}, and IndexOfLength(10) will return {0, 10}.
427   Position IndexOfLength(size_t n) const;
428 
429   // Identical to the above function except starting from the position `front`.
430   // This function is equivalent to `IndexBefore(front.n + offset)`, with
431   // the difference that this function is optimized to start at `front.index`.
432   Position IndexBefore(Position front, size_t offset) const;
433 
434   // Returns the index of the edge directly beyond the edge containing offset
435   // `offset`, with `n` containing the distance of that edge from `offset`.
436   // This function is useful for iteratively finding suffix nodes and remaining
437   // partial bytes in left-most suffix nodes as for example in CopySuffix.
438   // Requires `offset` < length.
439   Position IndexBeyond(size_t offset) const;
440 
441   // Creates a new leaf node containing as much data as possible from `data`.
442   // The data is added either forwards or reversed depending on `edge_type`.
443   // Callers must check the length of the returned node to determine if all data
444   // was copied or not.
445   // See the `Append/Prepend` function for the meaning and purpose of `extra`.
446   template <EdgeType edge_type>
447   static CordRepBtree* NewLeaf(absl::string_view data, size_t extra);
448 
449   // Creates a raw copy of this Btree node with the specified length, copying
450   // all properties, but without adding any references to existing edges.
451   CordRepBtree* CopyRaw(size_t new_length) const;
452 
453   // Creates a full copy of this Btree node, adding a reference on all edges.
454   CordRepBtree* Copy() const;
455 
456   // Creates a partial copy of this Btree node, copying all edges up to `end`,
457   // adding a reference on each copied edge, and sets the length of the newly
458   // created copy to `new_length`.
459   CordRepBtree* CopyBeginTo(size_t end, size_t new_length) const;
460 
461   // Returns a tree containing the edges [tree->begin(), end) and length
462   // of `new_length`. This method consumes a reference on the provided
463   // tree, and logically performs the following operation:
464   //   result = tree->CopyBeginTo(end, new_length);
465   //   CordRep::Unref(tree);
466   //   return result;
467   static CordRepBtree* ConsumeBeginTo(CordRepBtree* tree, size_t end,
468                                       size_t new_length);
469 
470   // Creates a partial copy of this Btree node, copying all edges starting at
471   // `begin`, adding a reference on each copied edge, and sets the length of
472   // the newly created copy to `new_length`.
473   CordRepBtree* CopyToEndFrom(size_t begin, size_t new_length) const;
474 
475   // Extracts and returns the front edge from the provided tree.
476   // This method consumes a reference on the provided tree, and logically
477   // performs the following operation:
478   //   edge = CordRep::Ref(tree->Edge(kFront));
479   //   CordRep::Unref(tree);
480   //   return edge;
481   static CordRep* ExtractFront(CordRepBtree* tree);
482 
483   // Returns a tree containing the result of appending `right` to `left`.
484   static CordRepBtree* MergeTrees(CordRepBtree* left, CordRepBtree* right);
485 
486   // Fallback functions for `Create()`, `Append()` and `Prepend()` which
487   // deal with legacy / non conforming input, i.e.: CONCAT trees.
488   static CordRepBtree* CreateSlow(CordRep* rep);
489   static CordRepBtree* AppendSlow(CordRepBtree*, CordRep* rep);
490   static CordRepBtree* PrependSlow(CordRepBtree*, CordRep* rep);
491 
492   // Recursively rebuilds `tree` into `stack`. If 'consume` is set to true, the
493   // function will consume a reference on `tree`. `stack` is a null terminated
494   // array containing the new tree's state, with the current leaf node at
495   // stack[0], and parent nodes above that, or null for 'top of tree'.
496   static void Rebuild(CordRepBtree** stack, CordRepBtree* tree, bool consume);
497 
498   // Aligns existing edges to start at index 0, to allow for a new edge to be
499   // added to the back of the current edges.
500   inline void AlignBegin();
501 
502   // Aligns existing edges to end at `capacity`, to allow for a new edge to be
503   // added in front of the current edges.
504   inline void AlignEnd();
505 
506   // Adds the provided edge to this node.
507   // Requires this node to have capacity for the edge. Realigns / moves
508   // existing edges as needed to prepend or append the new edge.
509   template <EdgeType edge_type>
510   inline void Add(CordRep* rep);
511 
512   // Adds the provided edges to this node.
513   // Requires this node to have capacity for the edges. Realigns / moves
514   // existing edges as needed to prepend or append the new edges.
515   template <EdgeType edge_type>
516   inline void Add(absl::Span<CordRep* const>);
517 
518   // Adds data from `data` to this node until either all data has been consumed,
519   // or there is no more capacity for additional flat nodes inside this node.
520   // Requires the current node to be a leaf node, data to be non empty, and the
521   // current node to have capacity for at least one more data edge.
522   // Returns any remaining data from `data` that was not added, which is
523   // depending on the edge type (front / back) either the remaining prefix of
524   // suffix of the input.
525   // See the `Append/Prepend` function for the meaning and purpose of `extra`.
526   template <EdgeType edge_type>
527   absl::string_view AddData(absl::string_view data, size_t extra);
528 
529   // Replace the front or back edge with the provided value.
530   // Adopts a reference on `edge` and unrefs the old edge.
531   template <EdgeType edge_type>
532   inline void SetEdge(CordRep* edge);
533 
534   // Returns a partial copy of the current tree containing the first `n` bytes
535   // of data. `CopyResult` contains both the resulting edge and its height. The
536   // resulting tree may be less high than the current tree, or even be a single
537   // matching data edge if `allow_folding` is set to true.
538   // For example, if `n == 1`, then the result will be the single data edge, and
539   // height will be set to -1 (one below the owning leaf node). If n == 0, this
540   // function returns null. Requires `n <= length`
541   CopyResult CopyPrefix(size_t n, bool allow_folding = true);
542 
543   // Returns a partial copy of the current tree containing all data starting
544   // after `offset`. `CopyResult` contains both the resulting edge and its
545   // height. The resulting tree may be less high than the current tree, or even
546   // be a single matching data edge. For example, if `n == length - 1`, then the
547   // result will be a single data edge, and height will be set to -1 (one below
548   // the owning leaf node).
549   // Requires `offset < length`
550   CopyResult CopySuffix(size_t offset);
551 
552   // Returns a OpResult value of {this, kSelf} or {Copy(), kCopied}
553   // depending on the value of `owned`.
554   inline OpResult ToOpResult(bool owned);
555 
556   // Adds `rep` to the specified tree, returning the modified tree.
557   template <EdgeType edge_type>
558   static CordRepBtree* AddCordRep(CordRepBtree* tree, CordRep* rep);
559 
560   // Adds `data` to the specified tree, returning the modified tree.
561   // See the `Append/Prepend` function for the meaning and purpose of `extra`.
562   template <EdgeType edge_type>
563   static CordRepBtree* AddData(CordRepBtree* tree, absl::string_view data,
564                                size_t extra = 0);
565 
566   // Merges `src` into `dst` with `src` being added either before (kFront) or
567   // after (kBack) `dst`. Requires the height of `dst` to be greater than or
568   // equal to the height of `src`.
569   template <EdgeType edge_type>
570   static CordRepBtree* Merge(CordRepBtree* dst, CordRepBtree* src);
571 
572   // Fallback version of GetAppendBuffer for large trees: GetAppendBuffer()
573   // implements an inlined version for trees of limited height (3 levels),
574   // GetAppendBufferSlow implements the logic for large trees.
575   Span<char> GetAppendBufferSlow(size_t size);
576 
577   // `edges_` contains all edges starting from this instance.
578   // These are explicitly `child` edges only, a cord btree (or any cord tree in
579   // that respect) does not store `parent` pointers anywhere: multiple trees /
580   // parents can reference the same shared child edge. The type of these edges
581   // depends on the height of the node. `Leaf nodes` (height == 0) contain `data
582   // edges` (external or flat nodes, or sub-strings thereof). All other nodes
583   // (height > 0) contain pointers to BTREE nodes with a height of `height - 1`.
584   CordRep* edges_[kMaxCapacity];
585 
586   friend class CordRepBtreeTestPeer;
587   friend class CordRepBtreeNavigator;
588 };
589 
btree()590 inline CordRepBtree* CordRep::btree() {
591   assert(IsBtree());
592   return static_cast<CordRepBtree*>(this);
593 }
594 
btree()595 inline const CordRepBtree* CordRep::btree() const {
596   assert(IsBtree());
597   return static_cast<const CordRepBtree*>(this);
598 }
599 
InitInstance(int height,size_t begin,size_t end)600 inline void CordRepBtree::InitInstance(int height, size_t begin, size_t end) {
601   tag = BTREE;
602   storage[0] = static_cast<uint8_t>(height);
603   storage[1] = static_cast<uint8_t>(begin);
604   storage[2] = static_cast<uint8_t>(end);
605 }
606 
Edge(size_t index)607 inline CordRep* CordRepBtree::Edge(size_t index) const {
608   assert(index >= begin());
609   assert(index < end());
610   return edges_[index];
611 }
612 
Edge(EdgeType edge_type)613 inline CordRep* CordRepBtree::Edge(EdgeType edge_type) const {
614   return edges_[edge_type == kFront ? begin() : back()];
615 }
616 
Edges()617 inline absl::Span<CordRep* const> CordRepBtree::Edges() const {
618   return {edges_ + begin(), size()};
619 }
620 
Edges(size_t begin,size_t end)621 inline absl::Span<CordRep* const> CordRepBtree::Edges(size_t begin,
622                                                       size_t end) const {
623   assert(begin <= end);
624   assert(begin >= this->begin());
625   assert(end <= this->end());
626   return {edges_ + begin, static_cast<size_t>(end - begin)};
627 }
628 
Data(size_t index)629 inline absl::string_view CordRepBtree::Data(size_t index) const {
630   assert(height() == 0);
631   return EdgeData(Edge(index));
632 }
633 
New(int height)634 inline CordRepBtree* CordRepBtree::New(int height) {
635   CordRepBtree* tree = new CordRepBtree;
636   tree->length = 0;
637   tree->InitInstance(height);
638   return tree;
639 }
640 
New(CordRep * rep)641 inline CordRepBtree* CordRepBtree::New(CordRep* rep) {
642   CordRepBtree* tree = new CordRepBtree;
643   int height = rep->IsBtree() ? rep->btree()->height() + 1 : 0;
644   tree->length = rep->length;
645   tree->InitInstance(height, /*begin=*/0, /*end=*/1);
646   tree->edges_[0] = rep;
647   return tree;
648 }
649 
New(CordRepBtree * front,CordRepBtree * back)650 inline CordRepBtree* CordRepBtree::New(CordRepBtree* front,
651                                        CordRepBtree* back) {
652   assert(front->height() == back->height());
653   CordRepBtree* tree = new CordRepBtree;
654   tree->length = front->length + back->length;
655   tree->InitInstance(front->height() + 1, /*begin=*/0, /*end=*/2);
656   tree->edges_[0] = front;
657   tree->edges_[1] = back;
658   return tree;
659 }
660 
Unref(absl::Span<CordRep * const> edges)661 inline void CordRepBtree::Unref(absl::Span<CordRep* const> edges) {
662   for (CordRep* edge : edges) {
663     if (ABSL_PREDICT_FALSE(!edge->refcount.Decrement())) {
664       CordRep::Destroy(edge);
665     }
666   }
667 }
668 
CopyRaw(size_t new_length)669 inline CordRepBtree* CordRepBtree::CopyRaw(size_t new_length) const {
670   CordRepBtree* tree = new CordRepBtree;
671 
672   // `length` and `refcount` are the first members of `CordRepBtree`.
673   // We initialize `length` using the given length, have `refcount` be set to
674   // ref = 1 through its default constructor, and copy all data beyond
675   // 'refcount' which starts with `tag` using a single memcpy: all contents
676   // except `refcount` is trivially copyable, and the compiler does not
677   // efficiently coalesce member-wise copy of these members.
678   // See https://gcc.godbolt.org/z/qY8zsca6z
679   // # LINT.IfChange(copy_raw)
680   tree->length = new_length;
681   uint8_t* dst = &tree->tag;
682   const uint8_t* src = &tag;
683   const ptrdiff_t offset = src - reinterpret_cast<const uint8_t*>(this);
684   memcpy(dst, src, sizeof(CordRepBtree) - static_cast<size_t>(offset));
685   return tree;
686   // # LINT.ThenChange()
687 }
688 
Copy()689 inline CordRepBtree* CordRepBtree::Copy() const {
690   CordRepBtree* tree = CopyRaw(length);
691   for (CordRep* rep : Edges()) CordRep::Ref(rep);
692   return tree;
693 }
694 
CopyToEndFrom(size_t begin,size_t new_length)695 inline CordRepBtree* CordRepBtree::CopyToEndFrom(size_t begin,
696                                                  size_t new_length) const {
697   assert(begin >= this->begin());
698   assert(begin <= this->end());
699   CordRepBtree* tree = CopyRaw(new_length);
700   tree->set_begin(begin);
701   for (CordRep* edge : tree->Edges()) CordRep::Ref(edge);
702   return tree;
703 }
704 
CopyBeginTo(size_t end,size_t new_length)705 inline CordRepBtree* CordRepBtree::CopyBeginTo(size_t end,
706                                                size_t new_length) const {
707   assert(end <= capacity());
708   assert(end >= this->begin());
709   CordRepBtree* tree = CopyRaw(new_length);
710   tree->set_end(end);
711   for (CordRep* edge : tree->Edges()) CordRep::Ref(edge);
712   return tree;
713 }
714 
AlignBegin()715 inline void CordRepBtree::AlignBegin() {
716   // The below code itself does not need to be fast as typically we have
717   // mono-directional append/prepend calls, and `begin` / `end` are typically
718   // adjusted no more than once. But we want to avoid potential register clobber
719   // effects, making the compiler emit register save/store/spills, and minimize
720   // the size of code.
721   const size_t delta = begin();
722   if (ABSL_PREDICT_FALSE(delta != 0)) {
723     const size_t new_end = end() - delta;
724     set_begin(0);
725     set_end(new_end);
726     // TODO(mvels): we can write this using 2 loads / 2 stores depending on
727     // total size for the kMaxCapacity = 6 case. I.e., we can branch (switch) on
728     // size, and then do overlapping load/store of up to 4 pointers (inlined as
729     // XMM, YMM or ZMM load/store) and up to 2 pointers (XMM / YMM), which is a)
730     // compact and b) not clobbering any registers.
731     ABSL_ASSUME(new_end <= kMaxCapacity);
732 #ifdef __clang__
733 #pragma unroll 1
734 #endif
735     for (size_t i = 0; i < new_end; ++i) {
736       edges_[i] = edges_[i + delta];
737     }
738   }
739 }
740 
AlignEnd()741 inline void CordRepBtree::AlignEnd() {
742   // See comments in `AlignBegin` for motivation on the hand-rolled for loops.
743   const size_t delta = capacity() - end();
744   if (delta != 0) {
745     const size_t new_begin = begin() + delta;
746     const size_t new_end = end() + delta;
747     set_begin(new_begin);
748     set_end(new_end);
749     ABSL_ASSUME(new_end <= kMaxCapacity);
750 #ifdef __clang__
751 #pragma unroll 1
752 #endif
753     for (size_t i = new_end - 1; i >= new_begin; --i) {
754       edges_[i] = edges_[i - delta];
755     }
756   }
757 }
758 
759 template <>
760 inline void CordRepBtree::Add<CordRepBtree::kBack>(CordRep* rep) {
761   AlignBegin();
762   edges_[fetch_add_end(1)] = rep;
763 }
764 
765 template <>
766 inline void CordRepBtree::Add<CordRepBtree::kBack>(
767     absl::Span<CordRep* const> edges) {
768   AlignBegin();
769   size_t new_end = end();
770   for (CordRep* edge : edges) edges_[new_end++] = edge;
771   set_end(new_end);
772 }
773 
774 template <>
775 inline void CordRepBtree::Add<CordRepBtree::kFront>(CordRep* rep) {
776   AlignEnd();
777   edges_[sub_fetch_begin(1)] = rep;
778 }
779 
780 template <>
781 inline void CordRepBtree::Add<CordRepBtree::kFront>(
782     absl::Span<CordRep* const> edges) {
783   AlignEnd();
784   size_t new_begin = begin() - edges.size();
785   set_begin(new_begin);
786   for (CordRep* edge : edges) edges_[new_begin++] = edge;
787 }
788 
789 template <CordRepBtree::EdgeType edge_type>
SetEdge(CordRep * edge)790 inline void CordRepBtree::SetEdge(CordRep* edge) {
791   const int idx = edge_type == kFront ? begin() : back();
792   CordRep::Unref(edges_[idx]);
793   edges_[idx] = edge;
794 }
795 
ToOpResult(bool owned)796 inline CordRepBtree::OpResult CordRepBtree::ToOpResult(bool owned) {
797   return owned ? OpResult{this, kSelf} : OpResult{Copy(), kCopied};
798 }
799 
IndexOf(size_t offset)800 inline CordRepBtree::Position CordRepBtree::IndexOf(size_t offset) const {
801   assert(offset < length);
802   size_t index = begin();
803   while (offset >= edges_[index]->length) offset -= edges_[index++]->length;
804   return {index, offset};
805 }
806 
IndexBefore(size_t offset)807 inline CordRepBtree::Position CordRepBtree::IndexBefore(size_t offset) const {
808   assert(offset > 0);
809   assert(offset <= length);
810   size_t index = begin();
811   while (offset > edges_[index]->length) offset -= edges_[index++]->length;
812   return {index, offset};
813 }
814 
IndexBefore(Position front,size_t offset)815 inline CordRepBtree::Position CordRepBtree::IndexBefore(Position front,
816                                                         size_t offset) const {
817   size_t index = front.index;
818   offset = offset + front.n;
819   while (offset > edges_[index]->length) offset -= edges_[index++]->length;
820   return {index, offset};
821 }
822 
IndexOfLength(size_t n)823 inline CordRepBtree::Position CordRepBtree::IndexOfLength(size_t n) const {
824   assert(n <= length);
825   size_t index = back();
826   size_t strip = length - n;
827   while (strip >= edges_[index]->length) strip -= edges_[index--]->length;
828   return {index, edges_[index]->length - strip};
829 }
830 
IndexBeyond(const size_t offset)831 inline CordRepBtree::Position CordRepBtree::IndexBeyond(
832     const size_t offset) const {
833   // We need to find the edge which `starting offset` is beyond (>=)`offset`.
834   // For this we can't use the `offset -= length` logic of IndexOf. Instead, we
835   // track the offset of the `current edge` in `off`, which we increase as we
836   // iterate over the edges until we find the matching edge.
837   size_t off = 0;
838   size_t index = begin();
839   while (offset > off) off += edges_[index++]->length;
840   return {index, off - offset};
841 }
842 
Create(CordRep * rep)843 inline CordRepBtree* CordRepBtree::Create(CordRep* rep) {
844   if (IsDataEdge(rep)) return New(rep);
845   return CreateSlow(rep);
846 }
847 
GetAppendBuffer(size_t size)848 inline Span<char> CordRepBtree::GetAppendBuffer(size_t size) {
849   assert(refcount.IsOne());
850   CordRepBtree* tree = this;
851   const int height = this->height();
852   CordRepBtree* n1 = tree;
853   CordRepBtree* n2 = tree;
854   CordRepBtree* n3 = tree;
855   switch (height) {
856     case 3:
857       tree = tree->Edge(kBack)->btree();
858       if (!tree->refcount.IsOne()) return {};
859       n2 = tree;
860       ABSL_FALLTHROUGH_INTENDED;
861     case 2:
862       tree = tree->Edge(kBack)->btree();
863       if (!tree->refcount.IsOne()) return {};
864       n1 = tree;
865       ABSL_FALLTHROUGH_INTENDED;
866     case 1:
867       tree = tree->Edge(kBack)->btree();
868       if (!tree->refcount.IsOne()) return {};
869       ABSL_FALLTHROUGH_INTENDED;
870     case 0:
871       CordRep* edge = tree->Edge(kBack);
872       if (!edge->refcount.IsOne()) return {};
873       if (edge->tag < FLAT) return {};
874       size_t avail = edge->flat()->Capacity() - edge->length;
875       if (avail == 0) return {};
876       size_t delta = (std::min)(size, avail);
877       Span<char> span = {edge->flat()->Data() + edge->length, delta};
878       edge->length += delta;
879       switch (height) {
880         case 3:
881           n3->length += delta;
882           ABSL_FALLTHROUGH_INTENDED;
883         case 2:
884           n2->length += delta;
885           ABSL_FALLTHROUGH_INTENDED;
886         case 1:
887           n1->length += delta;
888           ABSL_FALLTHROUGH_INTENDED;
889         case 0:
890           tree->length += delta;
891           return span;
892       }
893       break;
894   }
895   return GetAppendBufferSlow(size);
896 }
897 
898 extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kBack>(
899     CordRepBtree* tree, CordRep* rep);
900 
901 extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kFront>(
902     CordRepBtree* tree, CordRep* rep);
903 
Append(CordRepBtree * tree,CordRep * rep)904 inline CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, CordRep* rep) {
905   if (ABSL_PREDICT_TRUE(IsDataEdge(rep))) {
906     return CordRepBtree::AddCordRep<kBack>(tree, rep);
907   }
908   return AppendSlow(tree, rep);
909 }
910 
Prepend(CordRepBtree * tree,CordRep * rep)911 inline CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, CordRep* rep) {
912   if (ABSL_PREDICT_TRUE(IsDataEdge(rep))) {
913     return CordRepBtree::AddCordRep<kFront>(tree, rep);
914   }
915   return PrependSlow(tree, rep);
916 }
917 
918 #ifdef NDEBUG
919 
AssertValid(CordRepBtree * tree,bool)920 inline CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree,
921                                                bool /* shallow */) {
922   return tree;
923 }
924 
AssertValid(const CordRepBtree * tree,bool)925 inline const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree,
926                                                      bool /* shallow */) {
927   return tree;
928 }
929 
930 #endif
931 
932 }  // namespace cord_internal
933 ABSL_NAMESPACE_END
934 }  // namespace absl
935 
936 #endif  // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_
937