// Copyright 2021 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_
#define ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_

#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <type_traits>

#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/internal/endian.h"
#include "absl/base/internal/invoke.h"
#include "absl/base/optimization.h"
#include "absl/container/internal/compressed_tuple.h"
#include "absl/container/internal/container_memory.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/string_view.h"

// We can only add poisoning if we can detect consteval executions.
#if defined(ABSL_HAVE_CONSTANT_EVALUATED) && \
    (defined(ABSL_HAVE_ADDRESS_SANITIZER) || \
     defined(ABSL_HAVE_MEMORY_SANITIZER))
#define ABSL_INTERNAL_CORD_HAVE_SANITIZER 1
#endif

#define ABSL_CORD_INTERNAL_NO_SANITIZE \
  ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {

// The overhead of a vtable is too much for Cord, so we roll our own subclasses
// using only a single byte to differentiate classes from each other - the "tag"
// byte.  Define the subclasses first so we can provide downcasting helper
// functions in the base class.
struct CordRep;
struct CordRepConcat;
struct CordRepExternal;
struct CordRepFlat;
struct CordRepSubstring;
struct CordRepCrc;
class CordRepRing;
class CordRepBtree;

class CordzInfo;

// Default feature enable states for cord ring buffers.
enum CordFeatureDefaults {
  kCordEnableRingBufferDefault = false,
  kCordShallowSubcordsDefault = false
};

extern std::atomic<bool> cord_ring_buffer_enabled;
extern std::atomic<bool> shallow_subcords_enabled;

// `cord_btree_exhaustive_validation` can be set to force exhaustive validation
// in debug assertions and in code that calls `IsValid()` explicitly. By
// default, assertions should be relatively cheap, and `AssertValid()` can
// easily lead to O(n^2) complexity, as recursive / full tree validation is
// O(n).
extern std::atomic<bool> cord_btree_exhaustive_validation;

inline void enable_cord_ring_buffer(bool enable) {
  cord_ring_buffer_enabled.store(enable, std::memory_order_relaxed);
}

inline void enable_shallow_subcords(bool enable) {
  shallow_subcords_enabled.store(enable, std::memory_order_relaxed);
}

enum Constants {
  // The inlined size to use with absl::InlinedVector.
  //
  // Note: The InlinedVectors in this file (and in cord.h) do not need to use
  // the same value for their inlined size. The fact that they do is historical.
  // It may be desirable for each to use a different inlined size optimized for
  // that InlinedVector's usage.
  //
  // TODO(jgm): Benchmark to see if there's a more optimal value than 47 for
  // the inlined vector size (47 exists for backward compatibility).
  kInlinedVectorSize = 47,

  // Prefer copying blocks of at most this size, otherwise reference count.
  kMaxBytesToCopy = 511
};

// Emits a fatal error "Unexpected node type: xyz" and aborts the program.
ABSL_ATTRIBUTE_NORETURN void LogFatalNodeType(CordRep* rep);

// Fast implementation of memmove for up to 15 bytes. This implementation is
// safe for overlapping regions. If nullify_tail is true, the destination is
// padded with '\0' up to 15 bytes.
template <bool nullify_tail = false>
inline void SmallMemmove(char* dst, const char* src, size_t n) {
  if (n >= 8) {
    assert(n <= 15);
    uint64_t buf1;
    uint64_t buf2;
    memcpy(&buf1, src, 8);
    memcpy(&buf2, src + n - 8, 8);
    if (nullify_tail) {
      memset(dst + 7, 0, 8);
    }
    memcpy(dst, &buf1, 8);
    memcpy(dst + n - 8, &buf2, 8);
  } else if (n >= 4) {
    uint32_t buf1;
    uint32_t buf2;
    memcpy(&buf1, src, 4);
    memcpy(&buf2, src + n - 4, 4);
    if (nullify_tail) {
      memset(dst + 4, 0, 4);
      memset(dst + 7, 0, 8);
    }
    memcpy(dst, &buf1, 4);
    memcpy(dst + n - 4, &buf2, 4);
  } else {
    if (n != 0) {
      dst[0] = src[0];
      dst[n / 2] = src[n / 2];
      dst[n - 1] = src[n - 1];
    }
    if (nullify_tail) {
      memset(dst + 7, 0, 8);
      memset(dst + n, 0, 8);
    }
  }
}
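
// Example (illustrative): a 10 byte copy is performed as two overlapping
// 8 byte loads and stores covering bytes [0, 8) and [2, 10). With tail
// padding enabled, bytes [10, 15) of `dst` are set to '\0':
//
//   char dst[15];
//   SmallMemmove</*nullify_tail=*/true>(dst, "0123456789", 10);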

// Compact class for tracking the reference count and state flags for CordRep
// instances.  Data is stored in an atomic int32_t for compactness and speed.
class RefcountAndFlags {
 public:
  constexpr RefcountAndFlags() : count_{kRefIncrement} {}
  struct Immortal {};
  explicit constexpr RefcountAndFlags(Immortal) : count_(kImmortalFlag) {}

  // Increments the reference count. Imposes no memory ordering.
  inline void Increment() {
    count_.fetch_add(kRefIncrement, std::memory_order_relaxed);
  }

  // Asserts that the current refcount is greater than 0. If the refcount is
  // greater than 1, decrements the reference count.
  //
  // Returns false if there are no references outstanding; true otherwise.
  // Inserts barriers to ensure that state written before this method returns
  // false will be visible to a thread that just observed this method returning
  // false.  Always returns true when the immortal bit is set: immortal
  // instances always report outstanding references.
  inline bool Decrement() {
    int32_t refcount = count_.load(std::memory_order_acquire) & kRefcountMask;
    assert(refcount > 0 || refcount & kImmortalFlag);
    return refcount != kRefIncrement &&
           (count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) &
            kRefcountMask) != kRefIncrement;
  }

  // Same as Decrement, but expects the refcount to be greater than 1.
  inline bool DecrementExpectHighRefcount() {
    int32_t refcount =
        count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) &
        kRefcountMask;
    assert(refcount > 0 || refcount & kImmortalFlag);
    return refcount != kRefIncrement;
  }

  // Returns the current reference count using acquire semantics.
  inline size_t Get() const {
    return static_cast<size_t>(count_.load(std::memory_order_acquire) >>
                               kNumFlags);
  }

  // Returns whether the atomic integer is 1.
  // If the reference count is used in the conventional way, a
  // reference count of 1 implies that the current thread owns the
  // reference and no other thread shares it.
  // This call performs the test for a reference count of one, and
  // performs the memory barrier needed for the owning thread
  // to act on the object, knowing that it has exclusive access to the
  // object.  Always returns false when the immortal bit is set.
  inline bool IsOne() {
    return (count_.load(std::memory_order_acquire) & kRefcountMask) ==
           kRefIncrement;
  }

  bool IsImmortal() const {
    return (count_.load(std::memory_order_relaxed) & kImmortalFlag) != 0;
  }

 private:
  // We reserve the bottom bits for flags.
  // kImmortalFlag indicates that this entity should never be collected; it is
  // used for the StringConstant constructor to avoid collecting immutable
  // constant cords.
  // kReservedFlag is reserved for future use.
  enum Flags {
    kNumFlags = 2,

    kImmortalFlag = 0x1,
    kReservedFlag = 0x2,
    kRefIncrement = (1 << kNumFlags),

    // Bitmask to use when checking refcount by equality.  This masks out
    // all flags except kImmortalFlag, which is part of the refcount for
    // purposes of equality.  (A refcount of 0 or 1 does not count as 0 or 1
    // if the immortal bit is set.)
    kRefcountMask = ~kReservedFlag,
  };

  std::atomic<int32_t> count_;
};
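
// Example (illustrative): the canonical unref pattern. `Decrement()` returns
// false only for the caller that releases the last reference; that caller is
// then solely responsible for destroying the object:
//
//   if (!rep->refcount.Decrement()) {
//     CordRep::Destroy(rep);  // No references left.
//   }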

// Various representations that we allow.
enum CordRepKind {
  UNUSED_0 = 0,
  SUBSTRING = 1,
  CRC = 2,
  BTREE = 3,
  RING = 4,
  EXTERNAL = 5,

  // We have different tags for different sized flat arrays,
  // starting with FLAT, and limited to MAX_FLAT_TAG. The below values map to
  // an allocated range of 32 bytes to 256KiB. The current granularity is:
  // - 8 byte granularity for flat sizes in [32, 512]
  // - 64 byte granularity for flat sizes in (512, 8KiB]
  // - 4KiB granularity for flat sizes in (8KiB, 256KiB]
  // If a new tag is needed in the future, then 'FLAT' and 'MAX_FLAT_TAG'
  // should be adjusted, as well as the Tag <---> Size mapping logic, so that
  // FLAT still represents the minimum flat allocation size. (32 bytes as of
  // now).
  FLAT = 6,
  MAX_FLAT_TAG = 248
};

// There are various locations where we want to check if some rep is a 'plain'
// data edge, i.e. an external or flat rep. By having FLAT == EXTERNAL + 1, we
// can perform this check in a single branch as 'tag >= EXTERNAL'.
// Likewise, we have some locations where we check for 'ring or external/flat',
// so we keep RING adjacent to EXTERNAL as well.
// Note that we can leave this optimization to the compiler. The compiler will
// DTRT when it sees a condition like `tag == EXTERNAL || tag >= FLAT`.
static_assert(RING == BTREE + 1, "BTREE and RING not consecutive");
static_assert(EXTERNAL == RING + 1, "RING and EXTERNAL not consecutive");
static_assert(FLAT == EXTERNAL + 1, "EXTERNAL and FLAT not consecutive");
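
// Example (illustrative): with the ordering asserted above, a hypothetical
// helper can classify 'plain' data edges in a single comparison:
//
//   inline bool IsDataEdgeTag(uint8_t tag) { return tag >= EXTERNAL; }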

struct CordRep {
  // Result from an `extract edge` operation. Contains the (possibly changed)
  // tree node as well as the extracted edge, or {tree, nullptr} if no edge
  // could be extracted.
  // On success, the returned `tree` value is null if `extracted` was the only
  // data edge inside the tree, a data edge if there were only two data edges
  // in the tree, or the (possibly new / smaller) remaining tree with the
  // extracted data edge removed.
  struct ExtractResult {
    CordRep* tree;
    CordRep* extracted;
  };

  CordRep() = default;
  constexpr CordRep(RefcountAndFlags::Immortal immortal, size_t l)
      : length(l), refcount(immortal), tag(EXTERNAL), storage{} {}

  // The following three fields have to fit within 32 bytes, since that is the
  // smallest supported flat node size. Some code optimizations rely on the
  // specific layout of these fields. Notably: the non-trivial field
  // `refcount` being preceded by `length`, and being tailed by POD data
  // members only.
  // # LINT.IfChange
  size_t length;
  RefcountAndFlags refcount;
  // If tag < FLAT, it represents CordRepKind and indicates the type of node.
  // Otherwise, the node type is CordRepFlat and the tag is the encoded size.
  uint8_t tag;

  // `storage` serves two main purposes:
  // - the starting point for CordRepFlat::Data() [flexible-array-member]
  // - 3 bytes of additional storage for use by derived classes.
  // The latter is used by CordRepConcat and CordRepBtree. CordRepConcat stores
  // a 'depth' value in storage[0], and CordRepBtree stores `height`, `begin`
  // and `end` in the 3 entries. Otherwise we would need to allocate room for
  // these in the derived class, as not all compilers reuse padding space from
  // the base class (clang and gcc do, MSVC does not, etc).
  uint8_t storage[3];
  // # LINT.ThenChange(cord_rep_btree.h:copy_raw)

  // Returns true if this instance's tag matches the requested type.
  constexpr bool IsRing() const { return tag == RING; }
  constexpr bool IsSubstring() const { return tag == SUBSTRING; }
  constexpr bool IsCrc() const { return tag == CRC; }
  constexpr bool IsExternal() const { return tag == EXTERNAL; }
  constexpr bool IsFlat() const { return tag >= FLAT; }
  constexpr bool IsBtree() const { return tag == BTREE; }

  inline CordRepRing* ring();
  inline const CordRepRing* ring() const;
  inline CordRepSubstring* substring();
  inline const CordRepSubstring* substring() const;
  inline CordRepCrc* crc();
  inline const CordRepCrc* crc() const;
  inline CordRepExternal* external();
  inline const CordRepExternal* external() const;
  inline CordRepFlat* flat();
  inline const CordRepFlat* flat() const;
  inline CordRepBtree* btree();
  inline const CordRepBtree* btree() const;

  // --------------------------------------------------------------------
  // Memory management

  // Destroys the provided `rep`.
  static void Destroy(CordRep* rep);

  // Increments the reference count of `rep`.
  // Requires `rep` to be a non-null pointer value.
  static inline CordRep* Ref(CordRep* rep);

  // Decrements the reference count of `rep`. Destroys rep if count reaches
  // zero. Requires `rep` to be a non-null pointer value.
  static inline void Unref(CordRep* rep);
};
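
// Example (illustrative): tag based dispatch replaces virtual calls. Code
// checks the tag first, then downcasts with the matching helper, e.g.:
//
//   if (rep->IsSubstring()) {
//     size_t start = rep->substring()->start;
//     ...
//   }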

struct CordRepSubstring : public CordRep {
  size_t start;  // Starting offset of substring in child
  CordRep* child;

  // Creates a substring on `child`, adopting a reference on `child`.
  // Requires `child` to be either a flat or external node, and `pos` and `n`
  // to form a non-empty partial sub range of `child`, i.e.:
  // `n > 0 && n < length && n + pos <= length`
  static inline CordRepSubstring* Create(CordRep* child, size_t pos, size_t n);

  // Creates a substring of `rep`. Does not adopt a reference on `rep`.
  // Requires `IsDataEdge(rep) && n > 0 && pos + n <= rep->length`.
  // If `n == rep->length` then this method returns `CordRep::Ref(rep)`.
  // If `rep` is a substring of a flat or external node, then this method will
  // return a new substring of that flat or external node with `pos` adjusted
  // by the original `start` position.
  static inline CordRep* Substring(CordRep* rep, size_t pos, size_t n);
};
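
// Example (illustrative): substrings do not nest. Given a substring `S` of a
// flat node `F` with `S->start == 10`, `CordRepSubstring::Substring(S, 2, 5)`
// returns a new substring of `F` directly, with `start == 12` and
// `length == 5`.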

// Type for function pointer that will invoke the releaser function and also
// delete the `CordRepExternalImpl` corresponding to the passed in
// `CordRepExternal`.
using ExternalReleaserInvoker = void (*)(CordRepExternal*);

// External CordReps are allocated together with a type erased releaser. The
// releaser is stored in the memory directly following the CordRepExternal.
struct CordRepExternal : public CordRep {
  CordRepExternal() = default;
  explicit constexpr CordRepExternal(absl::string_view str)
      : CordRep(RefcountAndFlags::Immortal{}, str.size()),
        base(str.data()),
        releaser_invoker(nullptr) {}

  const char* base;
  // Pointer to function that knows how to call and destroy the releaser.
  ExternalReleaserInvoker releaser_invoker;

  // Deletes (releases) the external rep.
  // Requires rep != nullptr and rep->IsExternal()
  static void Delete(CordRep* rep);
};

struct Rank1 {};
struct Rank0 : Rank1 {};

template <typename Releaser, typename = ::absl::base_internal::invoke_result_t<
                                 Releaser, absl::string_view>>
void InvokeReleaser(Rank0, Releaser&& releaser, absl::string_view data) {
  ::absl::base_internal::invoke(std::forward<Releaser>(releaser), data);
}

template <typename Releaser,
          typename = ::absl::base_internal::invoke_result_t<Releaser>>
void InvokeReleaser(Rank1, Releaser&& releaser, absl::string_view) {
  ::absl::base_internal::invoke(std::forward<Releaser>(releaser));
}

// We use CompressedTuple so that we can benefit from EBCO.
template <typename Releaser>
struct CordRepExternalImpl
    : public CordRepExternal,
      public ::absl::container_internal::CompressedTuple<Releaser> {
  // The extra int arg is so that we can avoid interfering with copy/move
  // constructors while still benefitting from perfect forwarding.
  template <typename T>
  CordRepExternalImpl(T&& releaser, int)
      : CordRepExternalImpl::CompressedTuple(std::forward<T>(releaser)) {
    this->releaser_invoker = &Release;
  }

  ~CordRepExternalImpl() {
    InvokeReleaser(Rank0{}, std::move(this->template get<0>()),
                   absl::string_view(base, length));
  }

  static void Release(CordRepExternal* rep) {
    delete static_cast<CordRepExternalImpl*>(rep);
  }
};
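
// Example (illustrative): the Rank0/Rank1 tag types implement a ranked
// overload set. A releaser invocable with the data is preferred; a nullary
// releaser falls back to the Rank1 overload via SFINAE:
//
//   auto r1 = [](absl::string_view data) { /* release `data` */ };
//   auto r2 = [] { /* release captured state */ };
//   InvokeReleaser(Rank0{}, r1, data);  // Selects the string_view overload.
//   InvokeReleaser(Rank0{}, r2, data);  // Falls back to the nullary overload.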

inline CordRepSubstring* CordRepSubstring::Create(CordRep* child, size_t pos,
                                                  size_t n) {
  assert(child != nullptr);
  assert(n > 0);
  assert(n < child->length);
  assert(pos < child->length);
  assert(n <= child->length - pos);

  // TODO(b/217376272): Harden internal logic.
  // Move to strategic places inside the Cord logic and make this an assert.
  if (ABSL_PREDICT_FALSE(!(child->IsExternal() || child->IsFlat()))) {
    LogFatalNodeType(child);
  }

  CordRepSubstring* rep = new CordRepSubstring();
  rep->length = n;
  rep->tag = SUBSTRING;
  rep->start = pos;
  rep->child = child;
  return rep;
}

inline CordRep* CordRepSubstring::Substring(CordRep* rep, size_t pos,
                                            size_t n) {
  assert(rep != nullptr);
  assert(n != 0);
  assert(pos < rep->length);
  assert(n <= rep->length - pos);
  if (n == rep->length) return CordRep::Ref(rep);
  if (rep->IsSubstring()) {
    pos += rep->substring()->start;
    rep = rep->substring()->child;
  }
  CordRepSubstring* substr = new CordRepSubstring();
  substr->length = n;
  substr->tag = SUBSTRING;
  substr->start = pos;
  substr->child = CordRep::Ref(rep);
  return substr;
}

inline void CordRepExternal::Delete(CordRep* rep) {
  assert(rep != nullptr && rep->IsExternal());
  auto* rep_external = static_cast<CordRepExternal*>(rep);
  assert(rep_external->releaser_invoker != nullptr);
  rep_external->releaser_invoker(rep_external);
}

template <typename Str>
struct ConstInitExternalStorage {
  ABSL_CONST_INIT static CordRepExternal value;
};

template <typename Str>
ABSL_CONST_INIT CordRepExternal
    ConstInitExternalStorage<Str>::value(Str::value);

enum {
  kMaxInline = 15,
};

constexpr char GetOrNull(absl::string_view data, size_t pos) {
  return pos < data.size() ? data[pos] : '\0';
}

// We store cordz_info as a 64-bit pointer value in little endian format. This
// guarantees that the least significant byte of cordz_info matches the first
// byte of the inline data representation in `data`, which holds the inlined
// size or the 'is_tree' bit.
using cordz_info_t = int64_t;

// Assert that the `cordz_info` pointer value perfectly overlaps the first half
// of `data` and can hold a pointer value.
static_assert(sizeof(cordz_info_t) * 2 == kMaxInline + 1, "");
static_assert(sizeof(cordz_info_t) >= sizeof(intptr_t), "");

// LittleEndianByte() creates a little endian representation of `value`, i.e.:
// a little endian value where the first byte in memory holds `value`, with
// all other bytes being 0.
static constexpr cordz_info_t LittleEndianByte(unsigned char value) {
#if defined(ABSL_IS_BIG_ENDIAN)
  return static_cast<cordz_info_t>(value) << ((sizeof(cordz_info_t) - 1) * 8);
#else
  return value;
#endif
}
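
// Example (illustrative): `LittleEndianByte(1)` always has the in-memory byte
// sequence {0x01, 0x00, ..., 0x00}, regardless of host endianness, so the
// byte overlapping the tag reads as 1.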

class InlineData {
 public:
  // DefaultInitType forces the use of the default initialization constructor.
  enum DefaultInitType { kDefaultInit };

  // kNullCordzInfo holds the little endian representation of intptr_t(1).
  // This is the 'null' / initial value of 'cordz_info'. The value is little
  // endian 1 specifically because, with 64-bit pointers, the first byte of
  // cordz_info overlaps the byte holding the tag, keeping the 'is_tree' bit
  // set for tree values that are not profiled.
  static constexpr cordz_info_t kNullCordzInfo = LittleEndianByte(1);

  // kTagOffset contains the offset of the control byte / tag. This constant is
  // intended mostly for debugging purposes: do not remove this constant as it
  // is actively inspected and used by gdb pretty printing code.
  static constexpr size_t kTagOffset = 0;

  // Implement `~InlineData()` conditionally: we only need this destructor to
  // unpoison poisoned instances under *SAN, and it will only compile correctly
  // if the current compiler supports `absl::is_constant_evaluated()`.
#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
  ~InlineData() noexcept { unpoison(); }
#endif

  constexpr InlineData() noexcept { poison_this(); }

  explicit InlineData(DefaultInitType) noexcept : rep_(kDefaultInit) {
    poison_this();
  }

  explicit InlineData(CordRep* rep) noexcept : rep_(rep) {
    ABSL_ASSERT(rep != nullptr);
  }

  // Explicit constexpr constructor to create a constexpr InlineData
  // value. Creates an inlined SSO value if `rep` is null, otherwise
  // creates a tree instance value.
  constexpr InlineData(absl::string_view sv, CordRep* rep) noexcept
      : rep_(rep ? Rep(rep) : Rep(sv)) {
    poison();
  }

  constexpr InlineData(const InlineData& rhs) noexcept;
  InlineData& operator=(const InlineData& rhs) noexcept;

  friend bool operator==(const InlineData& lhs, const InlineData& rhs) {
#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
    const Rep l = lhs.rep_.SanitizerSafeCopy();
    const Rep r = rhs.rep_.SanitizerSafeCopy();
    return memcmp(&l, &r, sizeof(l)) == 0;
#else
    return memcmp(&lhs, &rhs, sizeof(lhs)) == 0;
#endif
  }
  friend bool operator!=(const InlineData& lhs, const InlineData& rhs) {
    return !operator==(lhs, rhs);
  }

  // Poisons the unused inlined SSO data if the current instance
  // is inlined, else un-poisons the entire instance.
  constexpr void poison();

  // Un-poisons this instance.
  constexpr void unpoison();

  // Poisons the current instance. This is used on default initialization.
  constexpr void poison_this();

  // Returns true if the current instance is empty.
  // The 'empty value' is an inlined data value of zero length.
  bool is_empty() const { return rep_.tag() == 0; }

  // Returns true if the current instance holds a tree value.
  bool is_tree() const { return (rep_.tag() & 1) != 0; }

  // Returns true if the current instance holds a cordz_info value.
  // Requires the current instance to hold a tree value.
  bool is_profiled() const {
    assert(is_tree());
    return rep_.cordz_info() != kNullCordzInfo;
  }

  // Returns true if either of the provided instances holds a cordz_info
  // value. This method is more efficient than the equivalent
  // `data1.is_profiled() || data2.is_profiled()`. Requires both arguments to
  // hold a tree.
  static bool is_either_profiled(const InlineData& data1,
                                 const InlineData& data2) {
    assert(data1.is_tree() && data2.is_tree());
    return (data1.rep_.cordz_info() | data2.rep_.cordz_info()) !=
           kNullCordzInfo;
  }

  // Returns the cordz_info sampling instance for this instance, or nullptr
  // if the current instance is not sampled and does not have CordzInfo data.
  // Requires the current instance to hold a tree value.
  CordzInfo* cordz_info() const {
    assert(is_tree());
    intptr_t info = static_cast<intptr_t>(absl::little_endian::ToHost64(
        static_cast<uint64_t>(rep_.cordz_info())));
    assert(info & 1);
    return reinterpret_cast<CordzInfo*>(info - 1);
  }

  // Sets the current cordz_info sampling instance for this instance, or
  // nullptr if the current instance is not sampled and does not have CordzInfo
  // data. Requires the current instance to hold a tree value.
  void set_cordz_info(CordzInfo* cordz_info) {
    assert(is_tree());
    uintptr_t info = reinterpret_cast<uintptr_t>(cordz_info) | 1;
    rep_.set_cordz_info(
        static_cast<cordz_info_t>(absl::little_endian::FromHost64(info)));
  }

  // Resets the current cordz_info to null / empty.
  void clear_cordz_info() {
    assert(is_tree());
    rep_.set_cordz_info(kNullCordzInfo);
  }

  // Returns a read only pointer to the character data inside this instance.
  // Requires the current instance to hold inline data.
  const char* as_chars() const {
    assert(!is_tree());
    return rep_.as_chars();
  }

  // Returns a mutable pointer to the character data inside this instance.
  // Should be used for 'write only' operations setting an inlined value.
  // Applications can set the value of inlined data either before or after
  // setting the inlined size, i.e., both of the below are valid:
  //
  //   // Set inlined data and inline size
  //   memcpy(data_.as_chars(), data, size);
  //   data_.set_inline_size(size);
  //
  //   // Set inlined size and inline data
  //   data_.set_inline_size(size);
  //   memcpy(data_.as_chars(), data, size);
  //
  // It's an error to read from the returned pointer without a preceding write
  // if the current instance does not hold inline data, i.e.: is_tree() == true.
  char* as_chars() { return rep_.as_chars(); }

  // Returns the tree value of this value.
  // Requires the current instance to hold a tree value.
  CordRep* as_tree() const {
    assert(is_tree());
    return rep_.tree();
  }

  void set_inline_data(const char* data, size_t n) {
    ABSL_ASSERT(n <= kMaxInline);
    unpoison();
    rep_.set_tag(static_cast<int8_t>(n << 1));
    SmallMemmove<true>(rep_.as_chars(), data, n);
    poison();
  }

  void copy_max_inline_to(char* dst) const {
    assert(!is_tree());
    memcpy(dst, rep_.SanitizerSafeCopy().as_chars(), kMaxInline);
  }

  // Initializes this instance to holding the tree value `rep`,
  // initializing the cordz_info to null, i.e.: 'not profiled'.
  void make_tree(CordRep* rep) {
    unpoison();
    rep_.make_tree(rep);
  }

  // Sets the tree value of this instance to `rep`.
  // Requires the current instance to already hold a tree value.
  // Does not affect the value of cordz_info.
  void set_tree(CordRep* rep) {
    assert(is_tree());
    rep_.set_tree(rep);
  }

  // Returns the size of the inlined character data inside this instance.
  // Requires the current instance to hold inline data.
  size_t inline_size() const { return rep_.inline_size(); }

  // Sets the size of the inlined character data inside this instance.
  // Requires `size` to be <= kMaxInline.
  // See the documentation on 'as_chars()' for more information and examples.
  void set_inline_size(size_t size) {
    unpoison();
    rep_.set_inline_size(size);
    poison();
  }

  // Compares 'this' inlined data with rhs. The comparison is a straightforward
  // lexicographic comparison. `Compare()` returns values as follows:
  //
  //   -1  'this' InlineData instance is smaller
  //    0  the InlineData instances are equal
  //    1  'this' InlineData instance is larger
  int Compare(const InlineData& rhs) const {
    return Compare(rep_.SanitizerSafeCopy(), rhs.rep_.SanitizerSafeCopy());
  }

 private:
  struct Rep {
    // See cordz_info_t for forced alignment and size of `cordz_info` details.
    struct AsTree {
      explicit constexpr AsTree(absl::cord_internal::CordRep* tree)
          : rep(tree) {}
      cordz_info_t cordz_info = kNullCordzInfo;
      absl::cord_internal::CordRep* rep;
    };

    explicit Rep(DefaultInitType) {}
    constexpr Rep() : data{0} {}
    constexpr Rep(const Rep&) = default;
    constexpr Rep& operator=(const Rep&) = default;

    explicit constexpr Rep(CordRep* rep) : as_tree(rep) {}

    explicit constexpr Rep(absl::string_view chars)
        : data{static_cast<char>((chars.size() << 1)),
               GetOrNull(chars, 0),
               GetOrNull(chars, 1),
               GetOrNull(chars, 2),
               GetOrNull(chars, 3),
               GetOrNull(chars, 4),
               GetOrNull(chars, 5),
               GetOrNull(chars, 6),
               GetOrNull(chars, 7),
               GetOrNull(chars, 8),
               GetOrNull(chars, 9),
               GetOrNull(chars, 10),
               GetOrNull(chars, 11),
               GetOrNull(chars, 12),
               GetOrNull(chars, 13),
               GetOrNull(chars, 14)} {}

    // Disable sanitizer as we must always be able to read `tag`.
    ABSL_CORD_INTERNAL_NO_SANITIZE
    int8_t tag() const { return reinterpret_cast<const int8_t*>(this)[0]; }
    void set_tag(int8_t rhs) { reinterpret_cast<int8_t*>(this)[0] = rhs; }

    char* as_chars() { return data + 1; }
    const char* as_chars() const { return data + 1; }

    bool is_tree() const { return (tag() & 1) != 0; }

    size_t inline_size() const {
      ABSL_ASSERT(!is_tree());
      return static_cast<size_t>(tag()) >> 1;
    }

    void set_inline_size(size_t size) {
      ABSL_ASSERT(size <= kMaxInline);
      set_tag(static_cast<int8_t>(size << 1));
    }

    CordRep* tree() const { return as_tree.rep; }
    void set_tree(CordRep* rhs) { as_tree.rep = rhs; }

    cordz_info_t cordz_info() const { return as_tree.cordz_info; }
    void set_cordz_info(cordz_info_t rhs) { as_tree.cordz_info = rhs; }

    void make_tree(CordRep* tree) {
      as_tree.rep = tree;
      as_tree.cordz_info = kNullCordzInfo;
    }

#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
    constexpr Rep SanitizerSafeCopy() const {
      if (!absl::is_constant_evaluated()) {
        Rep res;
        if (is_tree()) {
          res = *this;
        } else {
          res.set_tag(tag());
          memcpy(res.as_chars(), as_chars(), inline_size());
        }
        return res;
      } else {
        return *this;
      }
    }
#else
    constexpr const Rep& SanitizerSafeCopy() const { return *this; }
#endif

    // If the data has length <= kMaxInline, we store it in `data`, and store
    // the size in the first char of `data`, shifted left by 1. Else we store
    // it in a tree and store a pointer to that tree in `as_tree.rep`; the
    // tagged `cordz_info` pointer value keeps `tag() & 1` non zero.
    union {
      char data[kMaxInline + 1];
      AsTree as_tree;
    };
  };
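
  // Example (illustrative): the inlined value "abc" is encoded as
  //
  //   data = {0x06, 'a', 'b', 'c', 0, ...}
  //
  // where the tag is 3 << 1 = 6 (even: not a tree, inline size 3). A tree
  // value stores `cordz_info` in bytes [0, 8) and the `CordRep*` in bytes
  // [8, 16), with the low bit of the first byte set (see kNullCordzInfo).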

  // Private implementation of `Compare()`.
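  // Compares all 15 inline bytes using two overlapping 8 byte loads covering
  // bytes [0, 8) and [7, 15) of the character data. Tail bytes beyond the
  // inlined size are zero padded (see `set_inline_data()`), so when the full
  // payloads are equal the order reduces to a comparison of the inline sizes.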
  static inline int Compare(const Rep& lhs, const Rep& rhs) {
    uint64_t x, y;
    memcpy(&x, lhs.as_chars(), sizeof(x));
    memcpy(&y, rhs.as_chars(), sizeof(y));
    if (x == y) {
      memcpy(&x, lhs.as_chars() + 7, sizeof(x));
      memcpy(&y, rhs.as_chars() + 7, sizeof(y));
      if (x == y) {
        if (lhs.inline_size() == rhs.inline_size()) return 0;
        return lhs.inline_size() < rhs.inline_size() ? -1 : 1;
      }
    }
    x = absl::big_endian::FromHost64(x);
    y = absl::big_endian::FromHost64(y);
    return x < y ? -1 : 1;
  }

  Rep rep_;
};
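
// Example (illustrative): a default constructed InlineData holds an empty
// inlined value; `make_tree()` switches it to a tree value with a null
// ('not profiled') cordz_info. Given some `CordRep* rep`:
//
//   InlineData data;                 // inlined, size 0
//   data.set_inline_data("abc", 3);  // inlined, size 3
//   data.make_tree(rep);             // tree value, cordz_info == null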

static_assert(sizeof(InlineData) == kMaxInline + 1, "");

#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER

constexpr InlineData::InlineData(const InlineData& rhs) noexcept
    : rep_(rhs.rep_.SanitizerSafeCopy()) {
  poison();
}

inline InlineData& InlineData::operator=(const InlineData& rhs) noexcept {
  unpoison();
  rep_ = rhs.rep_.SanitizerSafeCopy();
  poison();
  return *this;
}

constexpr void InlineData::poison_this() {
  if (!absl::is_constant_evaluated()) {
    container_internal::SanitizerPoisonObject(this);
  }
}

constexpr void InlineData::unpoison() {
  if (!absl::is_constant_evaluated()) {
    container_internal::SanitizerUnpoisonObject(this);
  }
}

constexpr void InlineData::poison() {
  if (!absl::is_constant_evaluated()) {
    if (is_tree()) {
      container_internal::SanitizerUnpoisonObject(this);
    } else if (const size_t size = inline_size()) {
      if (size < kMaxInline) {
        const char* end = rep_.as_chars() + size;
        container_internal::SanitizerPoisonMemoryRegion(end, kMaxInline - size);
      }
    } else {
      container_internal::SanitizerPoisonObject(this);
    }
  }
}

#else  // ABSL_INTERNAL_CORD_HAVE_SANITIZER

constexpr InlineData::InlineData(const InlineData&) noexcept = default;
inline InlineData& InlineData::operator=(const InlineData&) noexcept = default;

constexpr void InlineData::poison_this() {}
constexpr void InlineData::unpoison() {}
constexpr void InlineData::poison() {}

#endif  // ABSL_INTERNAL_CORD_HAVE_SANITIZER

inline CordRepSubstring* CordRep::substring() {
  assert(IsSubstring());
  return static_cast<CordRepSubstring*>(this);
}

inline const CordRepSubstring* CordRep::substring() const {
  assert(IsSubstring());
  return static_cast<const CordRepSubstring*>(this);
}

inline CordRepExternal* CordRep::external() {
  assert(IsExternal());
  return static_cast<CordRepExternal*>(this);
}

inline const CordRepExternal* CordRep::external() const {
  assert(IsExternal());
  return static_cast<const CordRepExternal*>(this);
}

inline CordRep* CordRep::Ref(CordRep* rep) {
  // ABSL_ASSUME is a workaround for
  // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105585
  ABSL_ASSUME(rep != nullptr);
  rep->refcount.Increment();
  return rep;
}

inline void CordRep::Unref(CordRep* rep) {
  assert(rep != nullptr);
  // Expect the refcount to be greater than 1: in that case, an unconditional
  // atomic decrement is typically cheaper than first loading and branching on
  // a refcount of 1.
  if (ABSL_PREDICT_FALSE(!rep->refcount.DecrementExpectHighRefcount())) {
    Destroy(rep);
  }
}

}  // namespace cord_internal

ABSL_NAMESPACE_END
}  // namespace absl
#endif  // ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_