// Copyright 2021 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_
#define ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_

#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <type_traits>
#include <utility>

#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/internal/endian.h"
#include "absl/base/internal/invoke.h"
#include "absl/base/optimization.h"
#include "absl/container/internal/compressed_tuple.h"
#include "absl/container/internal/container_memory.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/string_view.h"

// We can only add poisoning if we can detect consteval executions.
#if defined(ABSL_HAVE_CONSTANT_EVALUATED) && \
    (defined(ABSL_HAVE_ADDRESS_SANITIZER) || \
     defined(ABSL_HAVE_MEMORY_SANITIZER))
#define ABSL_INTERNAL_CORD_HAVE_SANITIZER 1
#endif

#define ABSL_CORD_INTERNAL_NO_SANITIZE \
  ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {

// The overhead of a vtable is too much for Cord, so we roll our own subclasses
// using only a single byte to differentiate classes from each other - the "tag"
// byte. Define the subclasses first so we can provide downcasting helper
// functions in the base class.
struct CordRep;
struct CordRepConcat;
struct CordRepExternal;
struct CordRepFlat;
struct CordRepSubstring;
struct CordRepCrc;
class CordRepRing;
class CordRepBtree;

class CordzInfo;

// Default feature enable states for cord ring buffers
enum CordFeatureDefaults {
  kCordEnableRingBufferDefault = false,
  kCordShallowSubcordsDefault = false
};

extern std::atomic<bool> cord_ring_buffer_enabled;
extern std::atomic<bool> shallow_subcords_enabled;

// `cord_btree_exhaustive_validation` can be set to force exhaustive validation
// in debug assertions, and code that calls `IsValid()` explicitly. By default,
// assertions should be relatively cheap and AssertValid() can easily lead to
// O(n^2) complexity as recursive / full tree validation is O(n).
extern std::atomic<bool> cord_btree_exhaustive_validation;

inline void enable_cord_ring_buffer(bool enable) {
  cord_ring_buffer_enabled.store(enable, std::memory_order_relaxed);
}

inline void enable_shallow_subcords(bool enable) {
  shallow_subcords_enabled.store(enable, std::memory_order_relaxed);
}

enum Constants {
  // The inlined size to use with absl::InlinedVector.
  //
  // Note: The InlinedVectors in this file (and in cord.h) do not need to use
  // the same value for their inlined size. The fact that they do is historical.
  // It may be desirable for each to use a different inlined size optimized for
  // that InlinedVector's usage.
  //
  // TODO(jgm): Benchmark to see if there's a more optimal value than 47 for
  // the inlined vector size (47 exists for backward compatibility).
  kInlinedVectorSize = 47,

  // Prefer copying blocks of at most this size, otherwise reference count.
  kMaxBytesToCopy = 511
};

// Emits a fatal error "Unexpected node type: xyz" and aborts the program.
ABSL_ATTRIBUTE_NORETURN void LogFatalNodeType(CordRep* rep);

// Fast implementation of memmove for up to 15 bytes. This implementation is
// safe for overlapping regions. If nullify_tail is true, the destination is
// padded with '\0' up to 15 bytes.
template <bool nullify_tail = false>
inline void SmallMemmove(char* dst, const char* src, size_t n) {
  if (n >= 8) {
    assert(n <= 15);
    uint64_t buf1;
    uint64_t buf2;
    memcpy(&buf1, src, 8);
    memcpy(&buf2, src + n - 8, 8);
    if (nullify_tail) {
      memset(dst + 7, 0, 8);
    }
    memcpy(dst, &buf1, 8);
    memcpy(dst + n - 8, &buf2, 8);
  } else if (n >= 4) {
    uint32_t buf1;
    uint32_t buf2;
    memcpy(&buf1, src, 4);
    memcpy(&buf2, src + n - 4, 4);
    if (nullify_tail) {
      memset(dst + 4, 0, 4);
      memset(dst + 7, 0, 8);
    }
    memcpy(dst, &buf1, 4);
    memcpy(dst + n - 4, &buf2, 4);
  } else {
    if (n != 0) {
      dst[0] = src[0];
      dst[n / 2] = src[n / 2];
      dst[n - 1] = src[n - 1];
    }
    if (nullify_tail) {
      memset(dst + 7, 0, 8);
      memset(dst + n, 0, 8);
    }
  }
}
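
// Example (illustrative only, not part of the API): copying a short string
// into a 15-byte inline buffer while zero-padding the tail could look like:
//
//   char buf[16];
//   absl::string_view s = "hello";
//   SmallMemmove<true>(buf, s.data(), s.size());  // buf[5..14] now hold '\0'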

// Compact class for tracking the reference count and state flags for CordRep
// instances. Data is stored in an atomic int32_t for compactness and speed.
class RefcountAndFlags {
 public:
  constexpr RefcountAndFlags() : count_{kRefIncrement} {}
  struct Immortal {};
  explicit constexpr RefcountAndFlags(Immortal) : count_(kImmortalFlag) {}

  // Increments the reference count. Imposes no memory ordering.
  inline void Increment() {
    count_.fetch_add(kRefIncrement, std::memory_order_relaxed);
  }

  // Asserts that the current refcount is greater than 0. If the refcount is
  // greater than 1, decrements the reference count.
  //
  // Returns false if there are no references outstanding; true otherwise.
  // Inserts barriers to ensure that state written before this method returns
  // false will be visible to a thread that just observed this method returning
  // false. Always returns false when the immortal bit is set.
  inline bool Decrement() {
    int32_t refcount = count_.load(std::memory_order_acquire) & kRefcountMask;
    assert(refcount > 0 || refcount & kImmortalFlag);
    return refcount != kRefIncrement &&
           (count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) &
            kRefcountMask) != kRefIncrement;
  }

  // Same as Decrement but expect that refcount is greater than 1.
  inline bool DecrementExpectHighRefcount() {
    int32_t refcount =
        count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) &
        kRefcountMask;
    assert(refcount > 0 || refcount & kImmortalFlag);
    return refcount != kRefIncrement;
  }

  // Returns the current reference count using acquire semantics.
  inline size_t Get() const {
    return static_cast<size_t>(count_.load(std::memory_order_acquire) >>
                               kNumFlags);
  }

  // Returns whether the atomic integer is 1.
  // If the reference count is used in the conventional way, a
  // reference count of 1 implies that the current thread owns the
  // reference and no other thread shares it.
  // This call performs the test for a reference count of one, and
  // performs the memory barrier needed for the owning thread
  // to act on the object, knowing that it has exclusive access to the
  // object. Always returns false when the immortal bit is set.
  inline bool IsOne() {
    return (count_.load(std::memory_order_acquire) & kRefcountMask) ==
           kRefIncrement;
  }

  bool IsImmortal() const {
    return (count_.load(std::memory_order_relaxed) & kImmortalFlag) != 0;
  }

 private:
  // We reserve the bottom bits for flags.
  // kImmortalFlag indicates that this entity should never be collected; it is
  // used for the StringConstant constructor to avoid collecting immutable
  // constant cords.
  // kReservedFlag is reserved for future use.
  enum Flags {
    kNumFlags = 2,

    kImmortalFlag = 0x1,
    kReservedFlag = 0x2,
    kRefIncrement = (1 << kNumFlags),

    // Bitmask to use when checking refcount by equality. This masks out
    // all flags except kImmortalFlag, which is part of the refcount for
    // purposes of equality. (A refcount of 0 or 1 does not count as 0 or 1
    // if the immortal bit is set.)
    kRefcountMask = ~kReservedFlag,
  };

  std::atomic<int32_t> count_;
};
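
// Example (illustrative sketch): the usual ownership protocol built on top of
// RefcountAndFlags is the Ref()/Unref() pair defined on CordRep below:
//
//   rep->refcount.Increment();                         // share a reference
//   if (!rep->refcount.Decrement()) { /* destroy */ }  // drop a reference
//   if (rep->refcount.IsOne()) { /* safe to mutate in place */ }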

// Various representations that we allow
enum CordRepKind {
  UNUSED_0 = 0,
  SUBSTRING = 1,
  CRC = 2,
  BTREE = 3,
  RING = 4,
  EXTERNAL = 5,

  // We have different tags for different sized flat arrays,
  // starting with FLAT, and limited to MAX_FLAT_TAG. The below values map to an
  // allocated range of 32 bytes to 256 KiB. The current granularity is:
  // - 8 byte granularity for flat sizes in [32 - 512]
  // - 64 byte granularity for flat sizes in (512 - 8KiB]
  // - 4KiB byte granularity for flat sizes in (8KiB, 256 KiB]
  // If a new tag is needed in the future, then 'FLAT' and 'MAX_FLAT_TAG' should
  // be adjusted as well as the Tag <---> Size mapping logic so that FLAT still
  // represents the minimum flat allocation size. (32 bytes as of now).
  FLAT = 6,
  MAX_FLAT_TAG = 248
};

// There are various locations where we want to check if some rep is a 'plain'
// data edge, i.e. an external or flat rep. By having FLAT == EXTERNAL + 1, we
// can perform this check in a single branch as 'tag >= EXTERNAL'.
// Likewise, we have some locations where we check for 'ring or external/flat',
// so we also place RING adjacent to EXTERNAL.
// Note that we can leave this optimization to the compiler. The compiler will
// DTRT when it sees a condition like `tag == EXTERNAL || tag >= FLAT`.
static_assert(RING == BTREE + 1, "BTREE and RING not consecutive");
static_assert(EXTERNAL == RING + 1, "RING and EXTERNAL not consecutive");
static_assert(FLAT == EXTERNAL + 1, "EXTERNAL and FLAT not consecutive");
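
// For example (illustrative), a hypothetical helper relying on the ordering
// asserted above could test for a 'plain' data edge with a single compare:
//
//   inline bool IsPlainDataEdge(uint8_t tag) { return tag >= EXTERNAL; }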

struct CordRep {
  // Result from an `extract edge` operation. Contains the (possibly changed)
  // tree node as well as the extracted edge, or {tree, nullptr} if no edge
  // could be extracted.
  // On success, the returned `tree` value is null if `extracted` was the only
  // data edge inside the tree, a data edge if there were only two data edges in
  // the tree, or the (possibly new / smaller) remaining tree with the extracted
  // data edge removed.
  struct ExtractResult {
    CordRep* tree;
    CordRep* extracted;
  };

  CordRep() = default;
  constexpr CordRep(RefcountAndFlags::Immortal immortal, size_t l)
      : length(l), refcount(immortal), tag(EXTERNAL), storage{} {}

  // The following three fields have to be less than 32 bytes since
  // that is the smallest supported flat node size. Some code optimizations rely
  // on the specific layout of these fields. Notably: the non-trivial field
  // `refcount` being preceded by `length`, and being tailed by POD data
  // members only.
  // # LINT.IfChange
  size_t length;
  RefcountAndFlags refcount;
  // If tag < FLAT, it represents CordRepKind and indicates the type of node.
  // Otherwise, the node type is CordRepFlat and the tag is the encoded size.
  uint8_t tag;

  // `storage` serves two main purposes:
  // - the starting point for FlatCordRep.Data() [flexible-array-member]
  // - 3 bytes of additional storage for use by derived classes.
  // The latter is used by CordRepConcat and CordRepBtree. CordRepConcat stores
  // a 'depth' value in storage[0], and the (future) CordRepBtree class stores
  // `height`, `begin` and `end` in the 3 entries. Otherwise we would need to
  // allocate room for these in the derived class, as not all compilers reuse
  // padding space from the base class (clang and gcc do, MSVC does not, etc).
  uint8_t storage[3];
  // # LINT.ThenChange(cord_rep_btree.h:copy_raw)

  // Returns true if this instance's tag matches the requested type.
  constexpr bool IsRing() const { return tag == RING; }
  constexpr bool IsSubstring() const { return tag == SUBSTRING; }
  constexpr bool IsCrc() const { return tag == CRC; }
  constexpr bool IsExternal() const { return tag == EXTERNAL; }
  constexpr bool IsFlat() const { return tag >= FLAT; }
  constexpr bool IsBtree() const { return tag == BTREE; }

  inline CordRepRing* ring();
  inline const CordRepRing* ring() const;
  inline CordRepSubstring* substring();
  inline const CordRepSubstring* substring() const;
  inline CordRepCrc* crc();
  inline const CordRepCrc* crc() const;
  inline CordRepExternal* external();
  inline const CordRepExternal* external() const;
  inline CordRepFlat* flat();
  inline const CordRepFlat* flat() const;
  inline CordRepBtree* btree();
  inline const CordRepBtree* btree() const;

  // --------------------------------------------------------------------
  // Memory management

  // Destroys the provided `rep`.
  static void Destroy(CordRep* rep);

  // Increments the reference count of `rep`.
  // Requires `rep` to be a non-null pointer value.
  static inline CordRep* Ref(CordRep* rep);

  // Decrements the reference count of `rep`. Destroys rep if count reaches
  // zero. Requires `rep` to be a non-null pointer value.
  static inline void Unref(CordRep* rep);
};
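
// Example (illustrative, `Visit` is a hypothetical function): the tag based
// type checks and downcasting helpers above are typically used together:
//
//   void Visit(CordRep* rep) {
//     if (rep->IsFlat()) {
//       CordRepFlat* flat = rep->flat();          // tag >= FLAT
//     } else if (rep->IsExternal()) {
//       CordRepExternal* external = rep->external();
//     }  // ... and so on for SUBSTRING, CRC, BTREE and RING.
//   }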

struct CordRepSubstring : public CordRep {
  size_t start;  // Starting offset of substring in child
  CordRep* child;

  // Creates a substring on `child`, adopting a reference on `child`.
  // Requires `child` to be either a flat or external node, and `pos` and `n` to
  // form a non-empty partial sub range of `child`, i.e.:
  // `n > 0 && n < length && n + pos <= length`
  static inline CordRepSubstring* Create(CordRep* child, size_t pos, size_t n);

  // Creates a substring of `rep`. Does not adopt a reference on `rep`.
  // Requires `IsDataEdge(rep) && n > 0 && pos + n <= rep->length`.
  // If `n == rep->length` then this method returns `CordRep::Ref(rep)`.
  // If `rep` is a substring of a flat or external node, then this method will
  // return a new substring of that flat or external node with `pos` adjusted
  // by the original `start` position.
  static inline CordRep* Substring(CordRep* rep, size_t pos, size_t n);
};
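
// Example (illustrative sketch, assuming `flat` is a flat node of length 10):
//
//   // Adopts the caller's reference on `flat`; refers to bytes [2, 7).
//   CordRepSubstring* sub = CordRepSubstring::Create(flat, 2, 5);
//
//   // Takes its own reference on `flat`; nested substrings are collapsed, so
//   // the result is a substring of `flat` with start 2 + 1 = 3 and length 3.
//   CordRep* sub2 = CordRepSubstring::Substring(sub, 1, 3);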

// Type for function pointer that will invoke the releaser function and also
// delete the `CordRepExternalImpl` corresponding to the passed in
// `CordRepExternal`.
using ExternalReleaserInvoker = void (*)(CordRepExternal*);

// External CordReps are allocated together with a type erased releaser. The
// releaser is stored in the memory directly following the CordRepExternal.
struct CordRepExternal : public CordRep {
  CordRepExternal() = default;
  explicit constexpr CordRepExternal(absl::string_view str)
      : CordRep(RefcountAndFlags::Immortal{}, str.size()),
        base(str.data()),
        releaser_invoker(nullptr) {}

  const char* base;
  // Pointer to function that knows how to call and destroy the releaser.
  ExternalReleaserInvoker releaser_invoker;

  // Deletes (releases) the external rep.
  // Requires rep != nullptr and rep->IsExternal()
  static void Delete(CordRep* rep);
};

struct Rank1 {};
struct Rank0 : Rank1 {};

template <typename Releaser, typename = ::absl::base_internal::invoke_result_t<
                                 Releaser, absl::string_view>>
void InvokeReleaser(Rank0, Releaser&& releaser, absl::string_view data) {
  ::absl::base_internal::invoke(std::forward<Releaser>(releaser), data);
}

template <typename Releaser,
          typename = ::absl::base_internal::invoke_result_t<Releaser>>
void InvokeReleaser(Rank1, Releaser&& releaser, absl::string_view) {
  ::absl::base_internal::invoke(std::forward<Releaser>(releaser));
}

// We use CompressedTuple so that we can benefit from EBCO.
template <typename Releaser>
struct CordRepExternalImpl
    : public CordRepExternal,
      public ::absl::container_internal::CompressedTuple<Releaser> {
  // The extra int arg is so that we can avoid interfering with copy/move
  // constructors while still benefiting from perfect forwarding.
  template <typename T>
  CordRepExternalImpl(T&& releaser, int)
      : CordRepExternalImpl::CompressedTuple(std::forward<T>(releaser)) {
    this->releaser_invoker = &Release;
  }

  ~CordRepExternalImpl() {
    InvokeReleaser(Rank0{}, std::move(this->template get<0>()),
                   absl::string_view(base, length));
  }

  static void Release(CordRepExternal* rep) {
    delete static_cast<CordRepExternalImpl*>(rep);
  }
};
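
// Illustrative sketch of how an external rep is typically assembled; the real
// factory code lives in the Cord implementation, not in this header:
//
//   absl::string_view data = ...;  // externally owned bytes
//   auto* rep = new CordRepExternalImpl<Releaser>(std::move(releaser), 0);
//   rep->length = data.size();
//   rep->tag = EXTERNAL;
//   rep->base = data.data();
//   // The constructor set `releaser_invoker`; CordRepExternal::Delete(rep)
//   // will later invoke the releaser and delete the impl object.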

inline CordRepSubstring* CordRepSubstring::Create(CordRep* child, size_t pos,
                                                  size_t n) {
  assert(child != nullptr);
  assert(n > 0);
  assert(n < child->length);
  assert(pos < child->length);
  assert(n <= child->length - pos);

  // TODO(b/217376272): Harden internal logic.
  // Move this check to strategic places inside the Cord logic and make it an
  // assert.
  if (ABSL_PREDICT_FALSE(!(child->IsExternal() || child->IsFlat()))) {
    LogFatalNodeType(child);
  }

  CordRepSubstring* rep = new CordRepSubstring();
  rep->length = n;
  rep->tag = SUBSTRING;
  rep->start = pos;
  rep->child = child;
  return rep;
}

inline CordRep* CordRepSubstring::Substring(CordRep* rep, size_t pos,
                                            size_t n) {
  assert(rep != nullptr);
  assert(n != 0);
  assert(pos < rep->length);
  assert(n <= rep->length - pos);
  if (n == rep->length) return CordRep::Ref(rep);
  if (rep->IsSubstring()) {
    pos += rep->substring()->start;
    rep = rep->substring()->child;
  }
  CordRepSubstring* substr = new CordRepSubstring();
  substr->length = n;
  substr->tag = SUBSTRING;
  substr->start = pos;
  substr->child = CordRep::Ref(rep);
  return substr;
}

inline void CordRepExternal::Delete(CordRep* rep) {
  assert(rep != nullptr && rep->IsExternal());
  auto* rep_external = static_cast<CordRepExternal*>(rep);
  assert(rep_external->releaser_invoker != nullptr);
  rep_external->releaser_invoker(rep_external);
}

template <typename Str>
struct ConstInitExternalStorage {
  ABSL_CONST_INIT static CordRepExternal value;
};

template <typename Str>
ABSL_CONST_INIT CordRepExternal
    ConstInitExternalStorage<Str>::value(Str::value);

enum {
  kMaxInline = 15,
};

constexpr char GetOrNull(absl::string_view data, size_t pos) {
  return pos < data.size() ? data[pos] : '\0';
}
// We store cordz_info as 64 bit pointer value in little endian format. This
// guarantees that the least significant byte of cordz_info matches the first
// byte of the inline data representation in `data`, which holds the inlined
// size or the 'is_tree' bit.
using cordz_info_t = int64_t;

// Assert that the `cordz_info` value perfectly overlaps the first half of
// `data` and can hold a pointer value.
static_assert(sizeof(cordz_info_t) * 2 == kMaxInline + 1, "");
static_assert(sizeof(cordz_info_t) >= sizeof(intptr_t), "");

// LittleEndianByte() creates a little endian representation of 'value', i.e.:
// a value whose first byte in the host's memory representation holds `value`,
// with all other bytes being 0.
static constexpr cordz_info_t LittleEndianByte(unsigned char value) {
#if defined(ABSL_IS_BIG_ENDIAN)
  return static_cast<cordz_info_t>(value) << ((sizeof(cordz_info_t) - 1) * 8);
#else
  return value;
#endif
}
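
// For example (illustrative), LittleEndianByte(1) yields the integer whose
// first byte in memory is 1 regardless of endianness:
//   little endian host: 0x0000000000000001
//   big endian host:    0x0100000000000000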

class InlineData {
 public:
  // DefaultInitType forces the use of the default initialization constructor.
  enum DefaultInitType { kDefaultInit };

  // kNullCordzInfo is the 'null' / initial value of 'cordz_info'. It holds
  // intptr_t(1) in little endian byte order, so the byte of `cordz_info` that
  // overlaps the tag byte of the inline representation is 1, which keeps the
  // 'is_tree' bit set.
  static constexpr cordz_info_t kNullCordzInfo = LittleEndianByte(1);

  // kTagOffset contains the offset of the control byte / tag. This constant is
  // intended mostly for debugging purposes: do not remove this constant as it
  // is actively inspected and used by gdb pretty printing code.
  static constexpr size_t kTagOffset = 0;

  // Implement `~InlineData()` conditionally: we only need this destructor to
  // unpoison poisoned instances under *SAN, and it will only compile correctly
  // if the current compiler supports `absl::is_constant_evaluated()`.
#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
  ~InlineData() noexcept { unpoison(); }
#endif

  constexpr InlineData() noexcept { poison_this(); }

  explicit InlineData(DefaultInitType) noexcept : rep_(kDefaultInit) {
    poison_this();
  }

  explicit InlineData(CordRep* rep) noexcept : rep_(rep) {
    ABSL_ASSERT(rep != nullptr);
  }

  // Explicit constexpr constructor to create a constexpr InlineData
  // value. Creates an inlined SSO value if `rep` is null, otherwise
  // creates a tree instance value.
  constexpr InlineData(absl::string_view sv, CordRep* rep) noexcept
      : rep_(rep ? Rep(rep) : Rep(sv)) {
    poison();
  }

  constexpr InlineData(const InlineData& rhs) noexcept;
  InlineData& operator=(const InlineData& rhs) noexcept;

  friend bool operator==(const InlineData& lhs, const InlineData& rhs) {
#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
    const Rep l = lhs.rep_.SanitizerSafeCopy();
    const Rep r = rhs.rep_.SanitizerSafeCopy();
    return memcmp(&l, &r, sizeof(l)) == 0;
#else
    return memcmp(&lhs, &rhs, sizeof(lhs)) == 0;
#endif
  }
  friend bool operator!=(const InlineData& lhs, const InlineData& rhs) {
    return !operator==(lhs, rhs);
  }

  // Poisons the unused inlined SSO data if the current instance
  // is inlined, else un-poisons the entire instance.
  constexpr void poison();

  // Un-poisons this instance.
  constexpr void unpoison();

  // Poisons the current instance. This is used on default initialization.
  constexpr void poison_this();

  // Returns true if the current instance is empty.
  // The 'empty value' is an inlined data value of zero length.
  bool is_empty() const { return rep_.tag() == 0; }

  // Returns true if the current instance holds a tree value.
  bool is_tree() const { return (rep_.tag() & 1) != 0; }

  // Returns true if the current instance holds a cordz_info value.
  // Requires the current instance to hold a tree value.
  bool is_profiled() const {
    assert(is_tree());
    return rep_.cordz_info() != kNullCordzInfo;
  }

  // Returns true if either of the provided instances holds a cordz_info value.
  // This method is more efficient than the equivalent `data1.is_profiled() ||
  // data2.is_profiled()`. Requires both arguments to hold a tree.
  static bool is_either_profiled(const InlineData& data1,
                                 const InlineData& data2) {
    assert(data1.is_tree() && data2.is_tree());
    return (data1.rep_.cordz_info() | data2.rep_.cordz_info()) !=
           kNullCordzInfo;
  }

  // Returns the cordz_info sampling instance for this instance, or nullptr
  // if the current instance is not sampled and does not have CordzInfo data.
  // Requires the current instance to hold a tree value.
  CordzInfo* cordz_info() const {
    assert(is_tree());
    intptr_t info = static_cast<intptr_t>(absl::little_endian::ToHost64(
        static_cast<uint64_t>(rep_.cordz_info())));
    assert(info & 1);
    return reinterpret_cast<CordzInfo*>(info - 1);
  }

  // Sets the current cordz_info sampling instance for this instance, or nullptr
  // if the current instance is not sampled and does not have CordzInfo data.
  // Requires the current instance to hold a tree value.
  void set_cordz_info(CordzInfo* cordz_info) {
    assert(is_tree());
    uintptr_t info = reinterpret_cast<uintptr_t>(cordz_info) | 1;
    rep_.set_cordz_info(
        static_cast<cordz_info_t>(absl::little_endian::FromHost64(info)));
  }

  // Resets the current cordz_info to null / empty.
  void clear_cordz_info() {
    assert(is_tree());
    rep_.set_cordz_info(kNullCordzInfo);
  }

  // Returns a read only pointer to the character data inside this instance.
  // Requires the current instance to hold inline data.
  const char* as_chars() const {
    assert(!is_tree());
    return rep_.as_chars();
  }

  // Returns a mutable pointer to the character data inside this instance.
  // Should be used for 'write only' operations setting an inlined value.
  // Applications can set the value of inlined data either before or after
  // setting the inlined size, i.e., both of the below are valid:
  //
  //   // Set inlined data and inline size
  //   memcpy(data_.as_chars(), data, size);
  //   data_.set_inline_size(size);
  //
  //   // Set inlined size and inline data
  //   data_.set_inline_size(size);
  //   memcpy(data_.as_chars(), data, size);
  //
  // It's an error to read from the returned pointer without a preceding write
  // if the current instance does not hold inline data, i.e.: is_tree() == true.
  char* as_chars() { return rep_.as_chars(); }

  // Returns the tree value of this instance.
  // Requires the current instance to hold a tree value.
  CordRep* as_tree() const {
    assert(is_tree());
    return rep_.tree();
  }

  void set_inline_data(const char* data, size_t n) {
    ABSL_ASSERT(n <= kMaxInline);
    unpoison();
    rep_.set_tag(static_cast<int8_t>(n << 1));
    SmallMemmove<true>(rep_.as_chars(), data, n);
    poison();
  }

  void copy_max_inline_to(char* dst) const {
    assert(!is_tree());
    memcpy(dst, rep_.SanitizerSafeCopy().as_chars(), kMaxInline);
  }

  // Initializes this instance to hold the tree value `rep`,
  // initializing the cordz_info to null, i.e.: 'not profiled'.
  void make_tree(CordRep* rep) {
    unpoison();
    rep_.make_tree(rep);
  }

  // Sets the tree value of this instance to `rep`.
  // Requires the current instance to already hold a tree value.
  // Does not affect the value of cordz_info.
  void set_tree(CordRep* rep) {
    assert(is_tree());
    rep_.set_tree(rep);
  }

  // Returns the size of the inlined character data inside this instance.
  // Requires the current instance to hold inline data.
  size_t inline_size() const { return rep_.inline_size(); }

  // Sets the size of the inlined character data inside this instance.
  // Requires `size` to be <= kMaxInline.
  // See the documentation on 'as_chars()' for more information and examples.
  void set_inline_size(size_t size) {
    unpoison();
    rep_.set_inline_size(size);
    poison();
  }

  // Compares 'this' inlined data with rhs. The comparison is a straightforward
  // lexicographic comparison. `Compare()` returns values as follows:
  //
  //   -1  'this' InlineData instance is smaller
  //    0  the InlineData instances are equal
  //    1  'this' InlineData instance is larger
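  //
  // For example (illustrative): an instance holding "abc" compares as -1
  // against "abd", as 1 against "ab", and as 0 against another "abc".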
  int Compare(const InlineData& rhs) const {
    return Compare(rep_.SanitizerSafeCopy(), rhs.rep_.SanitizerSafeCopy());
  }

 private:
  struct Rep {
    // See `cordz_info_t` for details on the forced alignment and size of
    // `cordz_info`.
    struct AsTree {
      explicit constexpr AsTree(absl::cord_internal::CordRep* tree)
          : rep(tree) {}
      cordz_info_t cordz_info = kNullCordzInfo;
      absl::cord_internal::CordRep* rep;
    };

    explicit Rep(DefaultInitType) {}
    constexpr Rep() : data{0} {}
    constexpr Rep(const Rep&) = default;
    constexpr Rep& operator=(const Rep&) = default;

    explicit constexpr Rep(CordRep* rep) : as_tree(rep) {}

    explicit constexpr Rep(absl::string_view chars)
        : data{static_cast<char>((chars.size() << 1)),
               GetOrNull(chars, 0),
               GetOrNull(chars, 1),
               GetOrNull(chars, 2),
               GetOrNull(chars, 3),
               GetOrNull(chars, 4),
               GetOrNull(chars, 5),
               GetOrNull(chars, 6),
               GetOrNull(chars, 7),
               GetOrNull(chars, 8),
               GetOrNull(chars, 9),
               GetOrNull(chars, 10),
               GetOrNull(chars, 11),
               GetOrNull(chars, 12),
               GetOrNull(chars, 13),
               GetOrNull(chars, 14)} {}

    // Disable sanitizer as we must always be able to read `tag`.
    ABSL_CORD_INTERNAL_NO_SANITIZE
    int8_t tag() const { return reinterpret_cast<const int8_t*>(this)[0]; }
    void set_tag(int8_t rhs) { reinterpret_cast<int8_t*>(this)[0] = rhs; }

    char* as_chars() { return data + 1; }
    const char* as_chars() const { return data + 1; }

    bool is_tree() const { return (tag() & 1) != 0; }

    size_t inline_size() const {
      ABSL_ASSERT(!is_tree());
      return static_cast<size_t>(tag()) >> 1;
    }

    void set_inline_size(size_t size) {
      ABSL_ASSERT(size <= kMaxInline);
      set_tag(static_cast<int8_t>(size << 1));
    }

    CordRep* tree() const { return as_tree.rep; }
    void set_tree(CordRep* rhs) { as_tree.rep = rhs; }

    cordz_info_t cordz_info() const { return as_tree.cordz_info; }
    void set_cordz_info(cordz_info_t rhs) { as_tree.cordz_info = rhs; }

    void make_tree(CordRep* tree) {
      as_tree.rep = tree;
      as_tree.cordz_info = kNullCordzInfo;
    }

#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
    constexpr Rep SanitizerSafeCopy() const {
      if (!absl::is_constant_evaluated()) {
        Rep res;
        if (is_tree()) {
          res = *this;
        } else {
          res.set_tag(tag());
          memcpy(res.as_chars(), as_chars(), inline_size());
        }
        return res;
      } else {
        return *this;
      }
    }
#else
    constexpr const Rep& SanitizerSafeCopy() const { return *this; }
#endif

    // If the data has length <= kMaxInline, we store it in `data`, with the
    // size stored in the first byte of `data` shifted left by 1 (so the low
    // bit is 0). Otherwise we store a pointer to the tree in `as_tree.rep`;
    // `as_tree.cordz_info` overlaps the tag byte and always has its low bit
    // set, which makes `tag() & 1` non zero for tree values.
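    //
    // For example (illustrative): an inlined value "abc" is laid out as
    //   data[0]    = 6  (size 3 << 1, low bit 0 => not a tree)
    //   data[1..3] = 'a', 'b', 'c'  (remaining bytes are zero / unused)
    // while a tree value stores the (tagged) `cordz_info` in the first 8 bytes
    // and the `CordRep*` pointer in the remaining bytes.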
    union {
      char data[kMaxInline + 1];
      AsTree as_tree;
    };
  };

  // Private implementation of `Compare()`
  static inline int Compare(const Rep& lhs, const Rep& rhs) {
    uint64_t x, y;
    memcpy(&x, lhs.as_chars(), sizeof(x));
    memcpy(&y, rhs.as_chars(), sizeof(y));
    if (x == y) {
      memcpy(&x, lhs.as_chars() + 7, sizeof(x));
      memcpy(&y, rhs.as_chars() + 7, sizeof(y));
      if (x == y) {
        if (lhs.inline_size() == rhs.inline_size()) return 0;
        return lhs.inline_size() < rhs.inline_size() ? -1 : 1;
      }
    }
    x = absl::big_endian::FromHost64(x);
    y = absl::big_endian::FromHost64(y);
    return x < y ? -1 : 1;
  }

  Rep rep_;
};

static_assert(sizeof(InlineData) == kMaxInline + 1, "");

#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER

constexpr InlineData::InlineData(const InlineData& rhs) noexcept
    : rep_(rhs.rep_.SanitizerSafeCopy()) {
  poison();
}

inline InlineData& InlineData::operator=(const InlineData& rhs) noexcept {
  unpoison();
  rep_ = rhs.rep_.SanitizerSafeCopy();
  poison();
  return *this;
}

constexpr void InlineData::poison_this() {
  if (!absl::is_constant_evaluated()) {
    container_internal::SanitizerPoisonObject(this);
  }
}

constexpr void InlineData::unpoison() {
  if (!absl::is_constant_evaluated()) {
    container_internal::SanitizerUnpoisonObject(this);
  }
}

constexpr void InlineData::poison() {
  if (!absl::is_constant_evaluated()) {
    if (is_tree()) {
      container_internal::SanitizerUnpoisonObject(this);
    } else if (const size_t size = inline_size()) {
      if (size < kMaxInline) {
        const char* end = rep_.as_chars() + size;
        container_internal::SanitizerPoisonMemoryRegion(end, kMaxInline - size);
      }
    } else {
      container_internal::SanitizerPoisonObject(this);
    }
  }
}

#else  // ABSL_INTERNAL_CORD_HAVE_SANITIZER

constexpr InlineData::InlineData(const InlineData&) noexcept = default;
inline InlineData& InlineData::operator=(const InlineData&) noexcept = default;

constexpr void InlineData::poison_this() {}
constexpr void InlineData::unpoison() {}
constexpr void InlineData::poison() {}

#endif  // ABSL_INTERNAL_CORD_HAVE_SANITIZER

inline CordRepSubstring* CordRep::substring() {
  assert(IsSubstring());
  return static_cast<CordRepSubstring*>(this);
}

inline const CordRepSubstring* CordRep::substring() const {
  assert(IsSubstring());
  return static_cast<const CordRepSubstring*>(this);
}

inline CordRepExternal* CordRep::external() {
  assert(IsExternal());
  return static_cast<CordRepExternal*>(this);
}

inline const CordRepExternal* CordRep::external() const {
  assert(IsExternal());
  return static_cast<const CordRepExternal*>(this);
}

inline CordRep* CordRep::Ref(CordRep* rep) {
  // ABSL_ASSUME is a workaround for
  // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105585
  ABSL_ASSUME(rep != nullptr);
  rep->refcount.Increment();
  return rep;
}

inline void CordRep::Unref(CordRep* rep) {
  assert(rep != nullptr);
  // Expect the refcount to be greater than 1, so use
  // DecrementExpectHighRefcount(): the extra branch in Decrement() that checks
  // for a refcount of 1 to skip the atomic decrement is typically not worth it
  // here.
  if (ABSL_PREDICT_FALSE(!rep->refcount.DecrementExpectHighRefcount())) {
    Destroy(rep);
  }
}

}  // namespace cord_internal

ABSL_NAMESPACE_END
}  // namespace absl
#endif  // ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_