1 // Copyright 2020 The Abseil Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ABSL_STRINGS_INTERNAL_CORD_REP_FLAT_H_
16 #define ABSL_STRINGS_INTERNAL_CORD_REP_FLAT_H_
17 
18 #include <cassert>
19 #include <cstddef>
20 #include <cstdint>
21 #include <memory>
22 
23 #include "absl/base/config.h"
24 #include "absl/base/macros.h"
25 #include "absl/strings/internal/cord_internal.h"
26 
27 namespace absl {
28 ABSL_NAMESPACE_BEGIN
29 namespace cord_internal {
30 
31 // Note: all constants below are never ODR used and internal to cord, we define
32 // these as static constexpr to avoid 'in struct' definition and usage clutter.
33 
34 // Largest and smallest flat node lengths we are willing to allocate
35 // Flat allocation size is stored in tag, which currently can encode sizes up
36 // to 4K, encoded as multiple of either 8 or 32 bytes.
37 // If we allow for larger sizes, we need to change this to 8/64, 16/128, etc.
38 // kMinFlatSize is bounded by tag needing to be at least FLAT * 8 bytes, and
39 // ideally a 'nice' size aligning with allocation and cacheline sizes like 32.
40 // kMaxFlatSize is bounded by the size resulting in a computed tag no greater
41 // than MAX_FLAT_TAG. MAX_FLAT_TAG provides for additional 'high' tag values.
42 static constexpr size_t kFlatOverhead = offsetof(CordRep, storage);
43 static constexpr size_t kMinFlatSize = 32;
44 static constexpr size_t kMaxFlatSize = 4096;
45 static constexpr size_t kMaxFlatLength = kMaxFlatSize - kFlatOverhead;
46 static constexpr size_t kMinFlatLength = kMinFlatSize - kFlatOverhead;
47 static constexpr size_t kMaxLargeFlatSize = 256 * 1024;
48 static constexpr size_t kMaxLargeFlatLength = kMaxLargeFlatSize - kFlatOverhead;
49 
50 // kTagBase should make the Size <--> Tag computation resilient
51 // against changes to the value of FLAT when we add a new tag..
52 static constexpr uint8_t kTagBase = FLAT - 4;
53 
54 // Converts the provided rounded size to the corresponding tag
AllocatedSizeToTagUnchecked(size_t size)55 constexpr uint8_t AllocatedSizeToTagUnchecked(size_t size) {
56   return static_cast<uint8_t>(size <= 512 ? kTagBase + size / 8
57                               : size <= 8192
58                                   ? kTagBase + 512 / 8 + size / 64 - 512 / 64
59                                   : kTagBase + 512 / 8 + ((8192 - 512) / 64) +
60                                         size / 4096 - 8192 / 4096);
61 }
62 
63 // Converts the provided tag to the corresponding allocated size
TagToAllocatedSize(uint8_t tag)64 constexpr size_t TagToAllocatedSize(uint8_t tag) {
65   return (tag <= kTagBase + 512 / 8) ? tag * 8 - kTagBase * 8
66          : (tag <= kTagBase + (512 / 8) + ((8192 - 512) / 64))
67              ? 512 + tag * 64 - kTagBase * 64 - 512 / 8 * 64
68              : 8192 + tag * 4096 - kTagBase * 4096 -
69                    ((512 / 8) + ((8192 - 512) / 64)) * 4096;
70 }
71 
72 static_assert(AllocatedSizeToTagUnchecked(kMinFlatSize) == FLAT, "");
73 static_assert(AllocatedSizeToTagUnchecked(kMaxLargeFlatSize) == MAX_FLAT_TAG,
74               "");
75 
76 // RoundUp logically performs `((n + m - 1) / m) * m` to round up to the nearest
77 // multiple of `m`, optimized for the invariant that `m` is a power of 2.
RoundUp(size_t n,size_t m)78 constexpr size_t RoundUp(size_t n, size_t m) {
79   return (n + m - 1) & (0 - m);
80 }
81 
82 // Returns the size to the nearest equal or larger value that can be
83 // expressed exactly as a tag value.
RoundUpForTag(size_t size)84 inline size_t RoundUpForTag(size_t size) {
85   return RoundUp(size, (size <= 512) ? 8 : (size <= 8192 ? 64 : 4096));
86 }
87 
88 // Converts the allocated size to a tag, rounding down if the size
89 // does not exactly match a 'tag expressible' size value. The result is
90 // undefined if the size exceeds the maximum size that can be encoded in
91 // a tag, i.e., if size is larger than TagToAllocatedSize(<max tag>).
AllocatedSizeToTag(size_t size)92 inline uint8_t AllocatedSizeToTag(size_t size) {
93   const uint8_t tag = AllocatedSizeToTagUnchecked(size);
94   assert(tag <= MAX_FLAT_TAG);
95   return tag;
96 }
97 
98 // Converts the provided tag to the corresponding available data length
TagToLength(uint8_t tag)99 constexpr size_t TagToLength(uint8_t tag) {
100   return TagToAllocatedSize(tag) - kFlatOverhead;
101 }
102 
103 // Enforce that kMaxFlatSize maps to a well-known exact tag value.
104 static_assert(TagToAllocatedSize(MAX_FLAT_TAG) == kMaxLargeFlatSize,
105               "Bad tag logic");
106 
107 struct CordRepFlat : public CordRep {
108   // Tag for explicit 'large flat' allocation
109   struct Large {};
110 
111   // Creates a new flat node.
112   template <size_t max_flat_size, typename... Args>
NewImplCordRepFlat113   static CordRepFlat* NewImpl(size_t len, Args... args ABSL_ATTRIBUTE_UNUSED) {
114     if (len <= kMinFlatLength) {
115       len = kMinFlatLength;
116     } else if (len > max_flat_size - kFlatOverhead) {
117       len = max_flat_size - kFlatOverhead;
118     }
119 
120     // Round size up so it matches a size we can exactly express in a tag.
121     const size_t size = RoundUpForTag(len + kFlatOverhead);
122     void* const raw_rep = ::operator new(size);
123     CordRepFlat* rep = new (raw_rep) CordRepFlat();
124     rep->tag = AllocatedSizeToTag(size);
125     return rep;
126   }
127 
NewCordRepFlat128   static CordRepFlat* New(size_t len) { return NewImpl<kMaxFlatSize>(len); }
129 
NewCordRepFlat130   static CordRepFlat* New(Large, size_t len) {
131     return NewImpl<kMaxLargeFlatSize>(len);
132   }
133 
134   // Deletes a CordRepFlat instance created previously through a call to New().
135   // Flat CordReps are allocated and constructed with raw ::operator new and
136   // placement new, and must be destructed and deallocated accordingly.
DeleteCordRepFlat137   static void Delete(CordRep*rep) {
138     assert(rep->tag >= FLAT && rep->tag <= MAX_FLAT_TAG);
139 
140 #if defined(__cpp_sized_deallocation)
141     size_t size = TagToAllocatedSize(rep->tag);
142     rep->~CordRep();
143     ::operator delete(rep, size);
144 #else
145     rep->~CordRep();
146     ::operator delete(rep);
147 #endif
148   }
149 
150   // Create a CordRepFlat containing `data`, with an optional additional
151   // extra capacity of up to `extra` bytes. Requires that `data.size()`
152   // is less than kMaxFlatLength.
153   static CordRepFlat* Create(absl::string_view data, size_t extra = 0) {
154     assert(data.size() <= kMaxFlatLength);
155     CordRepFlat* flat = New(data.size() + (std::min)(extra, kMaxFlatLength));
156     memcpy(flat->Data(), data.data(), data.size());
157     flat->length = data.size();
158     return flat;
159   }
160 
161   // Returns a pointer to the data inside this flat rep.
DataCordRepFlat162   char* Data() { return reinterpret_cast<char*>(storage); }
DataCordRepFlat163   const char* Data() const { return reinterpret_cast<const char*>(storage); }
164 
165   // Returns the maximum capacity (payload size) of this instance.
CapacityCordRepFlat166   size_t Capacity() const { return TagToLength(tag); }
167 
168   // Returns the allocated size (payload + overhead) of this instance.
AllocatedSizeCordRepFlat169   size_t AllocatedSize() const { return TagToAllocatedSize(tag); }
170 };
171 
172 // Now that CordRepFlat is defined, we can define CordRep's helper casts:
flat()173 inline CordRepFlat* CordRep::flat() {
174   assert(tag >= FLAT && tag <= MAX_FLAT_TAG);
175   return reinterpret_cast<CordRepFlat*>(this);
176 }
177 
flat()178 inline const CordRepFlat* CordRep::flat() const {
179   assert(tag >= FLAT && tag <= MAX_FLAT_TAG);
180   return reinterpret_cast<const CordRepFlat*>(this);
181 }
182 
183 }  // namespace cord_internal
184 ABSL_NAMESPACE_END
185 }  // namespace absl
186 
187 #endif  // ABSL_STRINGS_INTERNAL_CORD_REP_FLAT_H_
188