1 // Copyright 2022 The Abseil Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ABSL_CRC_INTERNAL_CRC_CORD_STATE_H_ 16 #define ABSL_CRC_INTERNAL_CRC_CORD_STATE_H_ 17 18 #include <atomic> 19 #include <cstddef> 20 #include <deque> 21 22 #include "absl/base/config.h" 23 #include "absl/crc/crc32c.h" 24 25 namespace absl { 26 ABSL_NAMESPACE_BEGIN 27 namespace crc_internal { 28 29 // CrcCordState is a copy-on-write class that holds the chunked CRC32C data 30 // that allows CrcCord to perform efficient substring operations. CrcCordState 31 // is used as a member variable in CrcCord. When a CrcCord is converted to a 32 // Cord, the CrcCordState is shallow-copied into the root node of the Cord. If 33 // the converted Cord is modified outside of CrcCord, the CrcCordState is 34 // discarded from the Cord. If the Cord is converted back to a CrcCord, and the 35 // Cord is still carrying the CrcCordState in its root node, the CrcCord can 36 // re-use the CrcCordState, making the construction of the CrcCord cheap. 37 // 38 // CrcCordState does not try to encapsulate the CRC32C state (CrcCord requires 39 // knowledge of how CrcCordState represents the CRC32C state). It does 40 // encapsulate the copy-on-write nature of the state. 41 class CrcCordState { 42 public: 43 // Constructors. 44 CrcCordState(); 45 CrcCordState(const CrcCordState&); 46 CrcCordState(CrcCordState&&); 47 48 // Destructor. Atomically unreferences the data. 49 ~CrcCordState(); 50 51 // Copy and move operators. 52 CrcCordState& operator=(const CrcCordState&); 53 CrcCordState& operator=(CrcCordState&&); 54 55 // A (length, crc) pair. 56 struct PrefixCrc { 57 PrefixCrc() = default; PrefixCrcPrefixCrc58 PrefixCrc(size_t length_arg, absl::crc32c_t crc_arg) 59 : length(length_arg), crc(crc_arg) {} 60 61 size_t length = 0; 62 63 // TODO(absl-team): Memory stomping often zeros out memory. If this struct 64 // gets overwritten, we could end up with {0, 0}, which is the correct CRC 65 // for a string of length 0. Consider storing a scrambled value and 66 // unscrambling it before verifying it. 67 absl::crc32c_t crc = absl::crc32c_t{0}; 68 }; 69 70 // The representation of the chunked CRC32C data. 71 struct Rep { 72 // `removed_prefix` is the crc and length of any prefix that has been 73 // removed from the Cord (for example, by calling 74 // `CrcCord::RemovePrefix()`). To get the checkum of any prefix of the cord, 75 // this value must be subtracted from `prefix_crc`. See `Checksum()` for an 76 // example. 77 // 78 // CrcCordState is said to be "normalized" if removed_prefix.length == 0. 79 PrefixCrc removed_prefix; 80 81 // A deque of (length, crc) pairs, representing length and crc of a prefix 82 // of the Cord, before removed_prefix has been subtracted. The lengths of 83 // the prefixes are stored in increasing order. If the Cord is not empty, 84 // the last value in deque is the contains the CRC32C of the entire Cord 85 // when removed_prefix is subtracted from it. 86 std::deque<PrefixCrc> prefix_crc; 87 }; 88 89 // Returns a reference to the representation of the chunked CRC32C data. rep()90 const Rep& rep() const { return refcounted_rep_->rep; } 91 92 // Returns a mutable reference to the representation of the chunked CRC32C 93 // data. Calling this function will copy the data if another instance also 94 // holds a reference to the data, so it is important to call rep() instead if 95 // the data may not be mutated. mutable_rep()96 Rep* mutable_rep() { 97 if (refcounted_rep_->count.load(std::memory_order_acquire) != 1) { 98 RefcountedRep* copy = new RefcountedRep; 99 copy->rep = refcounted_rep_->rep; 100 Unref(refcounted_rep_); 101 refcounted_rep_ = copy; 102 } 103 return &refcounted_rep_->rep; 104 } 105 106 // Returns the CRC32C of the entire Cord. 107 absl::crc32c_t Checksum() const; 108 109 // Returns true if the chunked CRC32C cached is normalized. IsNormalized()110 bool IsNormalized() const { return rep().removed_prefix.length == 0; } 111 112 // Normalizes the chunked CRC32C checksum cache by substracting any removed 113 // prefix from the chunks. 114 void Normalize(); 115 116 // Returns the number of cached chunks. NumChunks()117 size_t NumChunks() const { return rep().prefix_crc.size(); } 118 119 // Helper that returns the (length, crc) of the `n`-th cached chunked. 120 PrefixCrc NormalizedPrefixCrcAtNthChunk(size_t n) const; 121 122 // Poisons all chunks to so that Checksum() will likely be incorrect with high 123 // probability. 124 void Poison(); 125 126 private: 127 struct RefcountedRep { 128 std::atomic<int32_t> count{1}; 129 Rep rep; 130 }; 131 132 // Adds a reference to the shared global empty `RefcountedRep`, and returns a 133 // pointer to the `RefcountedRep`. This is an optimization to avoid unneeded 134 // allocations when the allocation is unlikely to ever be used. The returned 135 // pointer can be `Unref()`ed when it is no longer needed. Since the returned 136 // instance will always have a reference counter greater than 1, attempts to 137 // modify it (by calling `mutable_rep()`) will create a new unshared copy. 138 static RefcountedRep* RefSharedEmptyRep(); 139 Ref(RefcountedRep * r)140 static void Ref(RefcountedRep* r) { 141 assert(r != nullptr); 142 r->count.fetch_add(1, std::memory_order_relaxed); 143 } 144 Unref(RefcountedRep * r)145 static void Unref(RefcountedRep* r) { 146 assert(r != nullptr); 147 if (r->count.fetch_sub(1, std::memory_order_acq_rel) == 1) { 148 delete r; 149 } 150 } 151 152 RefcountedRep* refcounted_rep_; 153 }; 154 155 } // namespace crc_internal 156 ABSL_NAMESPACE_END 157 } // namespace absl 158 159 #endif // ABSL_CRC_INTERNAL_CRC_CORD_STATE_H_ 160