1 // Copyright 2022 The Abseil Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ABSL_CRC_INTERNAL_CRC_CORD_STATE_H_
16 #define ABSL_CRC_INTERNAL_CRC_CORD_STATE_H_
17 
18 #include <atomic>
19 #include <cstddef>
20 #include <deque>
21 
22 #include "absl/base/config.h"
23 #include "absl/crc/crc32c.h"
24 
25 namespace absl {
26 ABSL_NAMESPACE_BEGIN
27 namespace crc_internal {
28 
29 // CrcCordState is a copy-on-write class that holds the chunked CRC32C data
30 // that allows CrcCord to perform efficient substring operations. CrcCordState
31 // is used as a member variable in CrcCord. When a CrcCord is converted to a
32 // Cord, the CrcCordState is shallow-copied into the root node of the Cord. If
33 // the converted Cord is modified outside of CrcCord, the CrcCordState is
34 // discarded from the Cord. If the Cord is converted back to a CrcCord, and the
35 // Cord is still carrying the CrcCordState in its root node, the CrcCord can
36 // re-use the CrcCordState, making the construction of the CrcCord cheap.
37 //
38 // CrcCordState does not try to encapsulate the CRC32C state (CrcCord requires
39 // knowledge of how CrcCordState represents the CRC32C state). It does
40 // encapsulate the copy-on-write nature of the state.
41 class CrcCordState {
42  public:
43   // Constructors.
44   CrcCordState();
45   CrcCordState(const CrcCordState&);
46   CrcCordState(CrcCordState&&);
47 
48   // Destructor. Atomically unreferences the data.
49   ~CrcCordState();
50 
51   // Copy and move operators.
52   CrcCordState& operator=(const CrcCordState&);
53   CrcCordState& operator=(CrcCordState&&);
54 
55   // A (length, crc) pair.
56   struct PrefixCrc {
57     PrefixCrc() = default;
PrefixCrcPrefixCrc58     PrefixCrc(size_t length_arg, absl::crc32c_t crc_arg)
59         : length(length_arg), crc(crc_arg) {}
60 
61     size_t length = 0;
62 
63     // TODO(absl-team): Memory stomping often zeros out memory. If this struct
64     // gets overwritten, we could end up with {0, 0}, which is the correct CRC
65     // for a string of length 0. Consider storing a scrambled value and
66     // unscrambling it before verifying it.
67     absl::crc32c_t crc = absl::crc32c_t{0};
68   };
69 
70   // The representation of the chunked CRC32C data.
71   struct Rep {
72     // `removed_prefix` is the crc and length of any prefix that has been
73     // removed from the Cord (for example, by calling
74     // `CrcCord::RemovePrefix()`). To get the checkum of any prefix of the cord,
75     // this value must be subtracted from `prefix_crc`. See `Checksum()` for an
76     // example.
77     //
78     // CrcCordState is said to be "normalized" if removed_prefix.length == 0.
79     PrefixCrc removed_prefix;
80 
81     // A deque of (length, crc) pairs, representing length and crc of a prefix
82     // of the Cord, before removed_prefix has been subtracted. The lengths of
83     // the prefixes are stored in increasing order. If the Cord is not empty,
84     // the last value in deque is the contains the CRC32C of the entire Cord
85     // when removed_prefix is subtracted from it.
86     std::deque<PrefixCrc> prefix_crc;
87   };
88 
89   // Returns a reference to the representation of the chunked CRC32C data.
rep()90   const Rep& rep() const { return refcounted_rep_->rep; }
91 
92   // Returns a mutable reference to the representation of the chunked CRC32C
93   // data. Calling this function will copy the data if another instance also
94   // holds a reference to the data, so it is important to call rep() instead if
95   // the data may not be mutated.
mutable_rep()96   Rep* mutable_rep() {
97     if (refcounted_rep_->count.load(std::memory_order_acquire) != 1) {
98       RefcountedRep* copy = new RefcountedRep;
99       copy->rep = refcounted_rep_->rep;
100       Unref(refcounted_rep_);
101       refcounted_rep_ = copy;
102     }
103     return &refcounted_rep_->rep;
104   }
105 
106   // Returns the CRC32C of the entire Cord.
107   absl::crc32c_t Checksum() const;
108 
109   // Returns true if the chunked CRC32C cached is normalized.
IsNormalized()110   bool IsNormalized() const { return rep().removed_prefix.length == 0; }
111 
112   // Normalizes the chunked CRC32C checksum cache by substracting any removed
113   // prefix from the chunks.
114   void Normalize();
115 
116   // Returns the number of cached chunks.
NumChunks()117   size_t NumChunks() const { return rep().prefix_crc.size(); }
118 
119   // Helper that returns the (length, crc) of the `n`-th cached chunked.
120   PrefixCrc NormalizedPrefixCrcAtNthChunk(size_t n) const;
121 
122   // Poisons all chunks to so that Checksum() will likely be incorrect with high
123   // probability.
124   void Poison();
125 
126  private:
127   struct RefcountedRep {
128     std::atomic<int32_t> count{1};
129     Rep rep;
130   };
131 
132   // Adds a reference to the shared global empty `RefcountedRep`, and returns a
133   // pointer to the `RefcountedRep`. This is an optimization to avoid unneeded
134   // allocations when the allocation is unlikely to ever be used. The returned
135   // pointer can be `Unref()`ed when it is no longer needed.  Since the returned
136   // instance will always have a reference counter greater than 1, attempts to
137   // modify it (by calling `mutable_rep()`) will create a new unshared copy.
138   static RefcountedRep* RefSharedEmptyRep();
139 
Ref(RefcountedRep * r)140   static void Ref(RefcountedRep* r) {
141     assert(r != nullptr);
142     r->count.fetch_add(1, std::memory_order_relaxed);
143   }
144 
Unref(RefcountedRep * r)145   static void Unref(RefcountedRep* r) {
146     assert(r != nullptr);
147     if (r->count.fetch_sub(1, std::memory_order_acq_rel) == 1) {
148       delete r;
149     }
150   }
151 
152   RefcountedRep* refcounted_rep_;
153 };
154 
155 }  // namespace crc_internal
156 ABSL_NAMESPACE_END
157 }  // namespace absl
158 
159 #endif  // ABSL_CRC_INTERNAL_CRC_CORD_STATE_H_
160