1 // Copyright 2022 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #ifndef ABSL_CRC_INTERNAL_CRC_INTERNAL_H_
16 #define ABSL_CRC_INTERNAL_CRC_INTERNAL_H_
17
18 #include <cstdint>
19 #include <memory>
20 #include <vector>
21
22 #include "absl/base/internal/raw_logging.h"
23 #include "absl/crc/internal/crc.h"
24
25 namespace absl {
26 ABSL_NAMESPACE_BEGIN
27
28 namespace crc_internal {
29
30 // Prefetch constants used in some Extend() implementations
31 constexpr int kPrefetchHorizon = ABSL_CACHELINE_SIZE * 4; // Prefetch this far
32 // Shorter prefetch distance for smaller buffers
33 constexpr int kPrefetchHorizonMedium = ABSL_CACHELINE_SIZE * 1;
34 static_assert(kPrefetchHorizon >= 64, "CRCPrefetchHorizon less than loop len");
35
36 // We require the Scramble() function:
37 // - to be reversible (Unscramble() must exist)
38 // - to be non-linear in the polynomial's Galois field (so the CRC of a
39 // scrambled CRC is not linearly affected by the scrambled CRC, even if
40 // using the same polynomial)
41 // - not to be its own inverse. Preferably, if X=Scramble^N(X) and N!=0, then
42 // N is large.
43 // - to be fast.
44 // - not to change once defined.
45 // We introduce non-linearity in two ways:
46 // Addition of a constant.
47 // - The carries introduce non-linearity; we use bits of an irrational
48 // (phi) to make it unlikely that we introduce no carries.
49 // Rotate by a constant number of bits.
50 // - We use floor(degree/2)+1, which does not divide the degree, and
51 // splits the bits nearly evenly, which makes it less likely the
52 // halves will be the same or one will be all zeroes.
53 // We do both things to improve the chances of non-linearity in the face of
54 // bit patterns with low numbers of bits set, while still being fast.
55 // Below is the constant that we add. The bits are the first 128 bits of the
56 // fractional part of phi, with a 1 ored into the bottom bit to maximize the
57 // cycle length of repeated adds.
58 constexpr uint64_t kScrambleHi = (static_cast<uint64_t>(0x4f1bbcdcU) << 32) |
59 static_cast<uint64_t>(0xbfa53e0aU);
60 constexpr uint64_t kScrambleLo = (static_cast<uint64_t>(0xf9ce6030U) << 32) |
61 static_cast<uint64_t>(0x2e76e41bU);
62
63 class CRCImpl : public CRC { // Implemention of the abstract class CRC
64 public:
65 using Uint32By256 = uint32_t[256];
66
CRCImpl()67 CRCImpl() {}
68 ~CRCImpl() override = default;
69
70 // The internal version of CRC::New().
71 static CRCImpl* NewInternal();
72
73 void Empty(uint32_t* crc) const override;
74
75 // Fill in a table for updating a CRC by one word of 'word_size' bytes
76 // [last_lo, last_hi] contains the answer if the last bit in the word
77 // is set.
78 static void FillWordTable(uint32_t poly, uint32_t last, int word_size,
79 Uint32By256* t);
80
81 // Build the table for extending by zeroes, returning the number of entries.
82 // For a in {1, 2, ..., ZEROES_BASE-1}, b in {0, 1, 2, 3, ...},
83 // entry j=a-1+(ZEROES_BASE-1)*b
84 // contains a polynomial Pi such that multiplying
85 // a CRC by Pi mod P, where P is the CRC polynomial, is equivalent to
86 // appending a*2**(ZEROES_BASE_LG*b) zero bytes to the original string.
87 static int FillZeroesTable(uint32_t poly, Uint32By256* t);
88
89 virtual void InitTables() = 0;
90
91 private:
92 CRCImpl(const CRCImpl&) = delete;
93 CRCImpl& operator=(const CRCImpl&) = delete;
94 };
95
96 // This is the 32-bit implementation. It handles all sizes from 8 to 32.
97 class CRC32 : public CRCImpl {
98 public:
CRC32()99 CRC32() {}
~CRC32()100 ~CRC32() override {}
101
102 void Extend(uint32_t* crc, const void* bytes, size_t length) const override;
103 void ExtendByZeroes(uint32_t* crc, size_t length) const override;
104 void Scramble(uint32_t* crc) const override;
105 void Unscramble(uint32_t* crc) const override;
106 void UnextendByZeroes(uint32_t* crc, size_t length) const override;
107
108 void InitTables() override;
109
110 private:
111 // Common implementation guts for ExtendByZeroes and UnextendByZeroes().
112 //
113 // zeroes_table is a table as returned by FillZeroesTable(), containing
114 // polynomials representing CRCs of strings-of-zeros of various lenghts,
115 // and which can be combined by polynomial multiplication. poly_table is
116 // a table of CRC byte extension values. These tables are determined by
117 // the generator polynomial.
118 //
119 // These will be set to reverse_zeroes_ and reverse_table0_ for Unextend, and
120 // CRC32::zeroes_ and CRC32::table0_ for Extend.
121 void ExtendByZeroesImpl(uint32_t* crc, size_t length,
122 const uint32_t zeroes_table[256],
123 const uint32_t poly_table[256]) const;
124
125 uint32_t table0_[256]; // table of byte extensions
126 uint32_t zeroes_[256]; // table of zero extensions
127
128 // table of 4-byte extensions shifted by 12 bytes of zeroes
129 uint32_t table_[4][256];
130
131 // Reverse lookup tables, using the alternate polynomial used by
132 // UnextendByZeroes().
133 uint32_t reverse_table0_[256]; // table of reverse byte extensions
134 uint32_t reverse_zeroes_[256]; // table of reverse zero extensions
135
136 CRC32(const CRC32&) = delete;
137 CRC32& operator=(const CRC32&) = delete;
138 };
139
140 // Helpers
141
142 // Return a bit mask containing len 1-bits.
143 // Requires 0 < len <= sizeof(T)
144 template <typename T>
MaskOfLength(int len)145 T MaskOfLength(int len) {
146 // shift 2 by len-1 rather than 1 by len because shifts of wordsize
147 // are undefined.
148 return (T(2) << (len - 1)) - 1;
149 }
150
151 // Rotate low-order "width" bits of "in" right by "r" bits,
152 // setting other bits in word to arbitrary values.
153 template <typename T>
RotateRight(T in,int width,int r)154 T RotateRight(T in, int width, int r) {
155 return (in << (width - r)) | ((in >> r) & MaskOfLength<T>(width - r));
156 }
157
158 // RoundUp<N>(p) returns the lowest address >= p aligned to an N-byte
159 // boundary. Requires that N is a power of 2.
160 template <int alignment>
RoundUp(const uint8_t * p)161 const uint8_t* RoundUp(const uint8_t* p) {
162 static_assert((alignment & (alignment - 1)) == 0, "alignment is not 2^n");
163 constexpr uintptr_t mask = alignment - 1;
164 const uintptr_t as_uintptr = reinterpret_cast<uintptr_t>(p);
165 return reinterpret_cast<const uint8_t*>((as_uintptr + mask) & ~mask);
166 }
167
168 // Return a newly created CRC32AcceleratedX86ARMCombined if we can use Intel's
169 // or ARM's CRC acceleration for a given polynomial. Return nullptr otherwise.
170 CRCImpl* TryNewCRC32AcceleratedX86ARMCombined();
171
172 // Return all possible hardware accelerated implementations. For testing only.
173 std::vector<std::unique_ptr<CRCImpl>> NewCRC32AcceleratedX86ARMCombinedAll();
174
175 } // namespace crc_internal
176 ABSL_NAMESPACE_END
177 } // namespace absl
178
179 #endif // ABSL_CRC_INTERNAL_CRC_INTERNAL_H_
180