#pragma once
#ifndef FXDIV_H
#define FXDIV_H

#if defined(__cplusplus) && (__cplusplus >= 201103L)
	#include <cstddef>
	#include <cstdint>
	#include <climits>
#elif !defined(__OPENCL_VERSION__)
	#include <stddef.h>
	#include <stdint.h>
	#include <limits.h>
#endif

#if defined(_MSC_VER)
	#include <intrin.h>
	#if defined(_M_IX86) || defined(_M_X64)
		#include <immintrin.h>
	#endif
#endif

/*
 * Opt-in switch for the GCC-style inline-assembly code paths in this header
 * (the BSR/DIV sequences guarded by this macro). It defaults to 0 (disabled)
 * unless it is already defined before this point.
 */
#ifndef FXDIV_USE_INLINE_ASSEMBLY
	#define FXDIV_USE_INLINE_ASSEMBLY 0
#endif
/*
 * Full 32x32 -> 64-bit unsigned multiplication.
 *
 * Returns the exact 64-bit product of a and b. 32-bit x86 MSVC builds use the
 * __emulu intrinsic; every other target widens both operands before multiplying.
 */
static inline uint64_t fxdiv_mulext_uint32_t(uint32_t a, uint32_t b) {
#if defined(_MSC_VER) && defined(_M_IX86)
	return (uint64_t) __emulu((unsigned int) a, (unsigned int) b);
#else
	return (uint64_t) a * (uint64_t) b;
#endif
}
/*
 * Upper 32 bits of the 64-bit product a * b.
 *
 * OpenCL, CUDA device code and MSVC (x86, ARM) use their native high-multiply
 * facilities; all remaining targets form the 64-bit product and shift it down.
 */
static inline uint32_t fxdiv_mulhi_uint32_t(uint32_t a, uint32_t b) {
#if defined(__OPENCL_VERSION__)
	return mul_hi(a, b);
#elif defined(__CUDA_ARCH__)
	return (uint32_t) __umulhi((unsigned int) a, (unsigned int) b);
#elif defined(_MSC_VER) && defined(_M_IX86)
	return (uint32_t) (__emulu((unsigned int) a, (unsigned int) b) >> 32);
#elif defined(_MSC_VER) && defined(_M_ARM)
	return (uint32_t) _MulUnsignedHigh((unsigned long) a, (unsigned long) b);
#else
	return (uint32_t) (((uint64_t) a * (uint64_t) b) >> 32);
#endif
}
/*
 * Upper 64 bits of the 128-bit product a * b.
 *
 * Uses a native high-multiply where one is selected by the preprocessor
 * (OpenCL mul_hi, CUDA __umul64hi, MSVC x64 __umulh, GCC-style unsigned __int128).
 * Otherwise the product is assembled from 32-bit halves with the helpers
 * fxdiv_mulext_uint32_t and fxdiv_mulhi_uint32_t.
 */
static inline uint64_t fxdiv_mulhi_uint64_t(uint64_t a, uint64_t b) {
#if defined(__OPENCL_VERSION__)
	return mul_hi(a, b);
#elif defined(__CUDA_ARCH__)
	return (uint64_t) __umul64hi((unsigned long long) a, (unsigned long long) b);
#elif defined(_MSC_VER) && defined(_M_X64)
	return (uint64_t) __umulh((unsigned __int64) a, (unsigned __int64) b);
#elif defined(__GNUC__) && defined(__SIZEOF_INT128__)
	return (uint64_t) (((((unsigned __int128) a) * ((unsigned __int128) b))) >> 64);
#else
	/* Split both operands into 32-bit halves: x = x_hi * 2^32 + x_lo */
	const uint32_t a_lo = (uint32_t) a;
	const uint32_t a_hi = (uint32_t) (a >> 32);
	const uint32_t b_lo = (uint32_t) b;
	const uint32_t b_hi = (uint32_t) (b >> 32);

	/*
	 * t = a_hi * b_lo + (carry out of a_lo * b_lo).
	 * This cannot overflow 64 bits: (2^32 - 1)^2 + (2^32 - 1) < 2^64.
	 */
	const uint64_t t = fxdiv_mulext_uint32_t(a_hi, b_lo) +
		(uint64_t) fxdiv_mulhi_uint32_t(a_lo, b_lo);
	/*
	 * The upper half of t contributes directly; the lower half of t is combined
	 * with the other cross product a_lo * b_hi to obtain the remaining carry.
	 */
	return fxdiv_mulext_uint32_t(a_hi, b_hi) + (t >> 32) +
		((fxdiv_mulext_uint32_t(a_lo, b_hi) + (uint64_t) (uint32_t) t) >> 32);
#endif
}
/*
 * Upper half of the double-width product a * b for size_t operands.
 *
 * Forwards to the 32-bit or 64-bit variant depending on SIZE_MAX; any other
 * size_t width is rejected at compile time.
 */
static inline size_t fxdiv_mulhi_size_t(size_t a, size_t b) {
#if SIZE_MAX == UINT32_MAX
	return (size_t) fxdiv_mulhi_uint32_t((uint32_t) a, (uint32_t) b);
#elif SIZE_MAX == UINT64_MAX
	return (size_t) fxdiv_mulhi_uint64_t((uint64_t) a, (uint64_t) b);
#else
	#error Unsupported platform
#endif
}
/*
 * Precomputed description of a 32-bit divisor.
 *
 * Member order matters: code in this header fills the structure with
 * positional initializers, with `value` first.
 */
struct fxdiv_divisor_uint32_t {
	uint32_t value; /* the divisor itself */
	uint32_t m;     /* multiplier fed to the high-multiply */
	uint8_t s1;     /* first right-shift amount */
	uint8_t s2;     /* second right-shift amount */
};
/* Quotient/remainder pair for a 32-bit division (initialized positionally: quotient first). */
struct fxdiv_result_uint32_t {
	uint32_t quotient;
	uint32_t remainder;
};
/*
 * Precomputed description of a 64-bit divisor.
 *
 * Member order matters: code in this header fills the structure with
 * positional initializers, with `value` first.
 */
struct fxdiv_divisor_uint64_t {
	uint64_t value; /* the divisor itself */
	uint64_t m;     /* multiplier fed to the high-multiply */
	uint8_t s1;     /* first right-shift amount */
	uint8_t s2;     /* second right-shift amount */
};
/* Quotient/remainder pair for a 64-bit division (initialized positionally: quotient first). */
struct fxdiv_result_uint64_t {
	uint64_t quotient;
	uint64_t remainder;
};
/*
 * Precomputed description of a size_t divisor.
 *
 * Member order matters: code in this header fills the structure with
 * positional initializers, with `value` first.
 */
struct fxdiv_divisor_size_t {
	size_t value; /* the divisor itself */
	size_t m;     /* multiplier fed to the high-multiply */
	uint8_t s1;   /* first right-shift amount */
	uint8_t s2;   /* second right-shift amount */
};
/* Quotient/remainder pair for a size_t division (initialized positionally: quotient first). */
struct fxdiv_result_size_t {
	size_t quotient;
	size_t remainder;
};
fxdiv_init_uint32_t(uint32_t d)116*46dbe239SXin Li static inline struct fxdiv_divisor_uint32_t fxdiv_init_uint32_t(uint32_t d) {
117*46dbe239SXin Li struct fxdiv_divisor_uint32_t result = { d };
118*46dbe239SXin Li if (d == 1) {
119*46dbe239SXin Li result.m = UINT32_C(1);
120*46dbe239SXin Li result.s1 = 0;
121*46dbe239SXin Li result.s2 = 0;
122*46dbe239SXin Li } else {
123*46dbe239SXin Li #if defined(__OPENCL_VERSION__)
124*46dbe239SXin Li const uint32_t l_minus_1 = 31 - clz(d - 1);
125*46dbe239SXin Li #elif defined(__CUDA_ARCH__)
126*46dbe239SXin Li const uint32_t l_minus_1 = 31 - __clz((int) (d - 1));
127*46dbe239SXin Li #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64))
128*46dbe239SXin Li unsigned long l_minus_1;
129*46dbe239SXin Li _BitScanReverse(&l_minus_1, (unsigned long) (d - 1));
130*46dbe239SXin Li #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && FXDIV_USE_INLINE_ASSEMBLY
131*46dbe239SXin Li uint32_t l_minus_1;
132*46dbe239SXin Li __asm__("BSRL %[d_minus_1], %[l_minus_1]"
133*46dbe239SXin Li : [l_minus_1] "=r" (l_minus_1)
134*46dbe239SXin Li : [d_minus_1] "r" (d - 1)
135*46dbe239SXin Li : "cc");
136*46dbe239SXin Li #elif defined(__GNUC__)
137*46dbe239SXin Li const uint32_t l_minus_1 = 31 - __builtin_clz(d - 1);
138*46dbe239SXin Li #else
139*46dbe239SXin Li /* Based on Algorithm 2 from Hacker's delight */
140*46dbe239SXin Li
141*46dbe239SXin Li uint32_t l_minus_1 = 0;
142*46dbe239SXin Li uint32_t x = d - 1;
143*46dbe239SXin Li uint32_t y = x >> 16;
144*46dbe239SXin Li if (y != 0) {
145*46dbe239SXin Li l_minus_1 += 16;
146*46dbe239SXin Li x = y;
147*46dbe239SXin Li }
148*46dbe239SXin Li y = x >> 8;
149*46dbe239SXin Li if (y != 0) {
150*46dbe239SXin Li l_minus_1 += 8;
151*46dbe239SXin Li x = y;
152*46dbe239SXin Li }
153*46dbe239SXin Li y = x >> 4;
154*46dbe239SXin Li if (y != 0) {
155*46dbe239SXin Li l_minus_1 += 4;
156*46dbe239SXin Li x = y;
157*46dbe239SXin Li }
158*46dbe239SXin Li y = x >> 2;
159*46dbe239SXin Li if (y != 0) {
160*46dbe239SXin Li l_minus_1 += 2;
161*46dbe239SXin Li x = y;
162*46dbe239SXin Li }
163*46dbe239SXin Li if ((x & 2) != 0) {
164*46dbe239SXin Li l_minus_1 += 1;
165*46dbe239SXin Li }
166*46dbe239SXin Li #endif
167*46dbe239SXin Li uint32_t u_hi = (UINT32_C(2) << (uint32_t) l_minus_1) - d;
168*46dbe239SXin Li
169*46dbe239SXin Li /* Division of 64-bit number u_hi:UINT32_C(0) by 32-bit number d, 32-bit quotient output q */
170*46dbe239SXin Li #if defined(__GNUC__) && defined(__i386__) && FXDIV_USE_INLINE_ASSEMBLY
171*46dbe239SXin Li uint32_t q;
172*46dbe239SXin Li __asm__("DIVL %[d]"
173*46dbe239SXin Li : "=a" (q), "+d" (u_hi)
174*46dbe239SXin Li : [d] "r" (d), "a" (0)
175*46dbe239SXin Li : "cc");
176*46dbe239SXin Li #elif (defined(_MSC_VER) && _MSC_VER >= 1920) && !defined(__clang__) && !defined(__INTEL_COMPILER) && (defined(_M_IX86) || defined(_M_X64))
177*46dbe239SXin Li unsigned int remainder;
178*46dbe239SXin Li const uint32_t q = (uint32_t) _udiv64((unsigned __int64) ((uint64_t) u_hi << 32), (unsigned int) d, &remainder);
179*46dbe239SXin Li #else
180*46dbe239SXin Li const uint32_t q = ((uint64_t) u_hi << 32) / d;
181*46dbe239SXin Li #endif
182*46dbe239SXin Li
183*46dbe239SXin Li result.m = q + UINT32_C(1);
184*46dbe239SXin Li result.s1 = 1;
185*46dbe239SXin Li result.s2 = (uint8_t) l_minus_1;
186*46dbe239SXin Li }
187*46dbe239SXin Li return result;
188*46dbe239SXin Li }
fxdiv_init_uint64_t(uint64_t d)190*46dbe239SXin Li static inline struct fxdiv_divisor_uint64_t fxdiv_init_uint64_t(uint64_t d) {
191*46dbe239SXin Li struct fxdiv_divisor_uint64_t result = { d };
192*46dbe239SXin Li if (d == 1) {
193*46dbe239SXin Li result.m = UINT64_C(1);
194*46dbe239SXin Li result.s1 = 0;
195*46dbe239SXin Li result.s2 = 0;
196*46dbe239SXin Li } else {
197*46dbe239SXin Li #if defined(__OPENCL_VERSION__)
198*46dbe239SXin Li const uint32_t nlz_d = clz(d);
199*46dbe239SXin Li const uint32_t l_minus_1 = 63 - clz(d - 1);
200*46dbe239SXin Li #elif defined(__CUDA_ARCH__)
201*46dbe239SXin Li const uint32_t nlz_d = __clzll((long long) d);
202*46dbe239SXin Li const uint32_t l_minus_1 = 63 - __clzll((long long) (d - 1));
203*46dbe239SXin Li #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64))
204*46dbe239SXin Li unsigned long l_minus_1;
205*46dbe239SXin Li _BitScanReverse64(&l_minus_1, (unsigned __int64) (d - 1));
206*46dbe239SXin Li unsigned long bsr_d;
207*46dbe239SXin Li _BitScanReverse64(&bsr_d, (unsigned __int64) d);
208*46dbe239SXin Li const uint32_t nlz_d = bsr_d ^ 0x3F;
209*46dbe239SXin Li #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_ARM))
210*46dbe239SXin Li const uint64_t d_minus_1 = d - 1;
211*46dbe239SXin Li const uint8_t d_is_power_of_2 = (d & d_minus_1) == 0;
212*46dbe239SXin Li unsigned long l_minus_1;
213*46dbe239SXin Li if ((uint32_t) (d_minus_1 >> 32) == 0) {
214*46dbe239SXin Li _BitScanReverse(&l_minus_1, (unsigned long) d_minus_1);
215*46dbe239SXin Li } else {
216*46dbe239SXin Li _BitScanReverse(&l_minus_1, (unsigned long) (uint32_t) (d_minus_1 >> 32));
217*46dbe239SXin Li l_minus_1 += 32;
218*46dbe239SXin Li }
219*46dbe239SXin Li const uint32_t nlz_d = ((uint8_t) l_minus_1 ^ UINT8_C(0x3F)) - d_is_power_of_2;
220*46dbe239SXin Li #elif defined(__GNUC__) && defined(__x86_64__) && FXDIV_USE_INLINE_ASSEMBLY
221*46dbe239SXin Li uint64_t l_minus_1;
222*46dbe239SXin Li __asm__("BSRQ %[d_minus_1], %[l_minus_1]"
223*46dbe239SXin Li : [l_minus_1] "=r" (l_minus_1)
224*46dbe239SXin Li : [d_minus_1] "r" (d - 1)
225*46dbe239SXin Li : "cc");
226*46dbe239SXin Li #elif defined(__GNUC__)
227*46dbe239SXin Li const uint32_t l_minus_1 = 63 - __builtin_clzll(d - 1);
228*46dbe239SXin Li const uint32_t nlz_d = __builtin_clzll(d);
229*46dbe239SXin Li #else
230*46dbe239SXin Li /* Based on Algorithm 2 from Hacker's delight */
231*46dbe239SXin Li const uint64_t d_minus_1 = d - 1;
232*46dbe239SXin Li const uint32_t d_is_power_of_2 = (d & d_minus_1) == 0;
233*46dbe239SXin Li uint32_t l_minus_1 = 0;
234*46dbe239SXin Li uint32_t x = (uint32_t) d_minus_1;
235*46dbe239SXin Li uint32_t y = d_minus_1 >> 32;
236*46dbe239SXin Li if (y != 0) {
237*46dbe239SXin Li l_minus_1 += 32;
238*46dbe239SXin Li x = y;
239*46dbe239SXin Li }
240*46dbe239SXin Li y = x >> 16;
241*46dbe239SXin Li if (y != 0) {
242*46dbe239SXin Li l_minus_1 += 16;
243*46dbe239SXin Li x = y;
244*46dbe239SXin Li }
245*46dbe239SXin Li y = x >> 8;
246*46dbe239SXin Li if (y != 0) {
247*46dbe239SXin Li l_minus_1 += 8;
248*46dbe239SXin Li x = y;
249*46dbe239SXin Li }
250*46dbe239SXin Li y = x >> 4;
251*46dbe239SXin Li if (y != 0) {
252*46dbe239SXin Li l_minus_1 += 4;
253*46dbe239SXin Li x = y;
254*46dbe239SXin Li }
255*46dbe239SXin Li y = x >> 2;
256*46dbe239SXin Li if (y != 0) {
257*46dbe239SXin Li l_minus_1 += 2;
258*46dbe239SXin Li x = y;
259*46dbe239SXin Li }
260*46dbe239SXin Li if ((x & 2) != 0) {
261*46dbe239SXin Li l_minus_1 += 1;
262*46dbe239SXin Li }
263*46dbe239SXin Li const uint32_t nlz_d = (l_minus_1 ^ UINT32_C(0x3F)) - d_is_power_of_2;
264*46dbe239SXin Li #endif
265*46dbe239SXin Li uint64_t u_hi = (UINT64_C(2) << (uint32_t) l_minus_1) - d;
266*46dbe239SXin Li
267*46dbe239SXin Li /* Division of 128-bit number u_hi:UINT64_C(0) by 64-bit number d, 64-bit quotient output q */
268*46dbe239SXin Li #if defined(__GNUC__) && defined(__x86_64__) && FXDIV_USE_INLINE_ASSEMBLY
269*46dbe239SXin Li uint64_t q;
270*46dbe239SXin Li __asm__("DIVQ %[d]"
271*46dbe239SXin Li : "=a" (q), "+d" (u_hi)
272*46dbe239SXin Li : [d] "r" (d), "a" (UINT64_C(0))
273*46dbe239SXin Li : "cc");
274*46dbe239SXin Li #elif 0 && defined(__GNUC__) && defined(__SIZEOF_INT128__)
275*46dbe239SXin Li /* GCC, Clang, and Intel Compiler fail to inline optimized implementation and call into support library for 128-bit division */
276*46dbe239SXin Li const uint64_t q = (uint64_t) (((unsigned __int128) u_hi << 64) / ((unsigned __int128) d));
277*46dbe239SXin Li #elif (defined(_MSC_VER) && _MSC_VER >= 1920) && !defined(__clang__) && !defined(__INTEL_COMPILER) && defined(_M_X64)
278*46dbe239SXin Li unsigned __int64 remainder;
279*46dbe239SXin Li const uint64_t q = (uint64_t) _udiv128((unsigned __int64) u_hi, 0, (unsigned __int64) d, &remainder);
280*46dbe239SXin Li #else
281*46dbe239SXin Li /* Implementation based on code from Hacker's delight */
282*46dbe239SXin Li
283*46dbe239SXin Li /* Normalize divisor and shift divident left */
284*46dbe239SXin Li d <<= nlz_d;
285*46dbe239SXin Li u_hi <<= nlz_d;
286*46dbe239SXin Li /* Break divisor up into two 32-bit digits */
287*46dbe239SXin Li const uint64_t d_hi = (uint32_t) (d >> 32);
288*46dbe239SXin Li const uint32_t d_lo = (uint32_t) d;
289*46dbe239SXin Li
290*46dbe239SXin Li /* Compute the first quotient digit, q1 */
291*46dbe239SXin Li uint64_t q1 = u_hi / d_hi;
292*46dbe239SXin Li uint64_t r1 = u_hi - q1 * d_hi;
293*46dbe239SXin Li
294*46dbe239SXin Li while ((q1 >> 32) != 0 || fxdiv_mulext_uint32_t((uint32_t) q1, d_lo) > (r1 << 32)) {
295*46dbe239SXin Li q1 -= 1;
296*46dbe239SXin Li r1 += d_hi;
297*46dbe239SXin Li if ((r1 >> 32) != 0) {
298*46dbe239SXin Li break;
299*46dbe239SXin Li }
300*46dbe239SXin Li }
301*46dbe239SXin Li
302*46dbe239SXin Li /* Multiply and subtract. */
303*46dbe239SXin Li u_hi = (u_hi << 32) - q1 * d;
304*46dbe239SXin Li
305*46dbe239SXin Li /* Compute the second quotient digit, q0 */
306*46dbe239SXin Li uint64_t q0 = u_hi / d_hi;
307*46dbe239SXin Li uint64_t r0 = u_hi - q0 * d_hi;
308*46dbe239SXin Li
309*46dbe239SXin Li while ((q0 >> 32) != 0 || fxdiv_mulext_uint32_t((uint32_t) q0, d_lo) > (r0 << 32)) {
310*46dbe239SXin Li q0 -= 1;
311*46dbe239SXin Li r0 += d_hi;
312*46dbe239SXin Li if ((r0 >> 32) != 0) {
313*46dbe239SXin Li break;
314*46dbe239SXin Li }
315*46dbe239SXin Li }
316*46dbe239SXin Li const uint64_t q = (q1 << 32) | (uint32_t) q0;
317*46dbe239SXin Li #endif
318*46dbe239SXin Li result.m = q + UINT64_C(1);
319*46dbe239SXin Li result.s1 = 1;
320*46dbe239SXin Li result.s2 = (uint8_t) l_minus_1;
321*46dbe239SXin Li }
322*46dbe239SXin Li return result;
323*46dbe239SXin Li }
fxdiv_init_size_t(size_t d)325*46dbe239SXin Li static inline struct fxdiv_divisor_size_t fxdiv_init_size_t(size_t d) {
326*46dbe239SXin Li #if SIZE_MAX == UINT32_MAX
327*46dbe239SXin Li const struct fxdiv_divisor_uint32_t uint_result = fxdiv_init_uint32_t((uint32_t) d);
328*46dbe239SXin Li #elif SIZE_MAX == UINT64_MAX
329*46dbe239SXin Li const struct fxdiv_divisor_uint64_t uint_result = fxdiv_init_uint64_t((uint64_t) d);
330*46dbe239SXin Li #else
331*46dbe239SXin Li #error Unsupported platform
332*46dbe239SXin Li #endif
333*46dbe239SXin Li struct fxdiv_divisor_size_t size_result = {
334*46dbe239SXin Li (size_t) uint_result.value,
335*46dbe239SXin Li (size_t) uint_result.m,
336*46dbe239SXin Li uint_result.s1,
337*46dbe239SXin Li uint_result.s2
338*46dbe239SXin Li };
339*46dbe239SXin Li return size_result;
340*46dbe239SXin Li }
/*
 * Quotient of n by a precomputed 32-bit divisor.
 *
 * n:       dividend.
 * divisor: descriptor supplying m, s1 and s2 (expected to come from
 *          fxdiv_init_uint32_t).
 *
 * Returns (t + ((n - t) >> s1)) >> s2, where t is the upper 32 bits of n * m.
 * Since m is below 2^32, t never exceeds n, so n - t cannot wrap around.
 */
static inline uint32_t fxdiv_quotient_uint32_t(uint32_t n, const struct fxdiv_divisor_uint32_t divisor) {
	const uint32_t t = fxdiv_mulhi_uint32_t(n, divisor.m);
	return (t + ((n - t) >> divisor.s1)) >> divisor.s2;
}
/*
 * Quotient of n by a precomputed 64-bit divisor.
 *
 * n:       dividend.
 * divisor: descriptor supplying m, s1 and s2 (expected to come from
 *          fxdiv_init_uint64_t).
 *
 * Returns (t + ((n - t) >> s1)) >> s2, where t is the upper 64 bits of n * m.
 * Since m is below 2^64, t never exceeds n, so n - t cannot wrap around.
 */
static inline uint64_t fxdiv_quotient_uint64_t(uint64_t n, const struct fxdiv_divisor_uint64_t divisor) {
	const uint64_t t = fxdiv_mulhi_uint64_t(n, divisor.m);
	return (t + ((n - t) >> divisor.s1)) >> divisor.s2;
}
fxdiv_quotient_size_t(size_t n,const struct fxdiv_divisor_size_t divisor)352*46dbe239SXin Li static inline size_t fxdiv_quotient_size_t(size_t n, const struct fxdiv_divisor_size_t divisor) {
353*46dbe239SXin Li #if SIZE_MAX == UINT32_MAX
354*46dbe239SXin Li const struct fxdiv_divisor_uint32_t uint32_divisor = {
355*46dbe239SXin Li (uint32_t) divisor.value,
356*46dbe239SXin Li (uint32_t) divisor.m,
357*46dbe239SXin Li divisor.s1,
358*46dbe239SXin Li divisor.s2
359*46dbe239SXin Li };
360*46dbe239SXin Li return fxdiv_quotient_uint32_t((uint32_t) n, uint32_divisor);
361*46dbe239SXin Li #elif SIZE_MAX == UINT64_MAX
362*46dbe239SXin Li const struct fxdiv_divisor_uint64_t uint64_divisor = {
363*46dbe239SXin Li (uint64_t) divisor.value,
364*46dbe239SXin Li (uint64_t) divisor.m,
365*46dbe239SXin Li divisor.s1,
366*46dbe239SXin Li divisor.s2
367*46dbe239SXin Li };
368*46dbe239SXin Li return fxdiv_quotient_uint64_t((uint64_t) n, uint64_divisor);
369*46dbe239SXin Li #else
370*46dbe239SXin Li #error Unsupported platform
371*46dbe239SXin Li #endif
372*46dbe239SXin Li }
/*
 * Remainder of n by a precomputed 32-bit divisor.
 *
 * Obtains the quotient from fxdiv_quotient_uint32_t and returns
 * n - quotient * divisor.value.
 */
static inline uint32_t fxdiv_remainder_uint32_t(uint32_t n, const struct fxdiv_divisor_uint32_t divisor) {
	const uint32_t quotient = fxdiv_quotient_uint32_t(n, divisor);
	return n - quotient * divisor.value;
}
/*
 * Remainder of n by a precomputed 64-bit divisor.
 *
 * Obtains the quotient from fxdiv_quotient_uint64_t and returns
 * n - quotient * divisor.value.
 */
static inline uint64_t fxdiv_remainder_uint64_t(uint64_t n, const struct fxdiv_divisor_uint64_t divisor) {
	const uint64_t quotient = fxdiv_quotient_uint64_t(n, divisor);
	return n - quotient * divisor.value;
}
/*
 * Remainder of n by a precomputed size_t divisor.
 *
 * Obtains the quotient from fxdiv_quotient_size_t and returns
 * n - quotient * divisor.value.
 */
static inline size_t fxdiv_remainder_size_t(size_t n, const struct fxdiv_divisor_size_t divisor) {
	const size_t quotient = fxdiv_quotient_size_t(n, divisor);
	return n - quotient * divisor.value;
}
/*
 * Round n down to a multiple of granularity.value (32-bit).
 *
 * Returns quotient * granularity.value, with the quotient taken from
 * fxdiv_quotient_uint32_t.
 */
static inline uint32_t fxdiv_round_down_uint32_t(uint32_t n, const struct fxdiv_divisor_uint32_t granularity) {
	const uint32_t quotient = fxdiv_quotient_uint32_t(n, granularity);
	return quotient * granularity.value;
}
/*
 * Round n down to a multiple of granularity.value (64-bit).
 *
 * Returns quotient * granularity.value, with the quotient taken from
 * fxdiv_quotient_uint64_t.
 */
static inline uint64_t fxdiv_round_down_uint64_t(uint64_t n, const struct fxdiv_divisor_uint64_t granularity) {
	const uint64_t quotient = fxdiv_quotient_uint64_t(n, granularity);
	return quotient * granularity.value;
}
/*
 * Round n down to a multiple of granularity.value (size_t).
 *
 * Returns quotient * granularity.value, with the quotient taken from
 * fxdiv_quotient_size_t.
 */
static inline size_t fxdiv_round_down_size_t(size_t n, const struct fxdiv_divisor_size_t granularity) {
	const size_t quotient = fxdiv_quotient_size_t(n, granularity);
	return quotient * granularity.value;
}
fxdiv_divide_uint32_t(uint32_t n,const struct fxdiv_divisor_uint32_t divisor)404*46dbe239SXin Li static inline struct fxdiv_result_uint32_t fxdiv_divide_uint32_t(uint32_t n, const struct fxdiv_divisor_uint32_t divisor) {
405*46dbe239SXin Li const uint32_t quotient = fxdiv_quotient_uint32_t(n, divisor);
406*46dbe239SXin Li const uint32_t remainder = n - quotient * divisor.value;
407*46dbe239SXin Li struct fxdiv_result_uint32_t result = { quotient, remainder };
408*46dbe239SXin Li return result;
409*46dbe239SXin Li }
fxdiv_divide_uint64_t(uint64_t n,const struct fxdiv_divisor_uint64_t divisor)411*46dbe239SXin Li static inline struct fxdiv_result_uint64_t fxdiv_divide_uint64_t(uint64_t n, const struct fxdiv_divisor_uint64_t divisor) {
412*46dbe239SXin Li const uint64_t quotient = fxdiv_quotient_uint64_t(n, divisor);
413*46dbe239SXin Li const uint64_t remainder = n - quotient * divisor.value;
414*46dbe239SXin Li struct fxdiv_result_uint64_t result = { quotient, remainder };
415*46dbe239SXin Li return result;
416*46dbe239SXin Li }
fxdiv_divide_size_t(size_t n,const struct fxdiv_divisor_size_t divisor)418*46dbe239SXin Li static inline struct fxdiv_result_size_t fxdiv_divide_size_t(size_t n, const struct fxdiv_divisor_size_t divisor) {
419*46dbe239SXin Li const size_t quotient = fxdiv_quotient_size_t(n, divisor);
420*46dbe239SXin Li const size_t remainder = n - quotient * divisor.value;
421*46dbe239SXin Li struct fxdiv_result_size_t result = { quotient, remainder };
422*46dbe239SXin Li return result;
423*46dbe239SXin Li }

#endif /* FXDIV_H */