1 /* Math module -- standard C math library functions, pi and e */
2
3 /* Here are some comments from Tim Peters, extracted from the
4 discussion attached to http://bugs.python.org/issue1640. They
5 describe the general aims of the math module with respect to
6 special values, IEEE-754 floating-point exceptions, and Python
7 exceptions.
8
9 These are the "spirit of 754" rules:
10
11 1. If the mathematical result is a real number, but of magnitude too
12 large to approximate by a machine float, overflow is signaled and the
13 result is an infinity (with the appropriate sign).
14
15 2. If the mathematical result is a real number, but of magnitude too
16 small to approximate by a machine float, underflow is signaled and the
17 result is a zero (with the appropriate sign).
18
19 3. At a singularity (a value x such that the limit of f(y) as y
20 approaches x exists and is an infinity), "divide by zero" is signaled
21 and the result is an infinity (with the appropriate sign). This is
22 complicated a little by that the left-side and right-side limits may
23 not be the same; e.g., 1/x approaches +inf or -inf as x approaches 0
24 from the positive or negative directions. In that specific case, the
25 sign of the zero determines the result of 1/0.
26
27 4. At a point where a function has no defined result in the extended
28 reals (i.e., the reals plus an infinity or two), invalid operation is
29 signaled and a NaN is returned.
30
31 And these are what Python has historically /tried/ to do (but not
32 always successfully, as platform libm behavior varies a lot):
33
34 For #1, raise OverflowError.
35
36 For #2, return a zero (with the appropriate sign if that happens by
37 accident ;-)).
38
39 For #3 and #4, raise ValueError. It may have made sense to raise
40 Python's ZeroDivisionError in #3, but historically that's only been
41 raised for division by zero and mod by zero.
42
43 */
44
45 /*
46 In general, on an IEEE-754 platform the aim is to follow the C99
47 standard, including Annex 'F', whenever possible. Where the
48 standard recommends raising the 'divide-by-zero' or 'invalid'
49 floating-point exceptions, Python should raise a ValueError. Where
50 the standard recommends raising 'overflow', Python should raise an
51 OverflowError. In all other circumstances a value should be
52 returned.
53 */
54
55 #ifndef Py_BUILD_CORE_BUILTIN
56 # define Py_BUILD_CORE_MODULE 1
57 #endif
58 #define NEEDS_PY_IDENTIFIER
59
60 #include "Python.h"
61 #include "pycore_bitutils.h" // _Py_bit_length()
62 #include "pycore_call.h" // _PyObject_CallNoArgs()
63 #include "pycore_dtoa.h" // _Py_dg_infinity()
64 #include "pycore_long.h" // _PyLong_GetZero()
65 #include "pycore_pymath.h" // _PY_SHORT_FLOAT_REPR
66 /* For DBL_EPSILON in _math.h */
67 #include <float.h>
68 /* For _Py_log1p with workarounds for buggy handling of zeros. */
69 #include "_math.h"
70
71 #include "clinic/mathmodule.c.h"
72
73 /*[clinic input]
74 module math
75 [clinic start generated code]*/
76 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=76bc7002685dd942]*/
77
78
79 /*
80 sin(pi*x), giving accurate results for all finite x (especially x
81 integral or close to an integer). This is here for use in the
82 reflection formula for the gamma function. It conforms to IEEE
83 754-2008 for finite arguments, but not for infinities or nans.
84 */
85
86 static const double pi = 3.141592653589793238462643383279502884197;
87 static const double logpi = 1.144729885849400174143427351353058711647;
88 #if !defined(HAVE_ERF) || !defined(HAVE_ERFC)
89 static const double sqrtpi = 1.772453850905516027298167483341145182798;
90 #endif /* !defined(HAVE_ERF) || !defined(HAVE_ERFC) */
91
92
93 /* Version of PyFloat_AsDouble() with in-line fast paths
94 for exact floats and integers. Gives a substantial
95 speed improvement for extracting float arguments.
96 */
97
98 #define ASSIGN_DOUBLE(target_var, obj, error_label) \
99 if (PyFloat_CheckExact(obj)) { \
100 target_var = PyFloat_AS_DOUBLE(obj); \
101 } \
102 else if (PyLong_CheckExact(obj)) { \
103 target_var = PyLong_AsDouble(obj); \
104 if (target_var == -1.0 && PyErr_Occurred()) { \
105 goto error_label; \
106 } \
107 } \
108 else { \
109 target_var = PyFloat_AsDouble(obj); \
110 if (target_var == -1.0 && PyErr_Occurred()) { \
111 goto error_label; \
112 } \
113 }
114
115 static double
m_sinpi(double x)116 m_sinpi(double x)
117 {
118 double y, r;
119 int n;
120 /* this function should only ever be called for finite arguments */
121 assert(Py_IS_FINITE(x));
122 y = fmod(fabs(x), 2.0);
123 n = (int)round(2.0*y);
124 assert(0 <= n && n <= 4);
125 switch (n) {
126 case 0:
127 r = sin(pi*y);
128 break;
129 case 1:
130 r = cos(pi*(y-0.5));
131 break;
132 case 2:
133 /* N.B. -sin(pi*(y-1.0)) is *not* equivalent: it would give
134 -0.0 instead of 0.0 when y == 1.0. */
135 r = sin(pi*(1.0-y));
136 break;
137 case 3:
138 r = -cos(pi*(y-1.5));
139 break;
140 case 4:
141 r = sin(pi*(y-2.0));
142 break;
143 default:
144 Py_UNREACHABLE();
145 }
146 return copysign(1.0, x)*r;
147 }
148
149 /* Implementation of the real gamma function. In extensive but non-exhaustive
150 random tests, this function proved accurate to within <= 10 ulps across the
151 entire float domain. Note that accuracy may depend on the quality of the
152 system math functions, the pow function in particular. Special cases
153 follow C99 annex F. The parameters and method are tailored to platforms
154 whose double format is the IEEE 754 binary64 format.
155
156 Method: for x > 0.0 we use the Lanczos approximation with parameters N=13
157 and g=6.024680040776729583740234375; these parameters are amongst those
158 used by the Boost library. Following Boost (again), we re-express the
159 Lanczos sum as a rational function, and compute it that way. The
160 coefficients below were computed independently using MPFR, and have been
161 double-checked against the coefficients in the Boost source code.
162
163 For x < 0.0 we use the reflection formula.
164
165 There's one minor tweak that deserves explanation: Lanczos' formula for
166 Gamma(x) involves computing pow(x+g-0.5, x-0.5) / exp(x+g-0.5). For many x
167 values, x+g-0.5 can be represented exactly. However, in cases where it
168 can't be represented exactly the small error in x+g-0.5 can be magnified
169 significantly by the pow and exp calls, especially for large x. A cheap
170 correction is to multiply by (1 + e*g/(x+g-0.5)), where e is the error
171 involved in the computation of x+g-0.5 (that is, e = computed value of
172 x+g-0.5 - exact value of x+g-0.5). Here's the proof:
173
174 Correction factor
175 -----------------
176 Write x+g-0.5 = y-e, where y is exactly representable as an IEEE 754
177 double, and e is tiny. Then:
178
179 pow(x+g-0.5,x-0.5)/exp(x+g-0.5) = pow(y-e, x-0.5)/exp(y-e)
180 = pow(y, x-0.5)/exp(y) * C,
181
182 where the correction_factor C is given by
183
184 C = pow(1-e/y, x-0.5) * exp(e)
185
186 Since e is tiny, pow(1-e/y, x-0.5) ~ 1-(x-0.5)*e/y, and exp(x) ~ 1+e, so:
187
188 C ~ (1-(x-0.5)*e/y) * (1+e) ~ 1 + e*(y-(x-0.5))/y
189
190 But y-(x-0.5) = g+e, and g+e ~ g. So we get C ~ 1 + e*g/y, and
191
192 pow(x+g-0.5,x-0.5)/exp(x+g-0.5) ~ pow(y, x-0.5)/exp(y) * (1 + e*g/y),
193
194 Note that for accuracy, when computing r*C it's better to do
195
196 r + e*g/y*r;
197
198 than
199
200 r * (1 + e*g/y);
201
202 since the addition in the latter throws away most of the bits of
203 information in e*g/y.
204 */
205
206 #define LANCZOS_N 13
207 static const double lanczos_g = 6.024680040776729583740234375;
208 static const double lanczos_g_minus_half = 5.524680040776729583740234375;
209 static const double lanczos_num_coeffs[LANCZOS_N] = {
210 23531376880.410759688572007674451636754734846804940,
211 42919803642.649098768957899047001988850926355848959,
212 35711959237.355668049440185451547166705960488635843,
213 17921034426.037209699919755754458931112671403265390,
214 6039542586.3520280050642916443072979210699388420708,
215 1439720407.3117216736632230727949123939715485786772,
216 248874557.86205415651146038641322942321632125127801,
217 31426415.585400194380614231628318205362874684987640,
218 2876370.6289353724412254090516208496135991145378768,
219 186056.26539522349504029498971604569928220784236328,
220 8071.6720023658162106380029022722506138218516325024,
221 210.82427775157934587250973392071336271166969580291,
222 2.5066282746310002701649081771338373386264310793408
223 };
224
225 /* denominator is x*(x+1)*...*(x+LANCZOS_N-2) */
226 static const double lanczos_den_coeffs[LANCZOS_N] = {
227 0.0, 39916800.0, 120543840.0, 150917976.0, 105258076.0, 45995730.0,
228 13339535.0, 2637558.0, 357423.0, 32670.0, 1925.0, 66.0, 1.0};
229
230 /* gamma values for small positive integers, 1 though NGAMMA_INTEGRAL */
231 #define NGAMMA_INTEGRAL 23
232 static const double gamma_integral[NGAMMA_INTEGRAL] = {
233 1.0, 1.0, 2.0, 6.0, 24.0, 120.0, 720.0, 5040.0, 40320.0, 362880.0,
234 3628800.0, 39916800.0, 479001600.0, 6227020800.0, 87178291200.0,
235 1307674368000.0, 20922789888000.0, 355687428096000.0,
236 6402373705728000.0, 121645100408832000.0, 2432902008176640000.0,
237 51090942171709440000.0, 1124000727777607680000.0,
238 };
239
240 /* Lanczos' sum L_g(x), for positive x */
241
242 static double
lanczos_sum(double x)243 lanczos_sum(double x)
244 {
245 double num = 0.0, den = 0.0;
246 int i;
247 assert(x > 0.0);
248 /* evaluate the rational function lanczos_sum(x). For large
249 x, the obvious algorithm risks overflow, so we instead
250 rescale the denominator and numerator of the rational
251 function by x**(1-LANCZOS_N) and treat this as a
252 rational function in 1/x. This also reduces the error for
253 larger x values. The choice of cutoff point (5.0 below) is
254 somewhat arbitrary; in tests, smaller cutoff values than
255 this resulted in lower accuracy. */
256 if (x < 5.0) {
257 for (i = LANCZOS_N; --i >= 0; ) {
258 num = num * x + lanczos_num_coeffs[i];
259 den = den * x + lanczos_den_coeffs[i];
260 }
261 }
262 else {
263 for (i = 0; i < LANCZOS_N; i++) {
264 num = num / x + lanczos_num_coeffs[i];
265 den = den / x + lanczos_den_coeffs[i];
266 }
267 }
268 return num/den;
269 }
270
271 /* Constant for +infinity, generated in the same way as float('inf'). */
272
273 static double
m_inf(void)274 m_inf(void)
275 {
276 #if _PY_SHORT_FLOAT_REPR == 1
277 return _Py_dg_infinity(0);
278 #else
279 return Py_HUGE_VAL;
280 #endif
281 }
282
283 /* Constant nan value, generated in the same way as float('nan'). */
284 /* We don't currently assume that Py_NAN is defined everywhere. */
285
286 #if _PY_SHORT_FLOAT_REPR == 1
287
288 static double
m_nan(void)289 m_nan(void)
290 {
291 #if _PY_SHORT_FLOAT_REPR == 1
292 return _Py_dg_stdnan(0);
293 #else
294 return Py_NAN;
295 #endif
296 }
297
298 #endif
299
300 static double
m_tgamma(double x)301 m_tgamma(double x)
302 {
303 double absx, r, y, z, sqrtpow;
304
305 /* special cases */
306 if (!Py_IS_FINITE(x)) {
307 if (Py_IS_NAN(x) || x > 0.0)
308 return x; /* tgamma(nan) = nan, tgamma(inf) = inf */
309 else {
310 errno = EDOM;
311 return Py_NAN; /* tgamma(-inf) = nan, invalid */
312 }
313 }
314 if (x == 0.0) {
315 errno = EDOM;
316 /* tgamma(+-0.0) = +-inf, divide-by-zero */
317 return copysign(Py_HUGE_VAL, x);
318 }
319
320 /* integer arguments */
321 if (x == floor(x)) {
322 if (x < 0.0) {
323 errno = EDOM; /* tgamma(n) = nan, invalid for */
324 return Py_NAN; /* negative integers n */
325 }
326 if (x <= NGAMMA_INTEGRAL)
327 return gamma_integral[(int)x - 1];
328 }
329 absx = fabs(x);
330
331 /* tiny arguments: tgamma(x) ~ 1/x for x near 0 */
332 if (absx < 1e-20) {
333 r = 1.0/x;
334 if (Py_IS_INFINITY(r))
335 errno = ERANGE;
336 return r;
337 }
338
339 /* large arguments: assuming IEEE 754 doubles, tgamma(x) overflows for
340 x > 200, and underflows to +-0.0 for x < -200, not a negative
341 integer. */
342 if (absx > 200.0) {
343 if (x < 0.0) {
344 return 0.0/m_sinpi(x);
345 }
346 else {
347 errno = ERANGE;
348 return Py_HUGE_VAL;
349 }
350 }
351
352 y = absx + lanczos_g_minus_half;
353 /* compute error in sum */
354 if (absx > lanczos_g_minus_half) {
355 /* note: the correction can be foiled by an optimizing
356 compiler that (incorrectly) thinks that an expression like
357 a + b - a - b can be optimized to 0.0. This shouldn't
358 happen in a standards-conforming compiler. */
359 double q = y - absx;
360 z = q - lanczos_g_minus_half;
361 }
362 else {
363 double q = y - lanczos_g_minus_half;
364 z = q - absx;
365 }
366 z = z * lanczos_g / y;
367 if (x < 0.0) {
368 r = -pi / m_sinpi(absx) / absx * exp(y) / lanczos_sum(absx);
369 r -= z * r;
370 if (absx < 140.0) {
371 r /= pow(y, absx - 0.5);
372 }
373 else {
374 sqrtpow = pow(y, absx / 2.0 - 0.25);
375 r /= sqrtpow;
376 r /= sqrtpow;
377 }
378 }
379 else {
380 r = lanczos_sum(absx) / exp(y);
381 r += z * r;
382 if (absx < 140.0) {
383 r *= pow(y, absx - 0.5);
384 }
385 else {
386 sqrtpow = pow(y, absx / 2.0 - 0.25);
387 r *= sqrtpow;
388 r *= sqrtpow;
389 }
390 }
391 if (Py_IS_INFINITY(r))
392 errno = ERANGE;
393 return r;
394 }
395
396 /*
397 lgamma: natural log of the absolute value of the Gamma function.
398 For large arguments, Lanczos' formula works extremely well here.
399 */
400
401 static double
m_lgamma(double x)402 m_lgamma(double x)
403 {
404 double r;
405 double absx;
406
407 /* special cases */
408 if (!Py_IS_FINITE(x)) {
409 if (Py_IS_NAN(x))
410 return x; /* lgamma(nan) = nan */
411 else
412 return Py_HUGE_VAL; /* lgamma(+-inf) = +inf */
413 }
414
415 /* integer arguments */
416 if (x == floor(x) && x <= 2.0) {
417 if (x <= 0.0) {
418 errno = EDOM; /* lgamma(n) = inf, divide-by-zero for */
419 return Py_HUGE_VAL; /* integers n <= 0 */
420 }
421 else {
422 return 0.0; /* lgamma(1) = lgamma(2) = 0.0 */
423 }
424 }
425
426 absx = fabs(x);
427 /* tiny arguments: lgamma(x) ~ -log(fabs(x)) for small x */
428 if (absx < 1e-20)
429 return -log(absx);
430
431 /* Lanczos' formula. We could save a fraction of a ulp in accuracy by
432 having a second set of numerator coefficients for lanczos_sum that
433 absorbed the exp(-lanczos_g) term, and throwing out the lanczos_g
434 subtraction below; it's probably not worth it. */
435 r = log(lanczos_sum(absx)) - lanczos_g;
436 r += (absx - 0.5) * (log(absx + lanczos_g - 0.5) - 1);
437 if (x < 0.0)
438 /* Use reflection formula to get value for negative x. */
439 r = logpi - log(fabs(m_sinpi(absx))) - log(absx) - r;
440 if (Py_IS_INFINITY(r))
441 errno = ERANGE;
442 return r;
443 }
444
445 #if !defined(HAVE_ERF) || !defined(HAVE_ERFC)
446
447 /*
448 Implementations of the error function erf(x) and the complementary error
449 function erfc(x).
450
451 Method: we use a series approximation for erf for small x, and a continued
452 fraction approximation for erfc(x) for larger x;
453 combined with the relations erf(-x) = -erf(x) and erfc(x) = 1.0 - erf(x),
454 this gives us erf(x) and erfc(x) for all x.
455
456 The series expansion used is:
457
458 erf(x) = x*exp(-x*x)/sqrt(pi) * [
459 2/1 + 4/3 x**2 + 8/15 x**4 + 16/105 x**6 + ...]
460
461 The coefficient of x**(2k-2) here is 4**k*factorial(k)/factorial(2*k).
462 This series converges well for smallish x, but slowly for larger x.
463
464 The continued fraction expansion used is:
465
466 erfc(x) = x*exp(-x*x)/sqrt(pi) * [1/(0.5 + x**2 -) 0.5/(2.5 + x**2 - )
467 3.0/(4.5 + x**2 - ) 7.5/(6.5 + x**2 - ) ...]
468
469 after the first term, the general term has the form:
470
471 k*(k-0.5)/(2*k+0.5 + x**2 - ...).
472
473 This expansion converges fast for larger x, but convergence becomes
474 infinitely slow as x approaches 0.0. The (somewhat naive) continued
475 fraction evaluation algorithm used below also risks overflow for large x;
476 but for large x, erfc(x) == 0.0 to within machine precision. (For
477 example, erfc(30.0) is approximately 2.56e-393).
478
479 Parameters: use series expansion for abs(x) < ERF_SERIES_CUTOFF and
480 continued fraction expansion for ERF_SERIES_CUTOFF <= abs(x) <
481 ERFC_CONTFRAC_CUTOFF. ERFC_SERIES_TERMS and ERFC_CONTFRAC_TERMS are the
482 numbers of terms to use for the relevant expansions. */
483
484 #define ERF_SERIES_CUTOFF 1.5
485 #define ERF_SERIES_TERMS 25
486 #define ERFC_CONTFRAC_CUTOFF 30.0
487 #define ERFC_CONTFRAC_TERMS 50
488
489 /*
490 Error function, via power series.
491
492 Given a finite float x, return an approximation to erf(x).
493 Converges reasonably fast for small x.
494 */
495
496 static double
m_erf_series(double x)497 m_erf_series(double x)
498 {
499 double x2, acc, fk, result;
500 int i, saved_errno;
501
502 x2 = x * x;
503 acc = 0.0;
504 fk = (double)ERF_SERIES_TERMS + 0.5;
505 for (i = 0; i < ERF_SERIES_TERMS; i++) {
506 acc = 2.0 + x2 * acc / fk;
507 fk -= 1.0;
508 }
509 /* Make sure the exp call doesn't affect errno;
510 see m_erfc_contfrac for more. */
511 saved_errno = errno;
512 result = acc * x * exp(-x2) / sqrtpi;
513 errno = saved_errno;
514 return result;
515 }
516
517 /*
518 Complementary error function, via continued fraction expansion.
519
520 Given a positive float x, return an approximation to erfc(x). Converges
521 reasonably fast for x large (say, x > 2.0), and should be safe from
522 overflow if x and nterms are not too large. On an IEEE 754 machine, with x
523 <= 30.0, we're safe up to nterms = 100. For x >= 30.0, erfc(x) is smaller
524 than the smallest representable nonzero float. */
525
526 static double
m_erfc_contfrac(double x)527 m_erfc_contfrac(double x)
528 {
529 double x2, a, da, p, p_last, q, q_last, b, result;
530 int i, saved_errno;
531
532 if (x >= ERFC_CONTFRAC_CUTOFF)
533 return 0.0;
534
535 x2 = x*x;
536 a = 0.0;
537 da = 0.5;
538 p = 1.0; p_last = 0.0;
539 q = da + x2; q_last = 1.0;
540 for (i = 0; i < ERFC_CONTFRAC_TERMS; i++) {
541 double temp;
542 a += da;
543 da += 2.0;
544 b = da + x2;
545 temp = p; p = b*p - a*p_last; p_last = temp;
546 temp = q; q = b*q - a*q_last; q_last = temp;
547 }
548 /* Issue #8986: On some platforms, exp sets errno on underflow to zero;
549 save the current errno value so that we can restore it later. */
550 saved_errno = errno;
551 result = p / q * x * exp(-x2) / sqrtpi;
552 errno = saved_errno;
553 return result;
554 }
555
556 #endif /* !defined(HAVE_ERF) || !defined(HAVE_ERFC) */
557
558 /* Error function erf(x), for general x */
559
560 static double
m_erf(double x)561 m_erf(double x)
562 {
563 #ifdef HAVE_ERF
564 return erf(x);
565 #else
566 double absx, cf;
567
568 if (Py_IS_NAN(x))
569 return x;
570 absx = fabs(x);
571 if (absx < ERF_SERIES_CUTOFF)
572 return m_erf_series(x);
573 else {
574 cf = m_erfc_contfrac(absx);
575 return x > 0.0 ? 1.0 - cf : cf - 1.0;
576 }
577 #endif
578 }
579
580 /* Complementary error function erfc(x), for general x. */
581
582 static double
m_erfc(double x)583 m_erfc(double x)
584 {
585 #ifdef HAVE_ERFC
586 return erfc(x);
587 #else
588 double absx, cf;
589
590 if (Py_IS_NAN(x))
591 return x;
592 absx = fabs(x);
593 if (absx < ERF_SERIES_CUTOFF)
594 return 1.0 - m_erf_series(x);
595 else {
596 cf = m_erfc_contfrac(absx);
597 return x > 0.0 ? cf : 2.0 - cf;
598 }
599 #endif
600 }
601
602 /*
603 wrapper for atan2 that deals directly with special cases before
604 delegating to the platform libm for the remaining cases. This
605 is necessary to get consistent behaviour across platforms.
606 Windows, FreeBSD and alpha Tru64 are amongst platforms that don't
607 always follow C99.
608 */
609
610 static double
m_atan2(double y,double x)611 m_atan2(double y, double x)
612 {
613 if (Py_IS_NAN(x) || Py_IS_NAN(y))
614 return Py_NAN;
615 if (Py_IS_INFINITY(y)) {
616 if (Py_IS_INFINITY(x)) {
617 if (copysign(1., x) == 1.)
618 /* atan2(+-inf, +inf) == +-pi/4 */
619 return copysign(0.25*Py_MATH_PI, y);
620 else
621 /* atan2(+-inf, -inf) == +-pi*3/4 */
622 return copysign(0.75*Py_MATH_PI, y);
623 }
624 /* atan2(+-inf, x) == +-pi/2 for finite x */
625 return copysign(0.5*Py_MATH_PI, y);
626 }
627 if (Py_IS_INFINITY(x) || y == 0.) {
628 if (copysign(1., x) == 1.)
629 /* atan2(+-y, +inf) = atan2(+-0, +x) = +-0. */
630 return copysign(0., y);
631 else
632 /* atan2(+-y, -inf) = atan2(+-0., -x) = +-pi. */
633 return copysign(Py_MATH_PI, y);
634 }
635 return atan2(y, x);
636 }
637
638
639 /* IEEE 754-style remainder operation: x - n*y where n*y is the nearest
640 multiple of y to x, taking n even in the case of a tie. Assuming an IEEE 754
641 binary floating-point format, the result is always exact. */
642
643 static double
m_remainder(double x,double y)644 m_remainder(double x, double y)
645 {
646 /* Deal with most common case first. */
647 if (Py_IS_FINITE(x) && Py_IS_FINITE(y)) {
648 double absx, absy, c, m, r;
649
650 if (y == 0.0) {
651 return Py_NAN;
652 }
653
654 absx = fabs(x);
655 absy = fabs(y);
656 m = fmod(absx, absy);
657
658 /*
659 Warning: some subtlety here. What we *want* to know at this point is
660 whether the remainder m is less than, equal to, or greater than half
661 of absy. However, we can't do that comparison directly because we
662 can't be sure that 0.5*absy is representable (the multiplication
663 might incur precision loss due to underflow). So instead we compare
664 m with the complement c = absy - m: m < 0.5*absy if and only if m <
665 c, and so on. The catch is that absy - m might also not be
666 representable, but it turns out that it doesn't matter:
667
668 - if m > 0.5*absy then absy - m is exactly representable, by
669 Sterbenz's lemma, so m > c
670 - if m == 0.5*absy then again absy - m is exactly representable
671 and m == c
672 - if m < 0.5*absy then either (i) 0.5*absy is exactly representable,
673 in which case 0.5*absy < absy - m, so 0.5*absy <= c and hence m <
674 c, or (ii) absy is tiny, either subnormal or in the lowest normal
675 binade. Then absy - m is exactly representable and again m < c.
676 */
677
678 c = absy - m;
679 if (m < c) {
680 r = m;
681 }
682 else if (m > c) {
683 r = -c;
684 }
685 else {
686 /*
687 Here absx is exactly halfway between two multiples of absy,
688 and we need to choose the even multiple. x now has the form
689
690 absx = n * absy + m
691
692 for some integer n (recalling that m = 0.5*absy at this point).
693 If n is even we want to return m; if n is odd, we need to
694 return -m.
695
696 So
697
698 0.5 * (absx - m) = (n/2) * absy
699
700 and now reducing modulo absy gives us:
701
702 | m, if n is odd
703 fmod(0.5 * (absx - m), absy) = |
704 | 0, if n is even
705
706 Now m - 2.0 * fmod(...) gives the desired result: m
707 if n is even, -m if m is odd.
708
709 Note that all steps in fmod(0.5 * (absx - m), absy)
710 will be computed exactly, with no rounding error
711 introduced.
712 */
713 assert(m == c);
714 r = m - 2.0 * fmod(0.5 * (absx - m), absy);
715 }
716 return copysign(1.0, x) * r;
717 }
718
719 /* Special values. */
720 if (Py_IS_NAN(x)) {
721 return x;
722 }
723 if (Py_IS_NAN(y)) {
724 return y;
725 }
726 if (Py_IS_INFINITY(x)) {
727 return Py_NAN;
728 }
729 assert(Py_IS_INFINITY(y));
730 return x;
731 }
732
733
734 /*
735 Various platforms (Solaris, OpenBSD) do nonstandard things for log(0),
736 log(-ve), log(NaN). Here are wrappers for log and log10 that deal with
737 special values directly, passing positive non-special values through to
738 the system log/log10.
739 */
740
741 static double
m_log(double x)742 m_log(double x)
743 {
744 if (Py_IS_FINITE(x)) {
745 if (x > 0.0)
746 return log(x);
747 errno = EDOM;
748 if (x == 0.0)
749 return -Py_HUGE_VAL; /* log(0) = -inf */
750 else
751 return Py_NAN; /* log(-ve) = nan */
752 }
753 else if (Py_IS_NAN(x))
754 return x; /* log(nan) = nan */
755 else if (x > 0.0)
756 return x; /* log(inf) = inf */
757 else {
758 errno = EDOM;
759 return Py_NAN; /* log(-inf) = nan */
760 }
761 }
762
763 /*
764 log2: log to base 2.
765
766 Uses an algorithm that should:
767
768 (a) produce exact results for powers of 2, and
769 (b) give a monotonic log2 (for positive finite floats),
770 assuming that the system log is monotonic.
771 */
772
773 static double
m_log2(double x)774 m_log2(double x)
775 {
776 if (!Py_IS_FINITE(x)) {
777 if (Py_IS_NAN(x))
778 return x; /* log2(nan) = nan */
779 else if (x > 0.0)
780 return x; /* log2(+inf) = +inf */
781 else {
782 errno = EDOM;
783 return Py_NAN; /* log2(-inf) = nan, invalid-operation */
784 }
785 }
786
787 if (x > 0.0) {
788 #ifdef HAVE_LOG2
789 return log2(x);
790 #else
791 double m;
792 int e;
793 m = frexp(x, &e);
794 /* We want log2(m * 2**e) == log(m) / log(2) + e. Care is needed when
795 * x is just greater than 1.0: in that case e is 1, log(m) is negative,
796 * and we get significant cancellation error from the addition of
797 * log(m) / log(2) to e. The slight rewrite of the expression below
798 * avoids this problem.
799 */
800 if (x >= 1.0) {
801 return log(2.0 * m) / log(2.0) + (e - 1);
802 }
803 else {
804 return log(m) / log(2.0) + e;
805 }
806 #endif
807 }
808 else if (x == 0.0) {
809 errno = EDOM;
810 return -Py_HUGE_VAL; /* log2(0) = -inf, divide-by-zero */
811 }
812 else {
813 errno = EDOM;
814 return Py_NAN; /* log2(-inf) = nan, invalid-operation */
815 }
816 }
817
818 static double
m_log10(double x)819 m_log10(double x)
820 {
821 if (Py_IS_FINITE(x)) {
822 if (x > 0.0)
823 return log10(x);
824 errno = EDOM;
825 if (x == 0.0)
826 return -Py_HUGE_VAL; /* log10(0) = -inf */
827 else
828 return Py_NAN; /* log10(-ve) = nan */
829 }
830 else if (Py_IS_NAN(x))
831 return x; /* log10(nan) = nan */
832 else if (x > 0.0)
833 return x; /* log10(inf) = inf */
834 else {
835 errno = EDOM;
836 return Py_NAN; /* log10(-inf) = nan */
837 }
838 }
839
840
841 static PyObject *
math_gcd(PyObject * module,PyObject * const * args,Py_ssize_t nargs)842 math_gcd(PyObject *module, PyObject * const *args, Py_ssize_t nargs)
843 {
844 PyObject *res, *x;
845 Py_ssize_t i;
846
847 if (nargs == 0) {
848 return PyLong_FromLong(0);
849 }
850 res = PyNumber_Index(args[0]);
851 if (res == NULL) {
852 return NULL;
853 }
854 if (nargs == 1) {
855 Py_SETREF(res, PyNumber_Absolute(res));
856 return res;
857 }
858
859 PyObject *one = _PyLong_GetOne(); // borrowed ref
860 for (i = 1; i < nargs; i++) {
861 x = _PyNumber_Index(args[i]);
862 if (x == NULL) {
863 Py_DECREF(res);
864 return NULL;
865 }
866 if (res == one) {
867 /* Fast path: just check arguments.
868 It is okay to use identity comparison here. */
869 Py_DECREF(x);
870 continue;
871 }
872 Py_SETREF(res, _PyLong_GCD(res, x));
873 Py_DECREF(x);
874 if (res == NULL) {
875 return NULL;
876 }
877 }
878 return res;
879 }
880
881 PyDoc_STRVAR(math_gcd_doc,
882 "gcd($module, *integers)\n"
883 "--\n"
884 "\n"
885 "Greatest Common Divisor.");
886
887
888 static PyObject *
long_lcm(PyObject * a,PyObject * b)889 long_lcm(PyObject *a, PyObject *b)
890 {
891 PyObject *g, *m, *f, *ab;
892
893 if (Py_SIZE(a) == 0 || Py_SIZE(b) == 0) {
894 return PyLong_FromLong(0);
895 }
896 g = _PyLong_GCD(a, b);
897 if (g == NULL) {
898 return NULL;
899 }
900 f = PyNumber_FloorDivide(a, g);
901 Py_DECREF(g);
902 if (f == NULL) {
903 return NULL;
904 }
905 m = PyNumber_Multiply(f, b);
906 Py_DECREF(f);
907 if (m == NULL) {
908 return NULL;
909 }
910 ab = PyNumber_Absolute(m);
911 Py_DECREF(m);
912 return ab;
913 }
914
915
916 static PyObject *
math_lcm(PyObject * module,PyObject * const * args,Py_ssize_t nargs)917 math_lcm(PyObject *module, PyObject * const *args, Py_ssize_t nargs)
918 {
919 PyObject *res, *x;
920 Py_ssize_t i;
921
922 if (nargs == 0) {
923 return PyLong_FromLong(1);
924 }
925 res = PyNumber_Index(args[0]);
926 if (res == NULL) {
927 return NULL;
928 }
929 if (nargs == 1) {
930 Py_SETREF(res, PyNumber_Absolute(res));
931 return res;
932 }
933
934 PyObject *zero = _PyLong_GetZero(); // borrowed ref
935 for (i = 1; i < nargs; i++) {
936 x = PyNumber_Index(args[i]);
937 if (x == NULL) {
938 Py_DECREF(res);
939 return NULL;
940 }
941 if (res == zero) {
942 /* Fast path: just check arguments.
943 It is okay to use identity comparison here. */
944 Py_DECREF(x);
945 continue;
946 }
947 Py_SETREF(res, long_lcm(res, x));
948 Py_DECREF(x);
949 if (res == NULL) {
950 return NULL;
951 }
952 }
953 return res;
954 }
955
956
957 PyDoc_STRVAR(math_lcm_doc,
958 "lcm($module, *integers)\n"
959 "--\n"
960 "\n"
961 "Least Common Multiple.");
962
963
964 /* Call is_error when errno != 0, and where x is the result libm
965 * returned. is_error will usually set up an exception and return
966 * true (1), but may return false (0) without setting up an exception.
967 */
968 static int
is_error(double x)969 is_error(double x)
970 {
971 int result = 1; /* presumption of guilt */
972 assert(errno); /* non-zero errno is a precondition for calling */
973 if (errno == EDOM)
974 PyErr_SetString(PyExc_ValueError, "math domain error");
975
976 else if (errno == ERANGE) {
977 /* ANSI C generally requires libm functions to set ERANGE
978 * on overflow, but also generally *allows* them to set
979 * ERANGE on underflow too. There's no consistency about
980 * the latter across platforms.
981 * Alas, C99 never requires that errno be set.
982 * Here we suppress the underflow errors (libm functions
983 * should return a zero on underflow, and +- HUGE_VAL on
984 * overflow, so testing the result for zero suffices to
985 * distinguish the cases).
986 *
987 * On some platforms (Ubuntu/ia64) it seems that errno can be
988 * set to ERANGE for subnormal results that do *not* underflow
989 * to zero. So to be safe, we'll ignore ERANGE whenever the
990 * function result is less than 1.5 in absolute value.
991 *
992 * bpo-46018: Changed to 1.5 to ensure underflows in expm1()
993 * are correctly detected, since the function may underflow
994 * toward -1.0 rather than 0.0.
995 */
996 if (fabs(x) < 1.5)
997 result = 0;
998 else
999 PyErr_SetString(PyExc_OverflowError,
1000 "math range error");
1001 }
1002 else
1003 /* Unexpected math error */
1004 PyErr_SetFromErrno(PyExc_ValueError);
1005 return result;
1006 }
1007
1008 /*
1009 math_1 is used to wrap a libm function f that takes a double
1010 argument and returns a double.
1011
1012 The error reporting follows these rules, which are designed to do
1013 the right thing on C89/C99 platforms and IEEE 754/non IEEE 754
1014 platforms.
1015
1016 - a NaN result from non-NaN inputs causes ValueError to be raised
1017 - an infinite result from finite inputs causes OverflowError to be
1018 raised if can_overflow is 1, or raises ValueError if can_overflow
1019 is 0.
1020 - if the result is finite and errno == EDOM then ValueError is
1021 raised
1022 - if the result is finite and nonzero and errno == ERANGE then
1023 OverflowError is raised
1024
1025 The last rule is used to catch overflow on platforms which follow
1026 C89 but for which HUGE_VAL is not an infinity.
1027
1028 For the majority of one-argument functions these rules are enough
1029 to ensure that Python's functions behave as specified in 'Annex F'
1030 of the C99 standard, with the 'invalid' and 'divide-by-zero'
1031 floating-point exceptions mapping to Python's ValueError and the
1032 'overflow' floating-point exception mapping to OverflowError.
1033 math_1 only works for functions that don't have singularities *and*
1034 the possibility of overflow; fortunately, that covers everything we
1035 care about right now.
1036 */
1037
1038 static PyObject *
math_1_to_whatever(PyObject * arg,double (* func)(double),PyObject * (* from_double_func)(double),int can_overflow)1039 math_1_to_whatever(PyObject *arg, double (*func) (double),
1040 PyObject *(*from_double_func) (double),
1041 int can_overflow)
1042 {
1043 double x, r;
1044 x = PyFloat_AsDouble(arg);
1045 if (x == -1.0 && PyErr_Occurred())
1046 return NULL;
1047 errno = 0;
1048 r = (*func)(x);
1049 if (Py_IS_NAN(r) && !Py_IS_NAN(x)) {
1050 PyErr_SetString(PyExc_ValueError,
1051 "math domain error"); /* invalid arg */
1052 return NULL;
1053 }
1054 if (Py_IS_INFINITY(r) && Py_IS_FINITE(x)) {
1055 if (can_overflow)
1056 PyErr_SetString(PyExc_OverflowError,
1057 "math range error"); /* overflow */
1058 else
1059 PyErr_SetString(PyExc_ValueError,
1060 "math domain error"); /* singularity */
1061 return NULL;
1062 }
1063 if (Py_IS_FINITE(r) && errno && is_error(r))
1064 /* this branch unnecessary on most platforms */
1065 return NULL;
1066
1067 return (*from_double_func)(r);
1068 }
1069
1070 /* variant of math_1, to be used when the function being wrapped is known to
1071 set errno properly (that is, errno = EDOM for invalid or divide-by-zero,
1072 errno = ERANGE for overflow). */
1073
1074 static PyObject *
math_1a(PyObject * arg,double (* func)(double))1075 math_1a(PyObject *arg, double (*func) (double))
1076 {
1077 double x, r;
1078 x = PyFloat_AsDouble(arg);
1079 if (x == -1.0 && PyErr_Occurred())
1080 return NULL;
1081 errno = 0;
1082 r = (*func)(x);
1083 if (errno && is_error(r))
1084 return NULL;
1085 return PyFloat_FromDouble(r);
1086 }
1087
1088 /*
1089 math_2 is used to wrap a libm function f that takes two double
1090 arguments and returns a double.
1091
1092 The error reporting follows these rules, which are designed to do
1093 the right thing on C89/C99 platforms and IEEE 754/non IEEE 754
1094 platforms.
1095
1096 - a NaN result from non-NaN inputs causes ValueError to be raised
1097 - an infinite result from finite inputs causes OverflowError to be
1098 raised.
1099 - if the result is finite and errno == EDOM then ValueError is
1100 raised
1101 - if the result is finite and nonzero and errno == ERANGE then
1102 OverflowError is raised
1103
1104 The last rule is used to catch overflow on platforms which follow
1105 C89 but for which HUGE_VAL is not an infinity.
1106
1107 For most two-argument functions (copysign, fmod, hypot, atan2)
1108 these rules are enough to ensure that Python's functions behave as
1109 specified in 'Annex F' of the C99 standard, with the 'invalid' and
1110 'divide-by-zero' floating-point exceptions mapping to Python's
1111 ValueError and the 'overflow' floating-point exception mapping to
1112 OverflowError.
1113 */
1114
1115 static PyObject *
math_1(PyObject * arg,double (* func)(double),int can_overflow)1116 math_1(PyObject *arg, double (*func) (double), int can_overflow)
1117 {
1118 return math_1_to_whatever(arg, func, PyFloat_FromDouble, can_overflow);
1119 }
1120
1121 static PyObject *
math_2(PyObject * const * args,Py_ssize_t nargs,double (* func)(double,double),const char * funcname)1122 math_2(PyObject *const *args, Py_ssize_t nargs,
1123 double (*func) (double, double), const char *funcname)
1124 {
1125 double x, y, r;
1126 if (!_PyArg_CheckPositional(funcname, nargs, 2, 2))
1127 return NULL;
1128 x = PyFloat_AsDouble(args[0]);
1129 if (x == -1.0 && PyErr_Occurred()) {
1130 return NULL;
1131 }
1132 y = PyFloat_AsDouble(args[1]);
1133 if (y == -1.0 && PyErr_Occurred()) {
1134 return NULL;
1135 }
1136 errno = 0;
1137 r = (*func)(x, y);
1138 if (Py_IS_NAN(r)) {
1139 if (!Py_IS_NAN(x) && !Py_IS_NAN(y))
1140 errno = EDOM;
1141 else
1142 errno = 0;
1143 }
1144 else if (Py_IS_INFINITY(r)) {
1145 if (Py_IS_FINITE(x) && Py_IS_FINITE(y))
1146 errno = ERANGE;
1147 else
1148 errno = 0;
1149 }
1150 if (errno && is_error(r))
1151 return NULL;
1152 else
1153 return PyFloat_FromDouble(r);
1154 }
1155
1156 #define FUNC1(funcname, func, can_overflow, docstring) \
1157 static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
1158 return math_1(args, func, can_overflow); \
1159 }\
1160 PyDoc_STRVAR(math_##funcname##_doc, docstring);
1161
1162 #define FUNC1A(funcname, func, docstring) \
1163 static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
1164 return math_1a(args, func); \
1165 }\
1166 PyDoc_STRVAR(math_##funcname##_doc, docstring);
1167
1168 #define FUNC2(funcname, func, docstring) \
1169 static PyObject * math_##funcname(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { \
1170 return math_2(args, nargs, func, #funcname); \
1171 }\
1172 PyDoc_STRVAR(math_##funcname##_doc, docstring);
1173
1174 FUNC1(acos, acos, 0,
1175 "acos($module, x, /)\n--\n\n"
1176 "Return the arc cosine (measured in radians) of x.\n\n"
1177 "The result is between 0 and pi.")
1178 FUNC1(acosh, acosh, 0,
1179 "acosh($module, x, /)\n--\n\n"
1180 "Return the inverse hyperbolic cosine of x.")
1181 FUNC1(asin, asin, 0,
1182 "asin($module, x, /)\n--\n\n"
1183 "Return the arc sine (measured in radians) of x.\n\n"
1184 "The result is between -pi/2 and pi/2.")
1185 FUNC1(asinh, asinh, 0,
1186 "asinh($module, x, /)\n--\n\n"
1187 "Return the inverse hyperbolic sine of x.")
1188 FUNC1(atan, atan, 0,
1189 "atan($module, x, /)\n--\n\n"
1190 "Return the arc tangent (measured in radians) of x.\n\n"
1191 "The result is between -pi/2 and pi/2.")
1192 FUNC2(atan2, m_atan2,
1193 "atan2($module, y, x, /)\n--\n\n"
1194 "Return the arc tangent (measured in radians) of y/x.\n\n"
1195 "Unlike atan(y/x), the signs of both x and y are considered.")
1196 FUNC1(atanh, atanh, 0,
1197 "atanh($module, x, /)\n--\n\n"
1198 "Return the inverse hyperbolic tangent of x.")
1199 FUNC1(cbrt, cbrt, 0,
1200 "cbrt($module, x, /)\n--\n\n"
1201 "Return the cube root of x.")
1202
1203 /*[clinic input]
1204 math.ceil
1205
1206 x as number: object
1207 /
1208
1209 Return the ceiling of x as an Integral.
1210
1211 This is the smallest integer >= x.
1212 [clinic start generated code]*/
1213
1214 static PyObject *
math_ceil(PyObject * module,PyObject * number)1215 math_ceil(PyObject *module, PyObject *number)
1216 /*[clinic end generated code: output=6c3b8a78bc201c67 input=2725352806399cab]*/
1217 {
1218 _Py_IDENTIFIER(__ceil__);
1219
1220 if (!PyFloat_CheckExact(number)) {
1221 PyObject *method = _PyObject_LookupSpecialId(number, &PyId___ceil__);
1222 if (method != NULL) {
1223 PyObject *result = _PyObject_CallNoArgs(method);
1224 Py_DECREF(method);
1225 return result;
1226 }
1227 if (PyErr_Occurred())
1228 return NULL;
1229 }
1230 double x = PyFloat_AsDouble(number);
1231 if (x == -1.0 && PyErr_Occurred())
1232 return NULL;
1233
1234 return PyLong_FromDouble(ceil(x));
1235 }
1236
1237 FUNC2(copysign, copysign,
1238 "copysign($module, x, y, /)\n--\n\n"
1239 "Return a float with the magnitude (absolute value) of x but the sign of y.\n\n"
1240 "On platforms that support signed zeros, copysign(1.0, -0.0)\n"
1241 "returns -1.0.\n")
1242 FUNC1(cos, cos, 0,
1243 "cos($module, x, /)\n--\n\n"
1244 "Return the cosine of x (measured in radians).")
1245 FUNC1(cosh, cosh, 1,
1246 "cosh($module, x, /)\n--\n\n"
1247 "Return the hyperbolic cosine of x.")
1248 FUNC1A(erf, m_erf,
1249 "erf($module, x, /)\n--\n\n"
1250 "Error function at x.")
1251 FUNC1A(erfc, m_erfc,
1252 "erfc($module, x, /)\n--\n\n"
1253 "Complementary error function at x.")
1254 FUNC1(exp, exp, 1,
1255 "exp($module, x, /)\n--\n\n"
1256 "Return e raised to the power of x.")
1257 FUNC1(exp2, exp2, 1,
1258 "exp2($module, x, /)\n--\n\n"
1259 "Return 2 raised to the power of x.")
1260 FUNC1(expm1, expm1, 1,
1261 "expm1($module, x, /)\n--\n\n"
1262 "Return exp(x)-1.\n\n"
1263 "This function avoids the loss of precision involved in the direct "
1264 "evaluation of exp(x)-1 for small x.")
1265 FUNC1(fabs, fabs, 0,
1266 "fabs($module, x, /)\n--\n\n"
1267 "Return the absolute value of the float x.")
1268
1269 /*[clinic input]
1270 math.floor
1271
1272 x as number: object
1273 /
1274
1275 Return the floor of x as an Integral.
1276
1277 This is the largest integer <= x.
1278 [clinic start generated code]*/
1279
1280 static PyObject *
math_floor(PyObject * module,PyObject * number)1281 math_floor(PyObject *module, PyObject *number)
1282 /*[clinic end generated code: output=c6a65c4884884b8a input=63af6b5d7ebcc3d6]*/
1283 {
1284 double x;
1285
1286 _Py_IDENTIFIER(__floor__);
1287
1288 if (PyFloat_CheckExact(number)) {
1289 x = PyFloat_AS_DOUBLE(number);
1290 }
1291 else
1292 {
1293 PyObject *method = _PyObject_LookupSpecialId(number, &PyId___floor__);
1294 if (method != NULL) {
1295 PyObject *result = _PyObject_CallNoArgs(method);
1296 Py_DECREF(method);
1297 return result;
1298 }
1299 if (PyErr_Occurred())
1300 return NULL;
1301 x = PyFloat_AsDouble(number);
1302 if (x == -1.0 && PyErr_Occurred())
1303 return NULL;
1304 }
1305 return PyLong_FromDouble(floor(x));
1306 }
1307
1308 FUNC1A(gamma, m_tgamma,
1309 "gamma($module, x, /)\n--\n\n"
1310 "Gamma function at x.")
1311 FUNC1A(lgamma, m_lgamma,
1312 "lgamma($module, x, /)\n--\n\n"
1313 "Natural logarithm of absolute value of Gamma function at x.")
1314 FUNC1(log1p, m_log1p, 0,
1315 "log1p($module, x, /)\n--\n\n"
1316 "Return the natural logarithm of 1+x (base e).\n\n"
1317 "The result is computed in a way which is accurate for x near zero.")
1318 FUNC2(remainder, m_remainder,
1319 "remainder($module, x, y, /)\n--\n\n"
1320 "Difference between x and the closest integer multiple of y.\n\n"
1321 "Return x - n*y where n*y is the closest integer multiple of y.\n"
1322 "In the case where x is exactly halfway between two multiples of\n"
1323 "y, the nearest even value of n is used. The result is always exact.")
1324 FUNC1(sin, sin, 0,
1325 "sin($module, x, /)\n--\n\n"
1326 "Return the sine of x (measured in radians).")
1327 FUNC1(sinh, sinh, 1,
1328 "sinh($module, x, /)\n--\n\n"
1329 "Return the hyperbolic sine of x.")
1330 FUNC1(sqrt, sqrt, 0,
1331 "sqrt($module, x, /)\n--\n\n"
1332 "Return the square root of x.")
1333 FUNC1(tan, tan, 0,
1334 "tan($module, x, /)\n--\n\n"
1335 "Return the tangent of x (measured in radians).")
1336 FUNC1(tanh, tanh, 0,
1337 "tanh($module, x, /)\n--\n\n"
1338 "Return the hyperbolic tangent of x.")
1339
1340 /* Precision summation function as msum() by Raymond Hettinger in
1341 <http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/393090>,
1342 enhanced with the exact partials sum and roundoff from Mark
1343 Dickinson's post at <http://bugs.python.org/file10357/msum4.py>.
1344 See those links for more details, proofs and other references.
1345
1346 Note 1: IEEE 754R floating point semantics are assumed,
1347 but the current implementation does not re-establish special
1348 value semantics across iterations (i.e. handling -Inf + Inf).
1349
1350 Note 2: No provision is made for intermediate overflow handling;
1351 therefore, sum([1e+308, 1e-308, 1e+308]) returns 1e+308 while
1352 sum([1e+308, 1e+308, 1e-308]) raises an OverflowError due to the
1353 overflow of the first partial sum.
1354
1355 Note 3: The intermediate values lo, yr, and hi are declared volatile so
1356 aggressive compilers won't algebraically reduce lo to always be exactly 0.0.
1357 Also, the volatile declaration forces the values to be stored in memory as
1358 regular doubles instead of extended long precision (80-bit) values. This
1359 prevents double rounding because any addition or subtraction of two doubles
1360 can be resolved exactly into double-sized hi and lo values. As long as the
1361 hi value gets forced into a double before yr and lo are computed, the extra
1362 bits in downstream extended precision operations (x87 for example) will be
1363 exactly zero and therefore can be losslessly stored back into a double,
1364 thereby preventing double rounding.
1365
1366 Note 4: A similar implementation is in Modules/cmathmodule.c.
1367 Be sure to update both when making changes.
1368
1369 Note 5: The signature of math.fsum() differs from builtins.sum()
1370 because the start argument doesn't make sense in the context of
1371 accurate summation. Since the partials table is collapsed before
1372 returning a result, sum(seq2, start=sum(seq1)) may not equal the
1373 accurate result returned by sum(itertools.chain(seq1, seq2)).
1374 */
1375
1376 #define NUM_PARTIALS 32 /* initial partials array size, on stack */
1377
1378 /* Extend the partials array p[] by doubling its size. */
1379 static int /* non-zero on error */
_fsum_realloc(double ** p_ptr,Py_ssize_t n,double * ps,Py_ssize_t * m_ptr)1380 _fsum_realloc(double **p_ptr, Py_ssize_t n,
1381 double *ps, Py_ssize_t *m_ptr)
1382 {
1383 void *v = NULL;
1384 Py_ssize_t m = *m_ptr;
1385
1386 m += m; /* double */
1387 if (n < m && (size_t)m < ((size_t)PY_SSIZE_T_MAX / sizeof(double))) {
1388 double *p = *p_ptr;
1389 if (p == ps) {
1390 v = PyMem_Malloc(sizeof(double) * m);
1391 if (v != NULL)
1392 memcpy(v, ps, sizeof(double) * n);
1393 }
1394 else
1395 v = PyMem_Realloc(p, sizeof(double) * m);
1396 }
1397 if (v == NULL) { /* size overflow or no memory */
1398 PyErr_SetString(PyExc_MemoryError, "math.fsum partials");
1399 return 1;
1400 }
1401 *p_ptr = (double*) v;
1402 *m_ptr = m;
1403 return 0;
1404 }
1405
1406 /* Full precision summation of a sequence of floats.
1407
1408 def msum(iterable):
1409 partials = [] # sorted, non-overlapping partial sums
1410 for x in iterable:
1411 i = 0
1412 for y in partials:
1413 if abs(x) < abs(y):
1414 x, y = y, x
1415 hi = x + y
1416 lo = y - (hi - x)
1417 if lo:
1418 partials[i] = lo
1419 i += 1
1420 x = hi
1421 partials[i:] = [x]
1422 return sum_exact(partials)
1423
1424 Rounded x+y stored in hi with the roundoff stored in lo. Together hi+lo
1425 are exactly equal to x+y. The inner loop applies hi/lo summation to each
1426 partial so that the list of partial sums remains exact.
1427
1428 Sum_exact() adds the partial sums exactly and correctly rounds the final
1429 result (using the round-half-to-even rule). The items in partials remain
1430 non-zero, non-special, non-overlapping and strictly increasing in
1431 magnitude, but possibly not all having the same sign.
1432
1433 Depends on IEEE 754 arithmetic guarantees and half-even rounding.
1434 */
1435
1436 /*[clinic input]
1437 math.fsum
1438
1439 seq: object
1440 /
1441
1442 Return an accurate floating point sum of values in the iterable seq.
1443
1444 Assumes IEEE-754 floating point arithmetic.
1445 [clinic start generated code]*/
1446
1447 static PyObject *
math_fsum(PyObject * module,PyObject * seq)1448 math_fsum(PyObject *module, PyObject *seq)
1449 /*[clinic end generated code: output=ba5c672b87fe34fc input=c51b7d8caf6f6e82]*/
1450 {
1451 PyObject *item, *iter, *sum = NULL;
1452 Py_ssize_t i, j, n = 0, m = NUM_PARTIALS;
1453 double x, y, t, ps[NUM_PARTIALS], *p = ps;
1454 double xsave, special_sum = 0.0, inf_sum = 0.0;
1455 volatile double hi, yr, lo;
1456
1457 iter = PyObject_GetIter(seq);
1458 if (iter == NULL)
1459 return NULL;
1460
1461 for(;;) { /* for x in iterable */
1462 assert(0 <= n && n <= m);
1463 assert((m == NUM_PARTIALS && p == ps) ||
1464 (m > NUM_PARTIALS && p != NULL));
1465
1466 item = PyIter_Next(iter);
1467 if (item == NULL) {
1468 if (PyErr_Occurred())
1469 goto _fsum_error;
1470 break;
1471 }
1472 ASSIGN_DOUBLE(x, item, error_with_item);
1473 Py_DECREF(item);
1474
1475 xsave = x;
1476 for (i = j = 0; j < n; j++) { /* for y in partials */
1477 y = p[j];
1478 if (fabs(x) < fabs(y)) {
1479 t = x; x = y; y = t;
1480 }
1481 hi = x + y;
1482 yr = hi - x;
1483 lo = y - yr;
1484 if (lo != 0.0)
1485 p[i++] = lo;
1486 x = hi;
1487 }
1488
1489 n = i; /* ps[i:] = [x] */
1490 if (x != 0.0) {
1491 if (! Py_IS_FINITE(x)) {
1492 /* a nonfinite x could arise either as
1493 a result of intermediate overflow, or
1494 as a result of a nan or inf in the
1495 summands */
1496 if (Py_IS_FINITE(xsave)) {
1497 PyErr_SetString(PyExc_OverflowError,
1498 "intermediate overflow in fsum");
1499 goto _fsum_error;
1500 }
1501 if (Py_IS_INFINITY(xsave))
1502 inf_sum += xsave;
1503 special_sum += xsave;
1504 /* reset partials */
1505 n = 0;
1506 }
1507 else if (n >= m && _fsum_realloc(&p, n, ps, &m))
1508 goto _fsum_error;
1509 else
1510 p[n++] = x;
1511 }
1512 }
1513
1514 if (special_sum != 0.0) {
1515 if (Py_IS_NAN(inf_sum))
1516 PyErr_SetString(PyExc_ValueError,
1517 "-inf + inf in fsum");
1518 else
1519 sum = PyFloat_FromDouble(special_sum);
1520 goto _fsum_error;
1521 }
1522
1523 hi = 0.0;
1524 if (n > 0) {
1525 hi = p[--n];
1526 /* sum_exact(ps, hi) from the top, stop when the sum becomes
1527 inexact. */
1528 while (n > 0) {
1529 x = hi;
1530 y = p[--n];
1531 assert(fabs(y) < fabs(x));
1532 hi = x + y;
1533 yr = hi - x;
1534 lo = y - yr;
1535 if (lo != 0.0)
1536 break;
1537 }
1538 /* Make half-even rounding work across multiple partials.
1539 Needed so that sum([1e-16, 1, 1e16]) will round-up the last
1540 digit to two instead of down to zero (the 1e-16 makes the 1
1541 slightly closer to two). With a potential 1 ULP rounding
1542 error fixed-up, math.fsum() can guarantee commutativity. */
1543 if (n > 0 && ((lo < 0.0 && p[n-1] < 0.0) ||
1544 (lo > 0.0 && p[n-1] > 0.0))) {
1545 y = lo * 2.0;
1546 x = hi + y;
1547 yr = x - hi;
1548 if (y == yr)
1549 hi = x;
1550 }
1551 }
1552 sum = PyFloat_FromDouble(hi);
1553
1554 _fsum_error:
1555 Py_DECREF(iter);
1556 if (p != ps)
1557 PyMem_Free(p);
1558 return sum;
1559
1560 error_with_item:
1561 Py_DECREF(item);
1562 goto _fsum_error;
1563 }
1564
1565 #undef NUM_PARTIALS
1566
1567
1568 static unsigned long
count_set_bits(unsigned long n)1569 count_set_bits(unsigned long n)
1570 {
1571 unsigned long count = 0;
1572 while (n != 0) {
1573 ++count;
1574 n &= n - 1; /* clear least significant bit */
1575 }
1576 return count;
1577 }
1578
1579 /* Integer square root
1580
1581 Given a nonnegative integer `n`, we want to compute the largest integer
1582 `a` for which `a * a <= n`, or equivalently the integer part of the exact
1583 square root of `n`.
1584
1585 We use an adaptive-precision pure-integer version of Newton's iteration. Given
1586 a positive integer `n`, the algorithm produces at each iteration an integer
1587 approximation `a` to the square root of `n >> s` for some even integer `s`,
1588 with `s` decreasing as the iterations progress. On the final iteration, `s` is
1589 zero and we have an approximation to the square root of `n` itself.
1590
1591 At every step, the approximation `a` is strictly within 1.0 of the true square
1592 root, so we have
1593
1594 (a - 1)**2 < (n >> s) < (a + 1)**2
1595
1596 After the final iteration, a check-and-correct step is needed to determine
1597 whether `a` or `a - 1` gives the desired integer square root of `n`.
1598
1599 The algorithm is remarkable in its simplicity. There's no need for a
1600 per-iteration check-and-correct step, and termination is straightforward: the
1601 number of iterations is known in advance (it's exactly `floor(log2(log2(n)))`
1602 for `n > 1`). The only tricky part of the correctness proof is in establishing
1603 that the bound `(a - 1)**2 < (n >> s) < (a + 1)**2` is maintained from one
1604 iteration to the next. A sketch of the proof of this is given below.
1605
1606 In addition to the proof sketch, a formal, computer-verified proof
1607 of correctness (using Lean) of an equivalent recursive algorithm can be found
1608 here:
1609
1610 https://github.com/mdickinson/snippets/blob/master/proofs/isqrt/src/isqrt.lean
1611
1612
1613 Here's Python code equivalent to the C implementation below:
1614
1615 def isqrt(n):
1616 """
1617 Return the integer part of the square root of the input.
1618 """
1619 n = operator.index(n)
1620
1621 if n < 0:
1622 raise ValueError("isqrt() argument must be nonnegative")
1623 if n == 0:
1624 return 0
1625
1626 c = (n.bit_length() - 1) // 2
1627 a = 1
1628 d = 0
1629 for s in reversed(range(c.bit_length())):
1630 # Loop invariant: (a-1)**2 < (n >> 2*(c - d)) < (a+1)**2
1631 e = d
1632 d = c >> s
1633 a = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
1634
1635 return a - (a*a > n)
1636
1637
1638 Sketch of proof of correctness
1639 ------------------------------
1640
1641 The delicate part of the correctness proof is showing that the loop invariant
1642 is preserved from one iteration to the next. That is, just before the line
1643
1644 a = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
1645
1646 is executed in the above code, we know that
1647
1648 (1) (a - 1)**2 < (n >> 2*(c - e)) < (a + 1)**2.
1649
1650 (since `e` is always the value of `d` from the previous iteration). We must
1651 prove that after that line is executed, we have
1652
1653 (a - 1)**2 < (n >> 2*(c - d)) < (a + 1)**2
1654
1655 To facilitate the proof, we make some changes of notation. Write `m` for
1656 `n >> 2*(c-d)`, and write `b` for the new value of `a`, so
1657
1658 b = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
1659
1660 or equivalently:
1661
1662 (2) b = (a << d - e - 1) + (m >> d - e + 1) // a
1663
1664 Then we can rewrite (1) as:
1665
1666 (3) (a - 1)**2 < (m >> 2*(d - e)) < (a + 1)**2
1667
1668 and we must show that (b - 1)**2 < m < (b + 1)**2.
1669
1670 From this point on, we switch to mathematical notation, so `/` means exact
1671 division rather than integer division and `^` is used for exponentiation. We
1672 use the `√` symbol for the exact square root. In (3), we can remove the
1673 implicit floor operation to give:
1674
1675 (4) (a - 1)^2 < m / 4^(d - e) < (a + 1)^2
1676
1677 Taking square roots throughout (4), scaling by `2^(d-e)`, and rearranging gives
1678
1679 (5) 0 <= | 2^(d-e)a - √m | < 2^(d-e)
1680
1681 Squaring and dividing through by `2^(d-e+1) a` gives
1682
1683 (6) 0 <= 2^(d-e-1) a + m / (2^(d-e+1) a) - √m < 2^(d-e-1) / a
1684
1685 We'll show below that `2^(d-e-1) <= a`. Given that, we can replace the
1686 right-hand side of (6) with `1`, and now replacing the central
1687 term `m / (2^(d-e+1) a)` with its floor in (6) gives
1688
1689 (7) -1 < 2^(d-e-1) a + m // 2^(d-e+1) a - √m < 1
1690
1691 Or equivalently, from (2):
1692
1693 (7) -1 < b - √m < 1
1694
1695 and rearranging gives that `(b-1)^2 < m < (b+1)^2`, which is what we needed
1696 to prove.
1697
1698 We're not quite done: we still have to prove the inequality `2^(d - e - 1) <=
1699 a` that was used to get line (7) above. From the definition of `c`, we have
1700 `4^c <= n`, which implies
1701
1702 (8) 4^d <= m
1703
1704 also, since `e == d >> 1`, `d` is at most `2e + 1`, from which it follows
1705 that `2d - 2e - 1 <= d` and hence that
1706
1707 (9) 4^(2d - 2e - 1) <= m
1708
1709 Dividing both sides by `4^(d - e)` gives
1710
1711 (10) 4^(d - e - 1) <= m / 4^(d - e)
1712
1713 But we know from (4) that `m / 4^(d-e) < (a + 1)^2`, hence
1714
1715 (11) 4^(d - e - 1) < (a + 1)^2
1716
1717 Now taking square roots of both sides and observing that both `2^(d-e-1)` and
1718 `a` are integers gives `2^(d - e - 1) <= a`, which is what we needed. This
1719 completes the proof sketch.
1720
1721 */
1722
1723 /*
1724 The _approximate_isqrt_tab table provides approximate square roots for
1725 16-bit integers. For any n in the range 2**14 <= n < 2**16, the value
1726
1727 a = _approximate_isqrt_tab[(n >> 8) - 64]
1728
1729 is an approximate square root of n, satisfying (a - 1)**2 < n < (a + 1)**2.
1730
1731 The table was computed in Python using the expression:
1732
1733 [min(round(sqrt(256*n + 128)), 255) for n in range(64, 256)]
1734 */
1735
1736 static const uint8_t _approximate_isqrt_tab[192] = {
1737 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
1738 140, 141, 142, 143, 144, 144, 145, 146, 147, 148, 149, 150,
1739 151, 151, 152, 153, 154, 155, 156, 156, 157, 158, 159, 160,
1740 160, 161, 162, 163, 164, 164, 165, 166, 167, 167, 168, 169,
1741 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178,
1742 179, 179, 180, 181, 181, 182, 183, 183, 184, 185, 186, 186,
1743 187, 188, 188, 189, 190, 190, 191, 192, 192, 193, 194, 194,
1744 195, 196, 196, 197, 198, 198, 199, 200, 200, 201, 201, 202,
1745 203, 203, 204, 205, 205, 206, 206, 207, 208, 208, 209, 210,
1746 210, 211, 211, 212, 213, 213, 214, 214, 215, 216, 216, 217,
1747 217, 218, 219, 219, 220, 220, 221, 221, 222, 223, 223, 224,
1748 224, 225, 225, 226, 227, 227, 228, 228, 229, 229, 230, 230,
1749 231, 232, 232, 233, 233, 234, 234, 235, 235, 236, 237, 237,
1750 238, 238, 239, 239, 240, 240, 241, 241, 242, 242, 243, 243,
1751 244, 244, 245, 246, 246, 247, 247, 248, 248, 249, 249, 250,
1752 250, 251, 251, 252, 252, 253, 253, 254, 254, 255, 255, 255,
1753 };
1754
1755 /* Approximate square root of a large 64-bit integer.
1756
1757 Given `n` satisfying `2**62 <= n < 2**64`, return `a`
1758 satisfying `(a - 1)**2 < n < (a + 1)**2`. */
1759
1760 static inline uint32_t
_approximate_isqrt(uint64_t n)1761 _approximate_isqrt(uint64_t n)
1762 {
1763 uint32_t u = _approximate_isqrt_tab[(n >> 56) - 64];
1764 u = (u << 7) + (uint32_t)(n >> 41) / u;
1765 return (u << 15) + (uint32_t)((n >> 17) / u);
1766 }
1767
1768 /*[clinic input]
1769 math.isqrt
1770
1771 n: object
1772 /
1773
1774 Return the integer part of the square root of the input.
1775 [clinic start generated code]*/
1776
1777 static PyObject *
math_isqrt(PyObject * module,PyObject * n)1778 math_isqrt(PyObject *module, PyObject *n)
1779 /*[clinic end generated code: output=35a6f7f980beab26 input=5b6e7ae4fa6c43d6]*/
1780 {
1781 int a_too_large, c_bit_length;
1782 size_t c, d;
1783 uint64_t m;
1784 uint32_t u;
1785 PyObject *a = NULL, *b;
1786
1787 n = _PyNumber_Index(n);
1788 if (n == NULL) {
1789 return NULL;
1790 }
1791
1792 if (_PyLong_Sign(n) < 0) {
1793 PyErr_SetString(
1794 PyExc_ValueError,
1795 "isqrt() argument must be nonnegative");
1796 goto error;
1797 }
1798 if (_PyLong_Sign(n) == 0) {
1799 Py_DECREF(n);
1800 return PyLong_FromLong(0);
1801 }
1802
1803 /* c = (n.bit_length() - 1) // 2 */
1804 c = _PyLong_NumBits(n);
1805 if (c == (size_t)(-1)) {
1806 goto error;
1807 }
1808 c = (c - 1U) / 2U;
1809
1810 /* Fast path: if c <= 31 then n < 2**64 and we can compute directly with a
1811 fast, almost branch-free algorithm. */
1812 if (c <= 31U) {
1813 int shift = 31 - (int)c;
1814 m = (uint64_t)PyLong_AsUnsignedLongLong(n);
1815 Py_DECREF(n);
1816 if (m == (uint64_t)(-1) && PyErr_Occurred()) {
1817 return NULL;
1818 }
1819 u = _approximate_isqrt(m << 2*shift) >> shift;
1820 u -= (uint64_t)u * u > m;
1821 return PyLong_FromUnsignedLong(u);
1822 }
1823
1824 /* Slow path: n >= 2**64. We perform the first five iterations in C integer
1825 arithmetic, then switch to using Python long integers. */
1826
1827 /* From n >= 2**64 it follows that c.bit_length() >= 6. */
1828 c_bit_length = 6;
1829 while ((c >> c_bit_length) > 0U) {
1830 ++c_bit_length;
1831 }
1832
1833 /* Initialise d and a. */
1834 d = c >> (c_bit_length - 5);
1835 b = _PyLong_Rshift(n, 2U*c - 62U);
1836 if (b == NULL) {
1837 goto error;
1838 }
1839 m = (uint64_t)PyLong_AsUnsignedLongLong(b);
1840 Py_DECREF(b);
1841 if (m == (uint64_t)(-1) && PyErr_Occurred()) {
1842 goto error;
1843 }
1844 u = _approximate_isqrt(m) >> (31U - d);
1845 a = PyLong_FromUnsignedLong(u);
1846 if (a == NULL) {
1847 goto error;
1848 }
1849
1850 for (int s = c_bit_length - 6; s >= 0; --s) {
1851 PyObject *q;
1852 size_t e = d;
1853
1854 d = c >> s;
1855
1856 /* q = (n >> 2*c - e - d + 1) // a */
1857 q = _PyLong_Rshift(n, 2U*c - d - e + 1U);
1858 if (q == NULL) {
1859 goto error;
1860 }
1861 Py_SETREF(q, PyNumber_FloorDivide(q, a));
1862 if (q == NULL) {
1863 goto error;
1864 }
1865
1866 /* a = (a << d - 1 - e) + q */
1867 Py_SETREF(a, _PyLong_Lshift(a, d - 1U - e));
1868 if (a == NULL) {
1869 Py_DECREF(q);
1870 goto error;
1871 }
1872 Py_SETREF(a, PyNumber_Add(a, q));
1873 Py_DECREF(q);
1874 if (a == NULL) {
1875 goto error;
1876 }
1877 }
1878
1879 /* The correct result is either a or a - 1. Figure out which, and
1880 decrement a if necessary. */
1881
1882 /* a_too_large = n < a * a */
1883 b = PyNumber_Multiply(a, a);
1884 if (b == NULL) {
1885 goto error;
1886 }
1887 a_too_large = PyObject_RichCompareBool(n, b, Py_LT);
1888 Py_DECREF(b);
1889 if (a_too_large == -1) {
1890 goto error;
1891 }
1892
1893 if (a_too_large) {
1894 Py_SETREF(a, PyNumber_Subtract(a, _PyLong_GetOne()));
1895 }
1896 Py_DECREF(n);
1897 return a;
1898
1899 error:
1900 Py_XDECREF(a);
1901 Py_DECREF(n);
1902 return NULL;
1903 }
1904
1905 /* Divide-and-conquer factorial algorithm
1906 *
1907 * Based on the formula and pseudo-code provided at:
1908 * http://www.luschny.de/math/factorial/binarysplitfact.html
1909 *
1910 * Faster algorithms exist, but they're more complicated and depend on
1911 * a fast prime factorization algorithm.
1912 *
1913 * Notes on the algorithm
1914 * ----------------------
1915 *
1916 * factorial(n) is written in the form 2**k * m, with m odd. k and m are
1917 * computed separately, and then combined using a left shift.
1918 *
1919 * The function factorial_odd_part computes the odd part m (i.e., the greatest
1920 * odd divisor) of factorial(n), using the formula:
1921 *
1922 * factorial_odd_part(n) =
1923 *
1924 * product_{i >= 0} product_{0 < j <= n / 2**i, j odd} j
1925 *
1926 * Example: factorial_odd_part(20) =
1927 *
1928 * (1) *
1929 * (1) *
1930 * (1 * 3 * 5) *
1931 * (1 * 3 * 5 * 7 * 9) *
1932 * (1 * 3 * 5 * 7 * 9 * 11 * 13 * 15 * 17 * 19)
1933 *
1934 * Here i goes from large to small: the first term corresponds to i=4 (any
1935 * larger i gives an empty product), and the last term corresponds to i=0.
1936 * Each term can be computed from the last by multiplying by the extra odd
1937 * numbers required: e.g., to get from the penultimate term to the last one,
1938 * we multiply by (11 * 13 * 15 * 17 * 19).
1939 *
1940 * To see a hint of why this formula works, here are the same numbers as above
1941 * but with the even parts (i.e., the appropriate powers of 2) included. For
1942 * each subterm in the product for i, we multiply that subterm by 2**i:
1943 *
1944 * factorial(20) =
1945 *
1946 * (16) *
1947 * (8) *
1948 * (4 * 12 * 20) *
1949 * (2 * 6 * 10 * 14 * 18) *
1950 * (1 * 3 * 5 * 7 * 9 * 11 * 13 * 15 * 17 * 19)
1951 *
1952 * The factorial_partial_product function computes the product of all odd j in
1953 * range(start, stop) for given start and stop. It's used to compute the
1954 * partial products like (11 * 13 * 15 * 17 * 19) in the example above. It
1955 * operates recursively, repeatedly splitting the range into two roughly equal
1956 * pieces until the subranges are small enough to be computed using only C
1957 * integer arithmetic.
1958 *
1959 * The two-valuation k (i.e., the exponent of the largest power of 2 dividing
1960 * the factorial) is computed independently in the main math_factorial
1961 * function. By standard results, its value is:
1962 *
1963 * two_valuation = n//2 + n//4 + n//8 + ....
1964 *
1965 * It can be shown (e.g., by complete induction on n) that two_valuation is
1966 * equal to n - count_set_bits(n), where count_set_bits(n) gives the number of
1967 * '1'-bits in the binary expansion of n.
1968 */
1969
1970 /* factorial_partial_product: Compute product(range(start, stop, 2)) using
1971 * divide and conquer. Assumes start and stop are odd and stop > start.
1972 * max_bits must be >= bit_length(stop - 2). */
1973
1974 static PyObject *
factorial_partial_product(unsigned long start,unsigned long stop,unsigned long max_bits)1975 factorial_partial_product(unsigned long start, unsigned long stop,
1976 unsigned long max_bits)
1977 {
1978 unsigned long midpoint, num_operands;
1979 PyObject *left = NULL, *right = NULL, *result = NULL;
1980
1981 /* If the return value will fit an unsigned long, then we can
1982 * multiply in a tight, fast loop where each multiply is O(1).
1983 * Compute an upper bound on the number of bits required to store
1984 * the answer.
1985 *
1986 * Storing some integer z requires floor(lg(z))+1 bits, which is
1987 * conveniently the value returned by bit_length(z). The
1988 * product x*y will require at most
1989 * bit_length(x) + bit_length(y) bits to store, based
1990 * on the idea that lg product = lg x + lg y.
1991 *
1992 * We know that stop - 2 is the largest number to be multiplied. From
1993 * there, we have: bit_length(answer) <= num_operands *
1994 * bit_length(stop - 2)
1995 */
1996
1997 num_operands = (stop - start) / 2;
1998 /* The "num_operands <= 8 * SIZEOF_LONG" check guards against the
1999 * unlikely case of an overflow in num_operands * max_bits. */
2000 if (num_operands <= 8 * SIZEOF_LONG &&
2001 num_operands * max_bits <= 8 * SIZEOF_LONG) {
2002 unsigned long j, total;
2003 for (total = start, j = start + 2; j < stop; j += 2)
2004 total *= j;
2005 return PyLong_FromUnsignedLong(total);
2006 }
2007
2008 /* find midpoint of range(start, stop), rounded up to next odd number. */
2009 midpoint = (start + num_operands) | 1;
2010 left = factorial_partial_product(start, midpoint,
2011 _Py_bit_length(midpoint - 2));
2012 if (left == NULL)
2013 goto error;
2014 right = factorial_partial_product(midpoint, stop, max_bits);
2015 if (right == NULL)
2016 goto error;
2017 result = PyNumber_Multiply(left, right);
2018
2019 error:
2020 Py_XDECREF(left);
2021 Py_XDECREF(right);
2022 return result;
2023 }
2024
2025 /* factorial_odd_part: compute the odd part of factorial(n). */
2026
2027 static PyObject *
factorial_odd_part(unsigned long n)2028 factorial_odd_part(unsigned long n)
2029 {
2030 long i;
2031 unsigned long v, lower, upper;
2032 PyObject *partial, *tmp, *inner, *outer;
2033
2034 inner = PyLong_FromLong(1);
2035 if (inner == NULL)
2036 return NULL;
2037 outer = inner;
2038 Py_INCREF(outer);
2039
2040 upper = 3;
2041 for (i = _Py_bit_length(n) - 2; i >= 0; i--) {
2042 v = n >> i;
2043 if (v <= 2)
2044 continue;
2045 lower = upper;
2046 /* (v + 1) | 1 = least odd integer strictly larger than n / 2**i */
2047 upper = (v + 1) | 1;
2048 /* Here inner is the product of all odd integers j in the range (0,
2049 n/2**(i+1)]. The factorial_partial_product call below gives the
2050 product of all odd integers j in the range (n/2**(i+1), n/2**i]. */
2051 partial = factorial_partial_product(lower, upper, _Py_bit_length(upper-2));
2052 /* inner *= partial */
2053 if (partial == NULL)
2054 goto error;
2055 tmp = PyNumber_Multiply(inner, partial);
2056 Py_DECREF(partial);
2057 if (tmp == NULL)
2058 goto error;
2059 Py_DECREF(inner);
2060 inner = tmp;
2061 /* Now inner is the product of all odd integers j in the range (0,
2062 n/2**i], giving the inner product in the formula above. */
2063
2064 /* outer *= inner; */
2065 tmp = PyNumber_Multiply(outer, inner);
2066 if (tmp == NULL)
2067 goto error;
2068 Py_DECREF(outer);
2069 outer = tmp;
2070 }
2071 Py_DECREF(inner);
2072 return outer;
2073
2074 error:
2075 Py_DECREF(outer);
2076 Py_DECREF(inner);
2077 return NULL;
2078 }
2079
2080
2081 /* Lookup table for small factorial values */
2082
2083 static const unsigned long SmallFactorials[] = {
2084 1, 1, 2, 6, 24, 120, 720, 5040, 40320,
2085 362880, 3628800, 39916800, 479001600,
2086 #if SIZEOF_LONG >= 8
2087 6227020800, 87178291200, 1307674368000,
2088 20922789888000, 355687428096000, 6402373705728000,
2089 121645100408832000, 2432902008176640000
2090 #endif
2091 };
2092
2093 /*[clinic input]
2094 math.factorial
2095
2096 n as arg: object
2097 /
2098
2099 Find n!.
2100
2101 Raise a ValueError if x is negative or non-integral.
2102 [clinic start generated code]*/
2103
2104 static PyObject *
math_factorial(PyObject * module,PyObject * arg)2105 math_factorial(PyObject *module, PyObject *arg)
2106 /*[clinic end generated code: output=6686f26fae00e9ca input=713fb771677e8c31]*/
2107 {
2108 long x, two_valuation;
2109 int overflow;
2110 PyObject *result, *odd_part;
2111
2112 x = PyLong_AsLongAndOverflow(arg, &overflow);
2113 if (x == -1 && PyErr_Occurred()) {
2114 return NULL;
2115 }
2116 else if (overflow == 1) {
2117 PyErr_Format(PyExc_OverflowError,
2118 "factorial() argument should not exceed %ld",
2119 LONG_MAX);
2120 return NULL;
2121 }
2122 else if (overflow == -1 || x < 0) {
2123 PyErr_SetString(PyExc_ValueError,
2124 "factorial() not defined for negative values");
2125 return NULL;
2126 }
2127
2128 /* use lookup table if x is small */
2129 if (x < (long)Py_ARRAY_LENGTH(SmallFactorials))
2130 return PyLong_FromUnsignedLong(SmallFactorials[x]);
2131
2132 /* else express in the form odd_part * 2**two_valuation, and compute as
2133 odd_part << two_valuation. */
2134 odd_part = factorial_odd_part(x);
2135 if (odd_part == NULL)
2136 return NULL;
2137 two_valuation = x - count_set_bits(x);
2138 result = _PyLong_Lshift(odd_part, two_valuation);
2139 Py_DECREF(odd_part);
2140 return result;
2141 }
2142
2143
2144 /*[clinic input]
2145 math.trunc
2146
2147 x: object
2148 /
2149
2150 Truncates the Real x to the nearest Integral toward 0.
2151
2152 Uses the __trunc__ magic method.
2153 [clinic start generated code]*/
2154
2155 static PyObject *
math_trunc(PyObject * module,PyObject * x)2156 math_trunc(PyObject *module, PyObject *x)
2157 /*[clinic end generated code: output=34b9697b707e1031 input=2168b34e0a09134d]*/
2158 {
2159 _Py_IDENTIFIER(__trunc__);
2160 PyObject *trunc, *result;
2161
2162 if (PyFloat_CheckExact(x)) {
2163 return PyFloat_Type.tp_as_number->nb_int(x);
2164 }
2165
2166 if (Py_TYPE(x)->tp_dict == NULL) {
2167 if (PyType_Ready(Py_TYPE(x)) < 0)
2168 return NULL;
2169 }
2170
2171 trunc = _PyObject_LookupSpecialId(x, &PyId___trunc__);
2172 if (trunc == NULL) {
2173 if (!PyErr_Occurred())
2174 PyErr_Format(PyExc_TypeError,
2175 "type %.100s doesn't define __trunc__ method",
2176 Py_TYPE(x)->tp_name);
2177 return NULL;
2178 }
2179 result = _PyObject_CallNoArgs(trunc);
2180 Py_DECREF(trunc);
2181 return result;
2182 }
2183
2184
2185 /*[clinic input]
2186 math.frexp
2187
2188 x: double
2189 /
2190
2191 Return the mantissa and exponent of x, as pair (m, e).
2192
2193 m is a float and e is an int, such that x = m * 2.**e.
2194 If x is 0, m and e are both 0. Else 0.5 <= abs(m) < 1.0.
2195 [clinic start generated code]*/
2196
2197 static PyObject *
math_frexp_impl(PyObject * module,double x)2198 math_frexp_impl(PyObject *module, double x)
2199 /*[clinic end generated code: output=03e30d252a15ad4a input=96251c9e208bc6e9]*/
2200 {
2201 int i;
2202 /* deal with special cases directly, to sidestep platform
2203 differences */
2204 if (Py_IS_NAN(x) || Py_IS_INFINITY(x) || !x) {
2205 i = 0;
2206 }
2207 else {
2208 x = frexp(x, &i);
2209 }
2210 return Py_BuildValue("(di)", x, i);
2211 }
2212
2213
2214 /*[clinic input]
2215 math.ldexp
2216
2217 x: double
2218 i: object
2219 /
2220
2221 Return x * (2**i).
2222
2223 This is essentially the inverse of frexp().
2224 [clinic start generated code]*/
2225
2226 static PyObject *
math_ldexp_impl(PyObject * module,double x,PyObject * i)2227 math_ldexp_impl(PyObject *module, double x, PyObject *i)
2228 /*[clinic end generated code: output=b6892f3c2df9cc6a input=17d5970c1a40a8c1]*/
2229 {
2230 double r;
2231 long exp;
2232 int overflow;
2233
2234 if (PyLong_Check(i)) {
2235 /* on overflow, replace exponent with either LONG_MAX
2236 or LONG_MIN, depending on the sign. */
2237 exp = PyLong_AsLongAndOverflow(i, &overflow);
2238 if (exp == -1 && PyErr_Occurred())
2239 return NULL;
2240 if (overflow)
2241 exp = overflow < 0 ? LONG_MIN : LONG_MAX;
2242 }
2243 else {
2244 PyErr_SetString(PyExc_TypeError,
2245 "Expected an int as second argument to ldexp.");
2246 return NULL;
2247 }
2248
2249 if (x == 0. || !Py_IS_FINITE(x)) {
2250 /* NaNs, zeros and infinities are returned unchanged */
2251 r = x;
2252 errno = 0;
2253 } else if (exp > INT_MAX) {
2254 /* overflow */
2255 r = copysign(Py_HUGE_VAL, x);
2256 errno = ERANGE;
2257 } else if (exp < INT_MIN) {
2258 /* underflow to +-0 */
2259 r = copysign(0., x);
2260 errno = 0;
2261 } else {
2262 errno = 0;
2263 r = ldexp(x, (int)exp);
2264 if (Py_IS_INFINITY(r))
2265 errno = ERANGE;
2266 }
2267
2268 if (errno && is_error(r))
2269 return NULL;
2270 return PyFloat_FromDouble(r);
2271 }
2272
2273
2274 /*[clinic input]
2275 math.modf
2276
2277 x: double
2278 /
2279
2280 Return the fractional and integer parts of x.
2281
2282 Both results carry the sign of x and are floats.
2283 [clinic start generated code]*/
2284
2285 static PyObject *
math_modf_impl(PyObject * module,double x)2286 math_modf_impl(PyObject *module, double x)
2287 /*[clinic end generated code: output=90cee0260014c3c0 input=b4cfb6786afd9035]*/
2288 {
2289 double y;
2290 /* some platforms don't do the right thing for NaNs and
2291 infinities, so we take care of special cases directly. */
2292 if (!Py_IS_FINITE(x)) {
2293 if (Py_IS_INFINITY(x))
2294 return Py_BuildValue("(dd)", copysign(0., x), x);
2295 else if (Py_IS_NAN(x))
2296 return Py_BuildValue("(dd)", x, x);
2297 }
2298
2299 errno = 0;
2300 x = modf(x, &y);
2301 return Py_BuildValue("(dd)", x, y);
2302 }
2303
2304
2305 /* A decent logarithm is easy to compute even for huge ints, but libm can't
2306 do that by itself -- loghelper can. func is log or log10, and name is
2307 "log" or "log10". Note that overflow of the result isn't possible: an int
2308 can contain no more than INT_MAX * SHIFT bits, so has value certainly less
2309 than 2**(2**64 * 2**16) == 2**2**80, and log2 of that is 2**80, which is
2310 small enough to fit in an IEEE single. log and log10 are even smaller.
2311 However, intermediate overflow is possible for an int if the number of bits
2312 in that int is larger than PY_SSIZE_T_MAX. */
2313
2314 static PyObject*
loghelper(PyObject * arg,double (* func)(double),const char * funcname)2315 loghelper(PyObject* arg, double (*func)(double), const char *funcname)
2316 {
2317 /* If it is int, do it ourselves. */
2318 if (PyLong_Check(arg)) {
2319 double x, result;
2320 Py_ssize_t e;
2321
2322 /* Negative or zero inputs give a ValueError. */
2323 if (Py_SIZE(arg) <= 0) {
2324 PyErr_SetString(PyExc_ValueError,
2325 "math domain error");
2326 return NULL;
2327 }
2328
2329 x = PyLong_AsDouble(arg);
2330 if (x == -1.0 && PyErr_Occurred()) {
2331 if (!PyErr_ExceptionMatches(PyExc_OverflowError))
2332 return NULL;
2333 /* Here the conversion to double overflowed, but it's possible
2334 to compute the log anyway. Clear the exception and continue. */
2335 PyErr_Clear();
2336 x = _PyLong_Frexp((PyLongObject *)arg, &e);
2337 if (x == -1.0 && PyErr_Occurred())
2338 return NULL;
2339 /* Value is ~= x * 2**e, so the log ~= log(x) + log(2) * e. */
2340 result = func(x) + func(2.0) * e;
2341 }
2342 else
2343 /* Successfully converted x to a double. */
2344 result = func(x);
2345 return PyFloat_FromDouble(result);
2346 }
2347
2348 /* Else let libm handle it by itself. */
2349 return math_1(arg, func, 0);
2350 }
2351
2352
2353 /*[clinic input]
2354 math.log
2355
2356 x: object
2357 [
2358 base: object(c_default="NULL") = math.e
2359 ]
2360 /
2361
2362 Return the logarithm of x to the given base.
2363
2364 If the base not specified, returns the natural logarithm (base e) of x.
2365 [clinic start generated code]*/
2366
2367 static PyObject *
math_log_impl(PyObject * module,PyObject * x,int group_right_1,PyObject * base)2368 math_log_impl(PyObject *module, PyObject *x, int group_right_1,
2369 PyObject *base)
2370 /*[clinic end generated code: output=7b5a39e526b73fc9 input=0f62d5726cbfebbd]*/
2371 {
2372 PyObject *num, *den;
2373 PyObject *ans;
2374
2375 num = loghelper(x, m_log, "log");
2376 if (num == NULL || base == NULL)
2377 return num;
2378
2379 den = loghelper(base, m_log, "log");
2380 if (den == NULL) {
2381 Py_DECREF(num);
2382 return NULL;
2383 }
2384
2385 ans = PyNumber_TrueDivide(num, den);
2386 Py_DECREF(num);
2387 Py_DECREF(den);
2388 return ans;
2389 }
2390
2391
2392 /*[clinic input]
2393 math.log2
2394
2395 x: object
2396 /
2397
2398 Return the base 2 logarithm of x.
2399 [clinic start generated code]*/
2400
2401 static PyObject *
math_log2(PyObject * module,PyObject * x)2402 math_log2(PyObject *module, PyObject *x)
2403 /*[clinic end generated code: output=5425899a4d5d6acb input=08321262bae4f39b]*/
2404 {
2405 return loghelper(x, m_log2, "log2");
2406 }
2407
2408
2409 /*[clinic input]
2410 math.log10
2411
2412 x: object
2413 /
2414
2415 Return the base 10 logarithm of x.
2416 [clinic start generated code]*/
2417
2418 static PyObject *
math_log10(PyObject * module,PyObject * x)2419 math_log10(PyObject *module, PyObject *x)
2420 /*[clinic end generated code: output=be72a64617df9c6f input=b2469d02c6469e53]*/
2421 {
2422 return loghelper(x, m_log10, "log10");
2423 }
2424
2425
2426 /*[clinic input]
2427 math.fmod
2428
2429 x: double
2430 y: double
2431 /
2432
2433 Return fmod(x, y), according to platform C.
2434
2435 x % y may differ.
2436 [clinic start generated code]*/
2437
2438 static PyObject *
math_fmod_impl(PyObject * module,double x,double y)2439 math_fmod_impl(PyObject *module, double x, double y)
2440 /*[clinic end generated code: output=7559d794343a27b5 input=4f84caa8cfc26a03]*/
2441 {
2442 double r;
2443 /* fmod(x, +/-Inf) returns x for finite x. */
2444 if (Py_IS_INFINITY(y) && Py_IS_FINITE(x))
2445 return PyFloat_FromDouble(x);
2446 errno = 0;
2447 r = fmod(x, y);
2448 if (Py_IS_NAN(r)) {
2449 if (!Py_IS_NAN(x) && !Py_IS_NAN(y))
2450 errno = EDOM;
2451 else
2452 errno = 0;
2453 }
2454 if (errno && is_error(r))
2455 return NULL;
2456 else
2457 return PyFloat_FromDouble(r);
2458 }
2459
2460 /*
2461 Given a *vec* of values, compute the vector norm:
2462
2463 sqrt(sum(x ** 2 for x in vec))
2464
2465 The *max* variable should be equal to the largest fabs(x).
2466 The *n* variable is the length of *vec*.
2467 If n==0, then *max* should be 0.0.
2468 If an infinity is present in the vec, *max* should be INF.
2469 The *found_nan* variable indicates whether some member of
2470 the *vec* is a NaN.
2471
2472 To avoid overflow/underflow and to achieve high accuracy giving results
2473 that are almost always correctly rounded, four techniques are used:
2474
2475 * lossless scaling using a power-of-two scaling factor
2476 * accurate squaring using Veltkamp-Dekker splitting [1]
2477 * compensated summation using a variant of the Neumaier algorithm [2]
2478 * differential correction of the square root [3]
2479
2480 The usual presentation of the Neumaier summation algorithm has an
2481 expensive branch depending on which operand has the larger
2482 magnitude. We avoid this cost by arranging the calculation so that
2483 fabs(csum) is always as large as fabs(x).
2484
2485 To establish the invariant, *csum* is initialized to 1.0 which is
2486 always larger than x**2 after scaling or after division by *max*.
2487 After the loop is finished, the initial 1.0 is subtracted out for a
2488 net zero effect on the final sum. Since *csum* will be greater than
2489 1.0, the subtraction of 1.0 will not cause fractional digits to be
2490 dropped from *csum*.
2491
2492 To get the full benefit from compensated summation, the largest
2493 addend should be in the range: 0.5 <= |x| <= 1.0. Accordingly,
2494 scaling or division by *max* should not be skipped even if not
2495 otherwise needed to prevent overflow or loss of precision.
2496
2497 The assertion that hi*hi <= 1.0 is a bit subtle. Each vector element
2498 gets scaled to a magnitude below 1.0. The Veltkamp-Dekker splitting
2499 algorithm gives a *hi* value that is correctly rounded to half
2500 precision. When a value at or below 1.0 is correctly rounded, it
2501 never goes above 1.0. And when values at or below 1.0 are squared,
2502 they remain at or below 1.0, thus preserving the summation invariant.
2503
2504 Another interesting assertion is that csum+lo*lo == csum. In the loop,
2505 each scaled vector element has a magnitude less than 1.0. After the
2506 Veltkamp split, *lo* has a maximum value of 2**-27. So the maximum
2507 value of *lo* squared is 2**-54. The value of ulp(1.0)/2.0 is 2**-53.
2508 Given that csum >= 1.0, we have:
2509 lo**2 <= 2**-54 < 2**-53 == 1/2*ulp(1.0) <= ulp(csum)/2
2510 Since lo**2 is less than 1/2 ulp(csum), we have csum+lo*lo == csum.
2511
2512 To minimize loss of information during the accumulation of fractional
2513 values, each term has a separate accumulator. This also breaks up
2514 sequential dependencies in the inner loop so the CPU can maximize
2515 floating point throughput. [4] On a 2.6 GHz Haswell, adding one
2516 dimension has an incremental cost of only 5ns -- for example when
2517 moving from hypot(x,y) to hypot(x,y,z).
2518
2519 The square root differential correction is needed because a
2520 correctly rounded square root of a correctly rounded sum of
2521 squares can still be off by as much as one ulp.
2522
2523 The differential correction starts with a value *x* that is
2524 the difference between the square of *h*, the possibly inaccurately
2525 rounded square root, and the accurately computed sum of squares.
2526 The correction is the first order term of the Maclaurin series
2527 expansion of sqrt(h**2 + x) == h + x/(2*h) + O(x**2). [5]
2528
2529 Essentially, this differential correction is equivalent to one
2530 refinement step in Newton's divide-and-average square root
2531 algorithm, effectively doubling the number of accurate bits.
2532 This technique is used in Dekker's SQRT2 algorithm and again in
2533 Borges' ALGORITHM 4 and 5.
2534
2535 Without proof for all cases, hypot() cannot claim to be always
2536 correctly rounded. However for n <= 1000, prior to the final addition
2537 that rounds the overall result, the internal accuracy of "h" together
2538 with its correction of "x / (2.0 * h)" is at least 100 bits. [6]
2539 Also, hypot() was tested against a Decimal implementation with
2540 prec=300. After 100 million trials, no incorrectly rounded examples
2541 were found. In addition, perfect commutativity (all permutations are
2542 exactly equal) was verified for 1 billion random inputs with n=5. [7]
2543
2544 References:
2545
2546 1. Veltkamp-Dekker splitting: http://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
2547 2. Compensated summation: http://www.ti3.tu-harburg.de/paper/rump/Ru08b.pdf
2548 3. Square root differential correction: https://arxiv.org/pdf/1904.09481.pdf
2549 4. Data dependency graph: https://bugs.python.org/file49439/hypot.png
2550 5. https://www.wolframalpha.com/input/?i=Maclaurin+series+sqrt%28h**2+%2B+x%29+at+x%3D0
2551 6. Analysis of internal accuracy: https://bugs.python.org/file49484/best_frac.py
2552 7. Commutativity test: https://bugs.python.org/file49448/test_hypot_commutativity.py
2553
2554 */
2555
2556 static inline double
vector_norm(Py_ssize_t n,double * vec,double max,int found_nan)2557 vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
2558 {
2559 const double T27 = 134217729.0; /* ldexp(1.0, 27) + 1.0) */
2560 double x, scale, oldcsum, csum = 1.0, frac1 = 0.0, frac2 = 0.0, frac3 = 0.0;
2561 double t, hi, lo, h;
2562 int max_e;
2563 Py_ssize_t i;
2564
2565 if (Py_IS_INFINITY(max)) {
2566 return max;
2567 }
2568 if (found_nan) {
2569 return Py_NAN;
2570 }
2571 if (max == 0.0 || n <= 1) {
2572 return max;
2573 }
2574 frexp(max, &max_e);
2575 if (max_e >= -1023) {
2576 scale = ldexp(1.0, -max_e);
2577 assert(max * scale >= 0.5);
2578 assert(max * scale < 1.0);
2579 for (i=0 ; i < n ; i++) {
2580 x = vec[i];
2581 assert(Py_IS_FINITE(x) && fabs(x) <= max);
2582
2583 x *= scale;
2584 assert(fabs(x) < 1.0);
2585
2586 t = x * T27;
2587 hi = t - (t - x);
2588 lo = x - hi;
2589 assert(hi + lo == x);
2590
2591 x = hi * hi;
2592 assert(x <= 1.0);
2593 assert(fabs(csum) >= fabs(x));
2594 oldcsum = csum;
2595 csum += x;
2596 frac1 += (oldcsum - csum) + x;
2597
2598 x = 2.0 * hi * lo;
2599 assert(fabs(csum) >= fabs(x));
2600 oldcsum = csum;
2601 csum += x;
2602 frac2 += (oldcsum - csum) + x;
2603
2604 assert(csum + lo * lo == csum);
2605 frac3 += lo * lo;
2606 }
2607 h = sqrt(csum - 1.0 + (frac1 + frac2 + frac3));
2608
2609 x = h;
2610 t = x * T27;
2611 hi = t - (t - x);
2612 lo = x - hi;
2613 assert (hi + lo == x);
2614
2615 x = -hi * hi;
2616 assert(fabs(csum) >= fabs(x));
2617 oldcsum = csum;
2618 csum += x;
2619 frac1 += (oldcsum - csum) + x;
2620
2621 x = -2.0 * hi * lo;
2622 assert(fabs(csum) >= fabs(x));
2623 oldcsum = csum;
2624 csum += x;
2625 frac2 += (oldcsum - csum) + x;
2626
2627 x = -lo * lo;
2628 assert(fabs(csum) >= fabs(x));
2629 oldcsum = csum;
2630 csum += x;
2631 frac3 += (oldcsum - csum) + x;
2632
2633 x = csum - 1.0 + (frac1 + frac2 + frac3);
2634 return (h + x / (2.0 * h)) / scale;
2635 }
2636 /* When max_e < -1023, ldexp(1.0, -max_e) overflows.
2637 So instead of multiplying by a scale, we just divide by *max*.
2638 */
2639 for (i=0 ; i < n ; i++) {
2640 x = vec[i];
2641 assert(Py_IS_FINITE(x) && fabs(x) <= max);
2642 x /= max;
2643 x = x*x;
2644 assert(x <= 1.0);
2645 assert(fabs(csum) >= fabs(x));
2646 oldcsum = csum;
2647 csum += x;
2648 frac1 += (oldcsum - csum) + x;
2649 }
2650 return max * sqrt(csum - 1.0 + frac1);
2651 }
2652
2653 #define NUM_STACK_ELEMS 16
2654
2655 /*[clinic input]
2656 math.dist
2657
2658 p: object
2659 q: object
2660 /
2661
2662 Return the Euclidean distance between two points p and q.
2663
2664 The points should be specified as sequences (or iterables) of
2665 coordinates. Both inputs must have the same dimension.
2666
2667 Roughly equivalent to:
2668 sqrt(sum((px - qx) ** 2.0 for px, qx in zip(p, q)))
2669 [clinic start generated code]*/
2670
2671 static PyObject *
math_dist_impl(PyObject * module,PyObject * p,PyObject * q)2672 math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
2673 /*[clinic end generated code: output=56bd9538d06bbcfe input=74e85e1b6092e68e]*/
2674 {
2675 PyObject *item;
2676 double max = 0.0;
2677 double x, px, qx, result;
2678 Py_ssize_t i, m, n;
2679 int found_nan = 0, p_allocated = 0, q_allocated = 0;
2680 double diffs_on_stack[NUM_STACK_ELEMS];
2681 double *diffs = diffs_on_stack;
2682
2683 if (!PyTuple_Check(p)) {
2684 p = PySequence_Tuple(p);
2685 if (p == NULL) {
2686 return NULL;
2687 }
2688 p_allocated = 1;
2689 }
2690 if (!PyTuple_Check(q)) {
2691 q = PySequence_Tuple(q);
2692 if (q == NULL) {
2693 if (p_allocated) {
2694 Py_DECREF(p);
2695 }
2696 return NULL;
2697 }
2698 q_allocated = 1;
2699 }
2700
2701 m = PyTuple_GET_SIZE(p);
2702 n = PyTuple_GET_SIZE(q);
2703 if (m != n) {
2704 PyErr_SetString(PyExc_ValueError,
2705 "both points must have the same number of dimensions");
2706 goto error_exit;
2707 }
2708 if (n > NUM_STACK_ELEMS) {
2709 diffs = (double *) PyObject_Malloc(n * sizeof(double));
2710 if (diffs == NULL) {
2711 PyErr_NoMemory();
2712 goto error_exit;
2713 }
2714 }
2715 for (i=0 ; i<n ; i++) {
2716 item = PyTuple_GET_ITEM(p, i);
2717 ASSIGN_DOUBLE(px, item, error_exit);
2718 item = PyTuple_GET_ITEM(q, i);
2719 ASSIGN_DOUBLE(qx, item, error_exit);
2720 x = fabs(px - qx);
2721 diffs[i] = x;
2722 found_nan |= Py_IS_NAN(x);
2723 if (x > max) {
2724 max = x;
2725 }
2726 }
2727 result = vector_norm(n, diffs, max, found_nan);
2728 if (diffs != diffs_on_stack) {
2729 PyObject_Free(diffs);
2730 }
2731 if (p_allocated) {
2732 Py_DECREF(p);
2733 }
2734 if (q_allocated) {
2735 Py_DECREF(q);
2736 }
2737 return PyFloat_FromDouble(result);
2738
2739 error_exit:
2740 if (diffs != diffs_on_stack) {
2741 PyObject_Free(diffs);
2742 }
2743 if (p_allocated) {
2744 Py_DECREF(p);
2745 }
2746 if (q_allocated) {
2747 Py_DECREF(q);
2748 }
2749 return NULL;
2750 }
2751
2752 /* AC: cannot convert yet, waiting for *args support */
2753 static PyObject *
math_hypot(PyObject * self,PyObject * const * args,Py_ssize_t nargs)2754 math_hypot(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
2755 {
2756 Py_ssize_t i;
2757 PyObject *item;
2758 double max = 0.0;
2759 double x, result;
2760 int found_nan = 0;
2761 double coord_on_stack[NUM_STACK_ELEMS];
2762 double *coordinates = coord_on_stack;
2763
2764 if (nargs > NUM_STACK_ELEMS) {
2765 coordinates = (double *) PyObject_Malloc(nargs * sizeof(double));
2766 if (coordinates == NULL) {
2767 return PyErr_NoMemory();
2768 }
2769 }
2770 for (i = 0; i < nargs; i++) {
2771 item = args[i];
2772 ASSIGN_DOUBLE(x, item, error_exit);
2773 x = fabs(x);
2774 coordinates[i] = x;
2775 found_nan |= Py_IS_NAN(x);
2776 if (x > max) {
2777 max = x;
2778 }
2779 }
2780 result = vector_norm(nargs, coordinates, max, found_nan);
2781 if (coordinates != coord_on_stack) {
2782 PyObject_Free(coordinates);
2783 }
2784 return PyFloat_FromDouble(result);
2785
2786 error_exit:
2787 if (coordinates != coord_on_stack) {
2788 PyObject_Free(coordinates);
2789 }
2790 return NULL;
2791 }
2792
2793 #undef NUM_STACK_ELEMS
2794
2795 PyDoc_STRVAR(math_hypot_doc,
2796 "hypot(*coordinates) -> value\n\n\
2797 Multidimensional Euclidean distance from the origin to a point.\n\
2798 \n\
2799 Roughly equivalent to:\n\
2800 sqrt(sum(x**2 for x in coordinates))\n\
2801 \n\
2802 For a two dimensional point (x, y), gives the hypotenuse\n\
2803 using the Pythagorean theorem: sqrt(x*x + y*y).\n\
2804 \n\
2805 For example, the hypotenuse of a 3/4/5 right triangle is:\n\
2806 \n\
2807 >>> hypot(3.0, 4.0)\n\
2808 5.0\n\
2809 ");
2810
2811 /* pow can't use math_2, but needs its own wrapper: the problem is
2812 that an infinite result can arise either as a result of overflow
2813 (in which case OverflowError should be raised) or as a result of
2814 e.g. 0.**-5. (for which ValueError needs to be raised.)
2815 */
2816
2817 /*[clinic input]
2818 math.pow
2819
2820 x: double
2821 y: double
2822 /
2823
2824 Return x**y (x to the power of y).
2825 [clinic start generated code]*/
2826
2827 static PyObject *
math_pow_impl(PyObject * module,double x,double y)2828 math_pow_impl(PyObject *module, double x, double y)
2829 /*[clinic end generated code: output=fff93e65abccd6b0 input=c26f1f6075088bfd]*/
2830 {
2831 double r;
2832 int odd_y;
2833
2834 /* deal directly with IEEE specials, to cope with problems on various
2835 platforms whose semantics don't exactly match C99 */
2836 r = 0.; /* silence compiler warning */
2837 if (!Py_IS_FINITE(x) || !Py_IS_FINITE(y)) {
2838 errno = 0;
2839 if (Py_IS_NAN(x))
2840 r = y == 0. ? 1. : x; /* NaN**0 = 1 */
2841 else if (Py_IS_NAN(y))
2842 r = x == 1. ? 1. : y; /* 1**NaN = 1 */
2843 else if (Py_IS_INFINITY(x)) {
2844 odd_y = Py_IS_FINITE(y) && fmod(fabs(y), 2.0) == 1.0;
2845 if (y > 0.)
2846 r = odd_y ? x : fabs(x);
2847 else if (y == 0.)
2848 r = 1.;
2849 else /* y < 0. */
2850 r = odd_y ? copysign(0., x) : 0.;
2851 }
2852 else if (Py_IS_INFINITY(y)) {
2853 if (fabs(x) == 1.0)
2854 r = 1.;
2855 else if (y > 0. && fabs(x) > 1.0)
2856 r = y;
2857 else if (y < 0. && fabs(x) < 1.0) {
2858 r = -y; /* result is +inf */
2859 }
2860 else
2861 r = 0.;
2862 }
2863 }
2864 else {
2865 /* let libm handle finite**finite */
2866 errno = 0;
2867 r = pow(x, y);
2868 /* a NaN result should arise only from (-ve)**(finite
2869 non-integer); in this case we want to raise ValueError. */
2870 if (!Py_IS_FINITE(r)) {
2871 if (Py_IS_NAN(r)) {
2872 errno = EDOM;
2873 }
2874 /*
2875 an infinite result here arises either from:
2876 (A) (+/-0.)**negative (-> divide-by-zero)
2877 (B) overflow of x**y with x and y finite
2878 */
2879 else if (Py_IS_INFINITY(r)) {
2880 if (x == 0.)
2881 errno = EDOM;
2882 else
2883 errno = ERANGE;
2884 }
2885 }
2886 }
2887
2888 if (errno && is_error(r))
2889 return NULL;
2890 else
2891 return PyFloat_FromDouble(r);
2892 }
2893
2894
2895 static const double degToRad = Py_MATH_PI / 180.0;
2896 static const double radToDeg = 180.0 / Py_MATH_PI;
2897
2898 /*[clinic input]
2899 math.degrees
2900
2901 x: double
2902 /
2903
2904 Convert angle x from radians to degrees.
2905 [clinic start generated code]*/
2906
2907 static PyObject *
math_degrees_impl(PyObject * module,double x)2908 math_degrees_impl(PyObject *module, double x)
2909 /*[clinic end generated code: output=7fea78b294acd12f input=81e016555d6e3660]*/
2910 {
2911 return PyFloat_FromDouble(x * radToDeg);
2912 }
2913
2914
2915 /*[clinic input]
2916 math.radians
2917
2918 x: double
2919 /
2920
2921 Convert angle x from degrees to radians.
2922 [clinic start generated code]*/
2923
2924 static PyObject *
math_radians_impl(PyObject * module,double x)2925 math_radians_impl(PyObject *module, double x)
2926 /*[clinic end generated code: output=34daa47caf9b1590 input=91626fc489fe3d63]*/
2927 {
2928 return PyFloat_FromDouble(x * degToRad);
2929 }
2930
2931
2932 /*[clinic input]
2933 math.isfinite
2934
2935 x: double
2936 /
2937
2938 Return True if x is neither an infinity nor a NaN, and False otherwise.
2939 [clinic start generated code]*/
2940
2941 static PyObject *
math_isfinite_impl(PyObject * module,double x)2942 math_isfinite_impl(PyObject *module, double x)
2943 /*[clinic end generated code: output=8ba1f396440c9901 input=46967d254812e54a]*/
2944 {
2945 return PyBool_FromLong((long)Py_IS_FINITE(x));
2946 }
2947
2948
2949 /*[clinic input]
2950 math.isnan
2951
2952 x: double
2953 /
2954
2955 Return True if x is a NaN (not a number), and False otherwise.
2956 [clinic start generated code]*/
2957
2958 static PyObject *
math_isnan_impl(PyObject * module,double x)2959 math_isnan_impl(PyObject *module, double x)
2960 /*[clinic end generated code: output=f537b4d6df878c3e input=935891e66083f46a]*/
2961 {
2962 return PyBool_FromLong((long)Py_IS_NAN(x));
2963 }
2964
2965
2966 /*[clinic input]
2967 math.isinf
2968
2969 x: double
2970 /
2971
2972 Return True if x is a positive or negative infinity, and False otherwise.
2973 [clinic start generated code]*/
2974
2975 static PyObject *
math_isinf_impl(PyObject * module,double x)2976 math_isinf_impl(PyObject *module, double x)
2977 /*[clinic end generated code: output=9f00cbec4de7b06b input=32630e4212cf961f]*/
2978 {
2979 return PyBool_FromLong((long)Py_IS_INFINITY(x));
2980 }
2981
2982
2983 /*[clinic input]
2984 math.isclose -> bool
2985
2986 a: double
2987 b: double
2988 *
2989 rel_tol: double = 1e-09
2990 maximum difference for being considered "close", relative to the
2991 magnitude of the input values
2992 abs_tol: double = 0.0
2993 maximum difference for being considered "close", regardless of the
2994 magnitude of the input values
2995
2996 Determine whether two floating point numbers are close in value.
2997
2998 Return True if a is close in value to b, and False otherwise.
2999
3000 For the values to be considered close, the difference between them
3001 must be smaller than at least one of the tolerances.
3002
3003 -inf, inf and NaN behave similarly to the IEEE 754 Standard. That
3004 is, NaN is not close to anything, even itself. inf and -inf are
3005 only close to themselves.
3006 [clinic start generated code]*/
3007
3008 static int
math_isclose_impl(PyObject * module,double a,double b,double rel_tol,double abs_tol)3009 math_isclose_impl(PyObject *module, double a, double b, double rel_tol,
3010 double abs_tol)
3011 /*[clinic end generated code: output=b73070207511952d input=f28671871ea5bfba]*/
3012 {
3013 double diff = 0.0;
3014
3015 /* sanity check on the inputs */
3016 if (rel_tol < 0.0 || abs_tol < 0.0 ) {
3017 PyErr_SetString(PyExc_ValueError,
3018 "tolerances must be non-negative");
3019 return -1;
3020 }
3021
3022 if ( a == b ) {
3023 /* short circuit exact equality -- needed to catch two infinities of
3024 the same sign. And perhaps speeds things up a bit sometimes.
3025 */
3026 return 1;
3027 }
3028
3029 /* This catches the case of two infinities of opposite sign, or
3030 one infinity and one finite number. Two infinities of opposite
3031 sign would otherwise have an infinite relative tolerance.
3032 Two infinities of the same sign are caught by the equality check
3033 above.
3034 */
3035
3036 if (Py_IS_INFINITY(a) || Py_IS_INFINITY(b)) {
3037 return 0;
3038 }
3039
3040 /* now do the regular computation
3041 this is essentially the "weak" test from the Boost library
3042 */
3043
3044 diff = fabs(b - a);
3045
3046 return (((diff <= fabs(rel_tol * b)) ||
3047 (diff <= fabs(rel_tol * a))) ||
3048 (diff <= abs_tol));
3049 }
3050
3051 static inline int
_check_long_mult_overflow(long a,long b)3052 _check_long_mult_overflow(long a, long b) {
3053
3054 /* From Python2's int_mul code:
3055
3056 Integer overflow checking for * is painful: Python tried a couple ways, but
3057 they didn't work on all platforms, or failed in endcases (a product of
3058 -sys.maxint-1 has been a particular pain).
3059
3060 Here's another way:
3061
3062 The native long product x*y is either exactly right or *way* off, being
3063 just the last n bits of the true product, where n is the number of bits
3064 in a long (the delivered product is the true product plus i*2**n for
3065 some integer i).
3066
3067 The native double product (double)x * (double)y is subject to three
3068 rounding errors: on a sizeof(long)==8 box, each cast to double can lose
3069 info, and even on a sizeof(long)==4 box, the multiplication can lose info.
3070 But, unlike the native long product, it's not in *range* trouble: even
3071 if sizeof(long)==32 (256-bit longs), the product easily fits in the
3072 dynamic range of a double. So the leading 50 (or so) bits of the double
3073 product are correct.
3074
3075 We check these two ways against each other, and declare victory if they're
3076 approximately the same. Else, because the native long product is the only
3077 one that can lose catastrophic amounts of information, it's the native long
3078 product that must have overflowed.
3079
3080 */
3081
3082 long longprod = (long)((unsigned long)a * b);
3083 double doubleprod = (double)a * (double)b;
3084 double doubled_longprod = (double)longprod;
3085
3086 if (doubled_longprod == doubleprod) {
3087 return 0;
3088 }
3089
3090 const double diff = doubled_longprod - doubleprod;
3091 const double absdiff = diff >= 0.0 ? diff : -diff;
3092 const double absprod = doubleprod >= 0.0 ? doubleprod : -doubleprod;
3093
3094 if (32.0 * absdiff <= absprod) {
3095 return 0;
3096 }
3097
3098 return 1;
3099 }
3100
3101 /*[clinic input]
3102 math.prod
3103
3104 iterable: object
3105 /
3106 *
3107 start: object(c_default="NULL") = 1
3108
3109 Calculate the product of all the elements in the input iterable.
3110
3111 The default start value for the product is 1.
3112
3113 When the iterable is empty, return the start value. This function is
3114 intended specifically for use with numeric values and may reject
3115 non-numeric types.
3116 [clinic start generated code]*/
3117
3118 static PyObject *
math_prod_impl(PyObject * module,PyObject * iterable,PyObject * start)3119 math_prod_impl(PyObject *module, PyObject *iterable, PyObject *start)
3120 /*[clinic end generated code: output=36153bedac74a198 input=4c5ab0682782ed54]*/
3121 {
3122 PyObject *result = start;
3123 PyObject *temp, *item, *iter;
3124
3125 iter = PyObject_GetIter(iterable);
3126 if (iter == NULL) {
3127 return NULL;
3128 }
3129
3130 if (result == NULL) {
3131 result = _PyLong_GetOne();
3132 }
3133 Py_INCREF(result);
3134 #ifndef SLOW_PROD
3135 /* Fast paths for integers keeping temporary products in C.
3136 * Assumes all inputs are the same type.
3137 * If the assumption fails, default to use PyObjects instead.
3138 */
3139 if (PyLong_CheckExact(result)) {
3140 int overflow;
3141 long i_result = PyLong_AsLongAndOverflow(result, &overflow);
3142 /* If this already overflowed, don't even enter the loop. */
3143 if (overflow == 0) {
3144 Py_DECREF(result);
3145 result = NULL;
3146 }
3147 /* Loop over all the items in the iterable until we finish, we overflow
3148 * or we found a non integer element */
3149 while (result == NULL) {
3150 item = PyIter_Next(iter);
3151 if (item == NULL) {
3152 Py_DECREF(iter);
3153 if (PyErr_Occurred()) {
3154 return NULL;
3155 }
3156 return PyLong_FromLong(i_result);
3157 }
3158 if (PyLong_CheckExact(item)) {
3159 long b = PyLong_AsLongAndOverflow(item, &overflow);
3160 if (overflow == 0 && !_check_long_mult_overflow(i_result, b)) {
3161 long x = i_result * b;
3162 i_result = x;
3163 Py_DECREF(item);
3164 continue;
3165 }
3166 }
3167 /* Either overflowed or is not an int.
3168 * Restore real objects and process normally */
3169 result = PyLong_FromLong(i_result);
3170 if (result == NULL) {
3171 Py_DECREF(item);
3172 Py_DECREF(iter);
3173 return NULL;
3174 }
3175 temp = PyNumber_Multiply(result, item);
3176 Py_DECREF(result);
3177 Py_DECREF(item);
3178 result = temp;
3179 if (result == NULL) {
3180 Py_DECREF(iter);
3181 return NULL;
3182 }
3183 }
3184 }
3185
3186 /* Fast paths for floats keeping temporary products in C.
3187 * Assumes all inputs are the same type.
3188 * If the assumption fails, default to use PyObjects instead.
3189 */
3190 if (PyFloat_CheckExact(result)) {
3191 double f_result = PyFloat_AS_DOUBLE(result);
3192 Py_DECREF(result);
3193 result = NULL;
3194 while(result == NULL) {
3195 item = PyIter_Next(iter);
3196 if (item == NULL) {
3197 Py_DECREF(iter);
3198 if (PyErr_Occurred()) {
3199 return NULL;
3200 }
3201 return PyFloat_FromDouble(f_result);
3202 }
3203 if (PyFloat_CheckExact(item)) {
3204 f_result *= PyFloat_AS_DOUBLE(item);
3205 Py_DECREF(item);
3206 continue;
3207 }
3208 if (PyLong_CheckExact(item)) {
3209 long value;
3210 int overflow;
3211 value = PyLong_AsLongAndOverflow(item, &overflow);
3212 if (!overflow) {
3213 f_result *= (double)value;
3214 Py_DECREF(item);
3215 continue;
3216 }
3217 }
3218 result = PyFloat_FromDouble(f_result);
3219 if (result == NULL) {
3220 Py_DECREF(item);
3221 Py_DECREF(iter);
3222 return NULL;
3223 }
3224 temp = PyNumber_Multiply(result, item);
3225 Py_DECREF(result);
3226 Py_DECREF(item);
3227 result = temp;
3228 if (result == NULL) {
3229 Py_DECREF(iter);
3230 return NULL;
3231 }
3232 }
3233 }
3234 #endif
3235 /* Consume rest of the iterable (if any) that could not be handled
3236 * by specialized functions above.*/
3237 for(;;) {
3238 item = PyIter_Next(iter);
3239 if (item == NULL) {
3240 /* error, or end-of-sequence */
3241 if (PyErr_Occurred()) {
3242 Py_DECREF(result);
3243 result = NULL;
3244 }
3245 break;
3246 }
3247 temp = PyNumber_Multiply(result, item);
3248 Py_DECREF(result);
3249 Py_DECREF(item);
3250 result = temp;
3251 if (result == NULL)
3252 break;
3253 }
3254 Py_DECREF(iter);
3255 return result;
3256 }
3257
3258
3259 /* least significant 64 bits of the odd part of factorial(n), for n in range(128).
3260
3261 Python code to generate the values:
3262
3263 import math
3264
3265 for n in range(128):
3266 fac = math.factorial(n)
3267 fac_odd_part = fac // (fac & -fac)
3268 reduced_fac_odd_part = fac_odd_part % (2**64)
3269 print(f"{reduced_fac_odd_part:#018x}u")
3270 */
3271 static const uint64_t reduced_factorial_odd_part[] = {
3272 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000003u,
3273 0x0000000000000003u, 0x000000000000000fu, 0x000000000000002du, 0x000000000000013bu,
3274 0x000000000000013bu, 0x0000000000000b13u, 0x000000000000375fu, 0x0000000000026115u,
3275 0x000000000007233fu, 0x00000000005cca33u, 0x0000000002898765u, 0x00000000260eeeebu,
3276 0x00000000260eeeebu, 0x0000000286fddd9bu, 0x00000016beecca73u, 0x000001b02b930689u,
3277 0x00000870d9df20adu, 0x0000b141df4dae31u, 0x00079dd498567c1bu, 0x00af2e19afc5266du,
3278 0x020d8a4d0f4f7347u, 0x335281867ec241efu, 0x9b3093d46fdd5923u, 0x5e1f9767cc5866b1u,
3279 0x92dd23d6966aced7u, 0xa30d0f4f0a196e5bu, 0x8dc3e5a1977d7755u, 0x2ab8ce915831734bu,
3280 0x2ab8ce915831734bu, 0x81d2a0bc5e5fdcabu, 0x9efcac82445da75bu, 0xbc8b95cf58cde171u,
3281 0xa0e8444a1f3cecf9u, 0x4191deb683ce3ffdu, 0xddd3878bc84ebfc7u, 0xcb39a64b83ff3751u,
3282 0xf8203f7993fc1495u, 0xbd2a2a78b35f4bddu, 0x84757be6b6d13921u, 0x3fbbcfc0b524988bu,
3283 0xbd11ed47c8928df9u, 0x3c26b59e41c2f4c5u, 0x677a5137e883fdb3u, 0xff74e943b03b93ddu,
3284 0xfe5ebbcb10b2bb97u, 0xb021f1de3235e7e7u, 0x33509eb2e743a58fu, 0x390f9da41279fb7du,
3285 0xe5cb0154f031c559u, 0x93074695ba4ddb6du, 0x81c471caa636247fu, 0xe1347289b5a1d749u,
3286 0x286f21c3f76ce2ffu, 0x00be84a2173e8ac7u, 0x1595065ca215b88bu, 0xf95877595b018809u,
3287 0x9c2efe3c5516f887u, 0x373294604679382bu, 0xaf1ff7a888adcd35u, 0x18ddf279a2c5800bu,
3288 0x18ddf279a2c5800bu, 0x505a90e2542582cbu, 0x5bacad2cd8d5dc2bu, 0xfe3152bcbff89f41u,
3289 0xe1467e88bf829351u, 0xb8001adb9e31b4d5u, 0x2803ac06a0cbb91fu, 0x1904b5d698805799u,
3290 0xe12a648b5c831461u, 0x3516abbd6160cfa9u, 0xac46d25f12fe036du, 0x78bfa1da906b00efu,
3291 0xf6390338b7f111bdu, 0x0f25f80f538255d9u, 0x4ec8ca55b8db140fu, 0x4ff670740b9b30a1u,
3292 0x8fd032443a07f325u, 0x80dfe7965c83eeb5u, 0xa3dc1714d1213afdu, 0x205b7bbfcdc62007u,
3293 0xa78126bbe140a093u, 0x9de1dc61ca7550cfu, 0x84f0046d01b492c5u, 0x2d91810b945de0f3u,
3294 0xf5408b7f6008aa71u, 0x43707f4863034149u, 0xdac65fb9679279d5u, 0xc48406e7d1114eb7u,
3295 0xa7dc9ed3c88e1271u, 0xfb25b2efdb9cb30du, 0x1bebda0951c4df63u, 0x5c85e975580ee5bdu,
3296 0x1591bc60082cb137u, 0x2c38606318ef25d7u, 0x76ca72f7c5c63e27u, 0xf04a75d17baa0915u,
3297 0x77458175139ae30du, 0x0e6c1330bc1b9421u, 0xdf87d2b5797e8293u, 0xefa5c703e1e68925u,
3298 0x2b6b1b3278b4f6e1u, 0xceee27b382394249u, 0xd74e3829f5dab91du, 0xfdb17989c26b5f1fu,
3299 0xc1b7d18781530845u, 0x7b4436b2105a8561u, 0x7ba7c0418372a7d7u, 0x9dbc5c67feb6c639u,
3300 0x502686d7f6ff6b8fu, 0x6101855406be7a1fu, 0x9956afb5806930e7u, 0xe1f0ee88af40f7c5u,
3301 0x984b057bda5c1151u, 0x9a49819acc13ea05u, 0x8ef0dead0896ef27u, 0x71f7826efe292b21u,
3302 0xad80a480e46986efu, 0x01cdc0ebf5e0c6f7u, 0x6e06f839968f68dbu, 0xdd5943ab56e76139u,
3303 0xcdcf31bf8604c5e7u, 0x7e2b4a847054a1cbu, 0x0ca75697a4d3d0f5u, 0x4703f53ac514a98bu,
3304 };
3305
3306 /* inverses of reduced_factorial_odd_part values modulo 2**64.
3307
3308 Python code to generate the values:
3309
3310 import math
3311
3312 for n in range(128):
3313 fac = math.factorial(n)
3314 fac_odd_part = fac // (fac & -fac)
3315 inverted_fac_odd_part = pow(fac_odd_part, -1, 2**64)
3316 print(f"{inverted_fac_odd_part:#018x}u")
3317 */
3318 static const uint64_t inverted_factorial_odd_part[] = {
3319 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0xaaaaaaaaaaaaaaabu,
3320 0xaaaaaaaaaaaaaaabu, 0xeeeeeeeeeeeeeeefu, 0x4fa4fa4fa4fa4fa5u, 0x2ff2ff2ff2ff2ff3u,
3321 0x2ff2ff2ff2ff2ff3u, 0x938cc70553e3771bu, 0xb71c27cddd93e49fu, 0xb38e3229fcdee63du,
3322 0xe684bb63544a4cbfu, 0xc2f684917ca340fbu, 0xf747c9cba417526du, 0xbb26eb51d7bd49c3u,
3323 0xbb26eb51d7bd49c3u, 0xb0a7efb985294093u, 0xbe4b8c69f259eabbu, 0x6854d17ed6dc4fb9u,
3324 0xe1aa904c915f4325u, 0x3b8206df131cead1u, 0x79c6009fea76fe13u, 0xd8c5d381633cd365u,
3325 0x4841f12b21144677u, 0x4a91ff68200b0d0fu, 0x8f9513a58c4f9e8bu, 0x2b3e690621a42251u,
3326 0x4f520f00e03c04e7u, 0x2edf84ee600211d3u, 0xadcaa2764aaacdfdu, 0x161f4f9033f4fe63u,
3327 0x161f4f9033f4fe63u, 0xbada2932ea4d3e03u, 0xcec189f3efaa30d3u, 0xf7475bb68330bf91u,
3328 0x37eb7bf7d5b01549u, 0x46b35660a4e91555u, 0xa567c12d81f151f7u, 0x4c724007bb2071b1u,
3329 0x0f4a0cce58a016bdu, 0xfa21068e66106475u, 0x244ab72b5a318ae1u, 0x366ce67e080d0f23u,
3330 0xd666fdae5dd2a449u, 0xd740ddd0acc06a0du, 0xb050bbbb28e6f97bu, 0x70b003fe890a5c75u,
3331 0xd03aabff83037427u, 0x13ec4ca72c783bd7u, 0x90282c06afdbd96fu, 0x4414ddb9db4a95d5u,
3332 0xa2c68735ae6832e9u, 0xbf72d71455676665u, 0xa8469fab6b759b7fu, 0xc1e55b56e606caf9u,
3333 0x40455630fc4a1cffu, 0x0120a7b0046d16f7u, 0xa7c3553b08faef23u, 0x9f0bfd1b08d48639u,
3334 0xa433ffce9a304d37u, 0xa22ad1d53915c683u, 0xcb6cbc723ba5dd1du, 0x547fb1b8ab9d0ba3u,
3335 0x547fb1b8ab9d0ba3u, 0x8f15a826498852e3u, 0x32e1a03f38880283u, 0x3de4cce63283f0c1u,
3336 0x5dfe6667e4da95b1u, 0xfda6eeeef479e47du, 0xf14de991cc7882dfu, 0xe68db79247630ca9u,
3337 0xa7d6db8207ee8fa1u, 0x255e1f0fcf034499u, 0xc9a8990e43dd7e65u, 0x3279b6f289702e0fu,
3338 0xe7b5905d9b71b195u, 0x03025ba41ff0da69u, 0xb7df3d6d3be55aefu, 0xf89b212ebff2b361u,
3339 0xfe856d095996f0adu, 0xd6e533e9fdf20f9du, 0xf8c0e84a63da3255u, 0xa677876cd91b4db7u,
3340 0x07ed4f97780d7d9bu, 0x90a8705f258db62fu, 0xa41bbb2be31b1c0du, 0x6ec28690b038383bu,
3341 0xdb860c3bb2edd691u, 0x0838286838a980f9u, 0x558417a74b36f77du, 0x71779afc3646ef07u,
3342 0x743cda377ccb6e91u, 0x7fdf9f3fe89153c5u, 0xdc97d25df49b9a4bu, 0x76321a778eb37d95u,
3343 0x7cbb5e27da3bd487u, 0x9cff4ade1a009de7u, 0x70eb166d05c15197u, 0xdcf0460b71d5fe3du,
3344 0x5ac1ee5260b6a3c5u, 0xc922dedfdd78efe1u, 0xe5d381dc3b8eeb9bu, 0xd57e5347bafc6aadu,
3345 0x86939040983acd21u, 0x395b9d69740a4ff9u, 0x1467299c8e43d135u, 0x5fe440fcad975cdfu,
3346 0xcaa9a39794a6ca8du, 0xf61dbd640868dea1u, 0xac09d98d74843be7u, 0x2b103b9e1a6b4809u,
3347 0x2ab92d16960f536fu, 0x6653323d5e3681dfu, 0xefd48c1c0624e2d7u, 0xa496fefe04816f0du,
3348 0x1754a7b07bbdd7b1u, 0x23353c829a3852cdu, 0xbf831261abd59097u, 0x57a8e656df0618e1u,
3349 0x16e9206c3100680fu, 0xadad4c6ee921dac7u, 0x635f2b3860265353u, 0xdd6d0059f44b3d09u,
3350 0xac4dd6b894447dd7u, 0x42ea183eeaa87be3u, 0x15612d1550ee5b5du, 0x226fa19d656cb623u,
3351 };
3352
3353 /* exponent of the largest power of 2 dividing factorial(n), for n in range(68)
3354
3355 Python code to generate the values:
3356
3357 import math
3358
3359 for n in range(128):
3360 fac = math.factorial(n)
3361 fac_trailing_zeros = (fac & -fac).bit_length() - 1
3362 print(fac_trailing_zeros)
3363 */
3364
3365 static const uint8_t factorial_trailing_zeros[] = {
3366 0, 0, 1, 1, 3, 3, 4, 4, 7, 7, 8, 8, 10, 10, 11, 11, // 0-15
3367 15, 15, 16, 16, 18, 18, 19, 19, 22, 22, 23, 23, 25, 25, 26, 26, // 16-31
3368 31, 31, 32, 32, 34, 34, 35, 35, 38, 38, 39, 39, 41, 41, 42, 42, // 32-47
3369 46, 46, 47, 47, 49, 49, 50, 50, 53, 53, 54, 54, 56, 56, 57, 57, // 48-63
3370 63, 63, 64, 64, 66, 66, 67, 67, 70, 70, 71, 71, 73, 73, 74, 74, // 64-79
3371 78, 78, 79, 79, 81, 81, 82, 82, 85, 85, 86, 86, 88, 88, 89, 89, // 80-95
3372 94, 94, 95, 95, 97, 97, 98, 98, 101, 101, 102, 102, 104, 104, 105, 105, // 96-111
3373 109, 109, 110, 110, 112, 112, 113, 113, 116, 116, 117, 117, 119, 119, 120, 120, // 112-127
3374 };
3375
3376 /* Number of permutations and combinations.
3377 * P(n, k) = n! / (n-k)!
3378 * C(n, k) = P(n, k) / k!
3379 */
3380
3381 /* Calculate C(n, k) for n in the 63-bit range. */
3382 static PyObject *
perm_comb_small(unsigned long long n,unsigned long long k,int iscomb)3383 perm_comb_small(unsigned long long n, unsigned long long k, int iscomb)
3384 {
3385 if (k == 0) {
3386 return PyLong_FromLong(1);
3387 }
3388
3389 /* For small enough n and k the result fits in the 64-bit range and can
3390 * be calculated without allocating intermediate PyLong objects. */
3391 if (iscomb) {
3392 /* Maps k to the maximal n so that 2*k-1 <= n <= 127 and C(n, k)
3393 * fits into a uint64_t. Exclude k = 1, because the second fast
3394 * path is faster for this case.*/
3395 static const unsigned char fast_comb_limits1[] = {
3396 0, 0, 127, 127, 127, 127, 127, 127, // 0-7
3397 127, 127, 127, 127, 127, 127, 127, 127, // 8-15
3398 116, 105, 97, 91, 86, 82, 78, 76, // 16-23
3399 74, 72, 71, 70, 69, 68, 68, 67, // 24-31
3400 67, 67, 67, // 32-34
3401 };
3402 if (k < Py_ARRAY_LENGTH(fast_comb_limits1) && n <= fast_comb_limits1[k]) {
3403 /*
3404 comb(n, k) fits into a uint64_t. We compute it as
3405
3406 comb_odd_part << shift
3407
3408 where 2**shift is the largest power of two dividing comb(n, k)
3409 and comb_odd_part is comb(n, k) >> shift. comb_odd_part can be
3410 calculated efficiently via arithmetic modulo 2**64, using three
3411 lookups and two uint64_t multiplications.
3412 */
3413 uint64_t comb_odd_part = reduced_factorial_odd_part[n]
3414 * inverted_factorial_odd_part[k]
3415 * inverted_factorial_odd_part[n - k];
3416 int shift = factorial_trailing_zeros[n]
3417 - factorial_trailing_zeros[k]
3418 - factorial_trailing_zeros[n - k];
3419 return PyLong_FromUnsignedLongLong(comb_odd_part << shift);
3420 }
3421
3422 /* Maps k to the maximal n so that 2*k-1 <= n <= 127 and C(n, k)*k
3423 * fits into a long long (which is at least 64 bit). Only contains
3424 * items larger than in fast_comb_limits1. */
3425 static const unsigned long long fast_comb_limits2[] = {
3426 0, ULLONG_MAX, 4294967296ULL, 3329022, 102570, 13467, 3612, 1449, // 0-7
3427 746, 453, 308, 227, 178, 147, // 8-13
3428 };
3429 if (k < Py_ARRAY_LENGTH(fast_comb_limits2) && n <= fast_comb_limits2[k]) {
3430 /* C(n, k) = C(n, k-1) * (n-k+1) / k */
3431 unsigned long long result = n;
3432 for (unsigned long long i = 1; i < k;) {
3433 result *= --n;
3434 result /= ++i;
3435 }
3436 return PyLong_FromUnsignedLongLong(result);
3437 }
3438 }
3439 else {
3440 /* Maps k to the maximal n so that k <= n and P(n, k)
3441 * fits into a long long (which is at least 64 bit). */
3442 static const unsigned long long fast_perm_limits[] = {
3443 0, ULLONG_MAX, 4294967296ULL, 2642246, 65537, 7133, 1627, 568, // 0-7
3444 259, 142, 88, 61, 45, 36, 30, 26, // 8-15
3445 24, 22, 21, 20, 20, // 16-20
3446 };
3447 if (k < Py_ARRAY_LENGTH(fast_perm_limits) && n <= fast_perm_limits[k]) {
3448 if (n <= 127) {
3449 /* P(n, k) fits into a uint64_t. */
3450 uint64_t perm_odd_part = reduced_factorial_odd_part[n]
3451 * inverted_factorial_odd_part[n - k];
3452 int shift = factorial_trailing_zeros[n]
3453 - factorial_trailing_zeros[n - k];
3454 return PyLong_FromUnsignedLongLong(perm_odd_part << shift);
3455 }
3456
3457 /* P(n, k) = P(n, k-1) * (n-k+1) */
3458 unsigned long long result = n;
3459 for (unsigned long long i = 1; i < k;) {
3460 result *= --n;
3461 ++i;
3462 }
3463 return PyLong_FromUnsignedLongLong(result);
3464 }
3465 }
3466
3467 /* For larger n use recursive formulas:
3468 *
3469 * P(n, k) = P(n, j) * P(n-j, k-j)
3470 * C(n, k) = C(n, j) * C(n-j, k-j) // C(k, j)
3471 */
3472 unsigned long long j = k / 2;
3473 PyObject *a, *b;
3474 a = perm_comb_small(n, j, iscomb);
3475 if (a == NULL) {
3476 return NULL;
3477 }
3478 b = perm_comb_small(n - j, k - j, iscomb);
3479 if (b == NULL) {
3480 goto error;
3481 }
3482 Py_SETREF(a, PyNumber_Multiply(a, b));
3483 Py_DECREF(b);
3484 if (iscomb && a != NULL) {
3485 b = perm_comb_small(k, j, 1);
3486 if (b == NULL) {
3487 goto error;
3488 }
3489 Py_SETREF(a, PyNumber_FloorDivide(a, b));
3490 Py_DECREF(b);
3491 }
3492 return a;
3493
3494 error:
3495 Py_DECREF(a);
3496 return NULL;
3497 }
3498
3499 /* Calculate P(n, k) or C(n, k) using recursive formulas.
3500 * It is more efficient than sequential multiplication thanks to
3501 * Karatsuba multiplication.
3502 */
3503 static PyObject *
perm_comb(PyObject * n,unsigned long long k,int iscomb)3504 perm_comb(PyObject *n, unsigned long long k, int iscomb)
3505 {
3506 if (k == 0) {
3507 return PyLong_FromLong(1);
3508 }
3509 if (k == 1) {
3510 Py_INCREF(n);
3511 return n;
3512 }
3513
3514 /* P(n, k) = P(n, j) * P(n-j, k-j) */
3515 /* C(n, k) = C(n, j) * C(n-j, k-j) // C(k, j) */
3516 unsigned long long j = k / 2;
3517 PyObject *a, *b;
3518 a = perm_comb(n, j, iscomb);
3519 if (a == NULL) {
3520 return NULL;
3521 }
3522 PyObject *t = PyLong_FromUnsignedLongLong(j);
3523 if (t == NULL) {
3524 goto error;
3525 }
3526 n = PyNumber_Subtract(n, t);
3527 Py_DECREF(t);
3528 if (n == NULL) {
3529 goto error;
3530 }
3531 b = perm_comb(n, k - j, iscomb);
3532 Py_DECREF(n);
3533 if (b == NULL) {
3534 goto error;
3535 }
3536 Py_SETREF(a, PyNumber_Multiply(a, b));
3537 Py_DECREF(b);
3538 if (iscomb && a != NULL) {
3539 b = perm_comb_small(k, j, 1);
3540 if (b == NULL) {
3541 goto error;
3542 }
3543 Py_SETREF(a, PyNumber_FloorDivide(a, b));
3544 Py_DECREF(b);
3545 }
3546 return a;
3547
3548 error:
3549 Py_DECREF(a);
3550 return NULL;
3551 }
3552
3553 /*[clinic input]
3554 math.perm
3555
3556 n: object
3557 k: object = None
3558 /
3559
3560 Number of ways to choose k items from n items without repetition and with order.
3561
3562 Evaluates to n! / (n - k)! when k <= n and evaluates
3563 to zero when k > n.
3564
3565 If k is not specified or is None, then k defaults to n
3566 and the function returns n!.
3567
3568 Raises TypeError if either of the arguments are not integers.
3569 Raises ValueError if either of the arguments are negative.
3570 [clinic start generated code]*/
3571
3572 static PyObject *
math_perm_impl(PyObject * module,PyObject * n,PyObject * k)3573 math_perm_impl(PyObject *module, PyObject *n, PyObject *k)
3574 /*[clinic end generated code: output=e021a25469653e23 input=5311c5a00f359b53]*/
3575 {
3576 PyObject *result = NULL;
3577 int overflow, cmp;
3578 long long ki, ni;
3579
3580 if (k == Py_None) {
3581 return math_factorial(module, n);
3582 }
3583 n = PyNumber_Index(n);
3584 if (n == NULL) {
3585 return NULL;
3586 }
3587 k = PyNumber_Index(k);
3588 if (k == NULL) {
3589 Py_DECREF(n);
3590 return NULL;
3591 }
3592 assert(PyLong_CheckExact(n) && PyLong_CheckExact(k));
3593
3594 if (Py_SIZE(n) < 0) {
3595 PyErr_SetString(PyExc_ValueError,
3596 "n must be a non-negative integer");
3597 goto error;
3598 }
3599 if (Py_SIZE(k) < 0) {
3600 PyErr_SetString(PyExc_ValueError,
3601 "k must be a non-negative integer");
3602 goto error;
3603 }
3604
3605 cmp = PyObject_RichCompareBool(n, k, Py_LT);
3606 if (cmp != 0) {
3607 if (cmp > 0) {
3608 result = PyLong_FromLong(0);
3609 goto done;
3610 }
3611 goto error;
3612 }
3613
3614 ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3615 assert(overflow >= 0 && !PyErr_Occurred());
3616 if (overflow > 0) {
3617 PyErr_Format(PyExc_OverflowError,
3618 "k must not exceed %lld",
3619 LLONG_MAX);
3620 goto error;
3621 }
3622 assert(ki >= 0);
3623
3624 ni = PyLong_AsLongLongAndOverflow(n, &overflow);
3625 assert(overflow >= 0 && !PyErr_Occurred());
3626 if (!overflow && ki > 1) {
3627 assert(ni >= 0);
3628 result = perm_comb_small((unsigned long long)ni,
3629 (unsigned long long)ki, 0);
3630 }
3631 else {
3632 result = perm_comb(n, (unsigned long long)ki, 0);
3633 }
3634
3635 done:
3636 Py_DECREF(n);
3637 Py_DECREF(k);
3638 return result;
3639
3640 error:
3641 Py_DECREF(n);
3642 Py_DECREF(k);
3643 return NULL;
3644 }
3645
3646 /*[clinic input]
3647 math.comb
3648
3649 n: object
3650 k: object
3651 /
3652
3653 Number of ways to choose k items from n items without repetition and without order.
3654
3655 Evaluates to n! / (k! * (n - k)!) when k <= n and evaluates
3656 to zero when k > n.
3657
3658 Also called the binomial coefficient because it is equivalent
3659 to the coefficient of k-th term in polynomial expansion of the
3660 expression (1 + x)**n.
3661
3662 Raises TypeError if either of the arguments are not integers.
3663 Raises ValueError if either of the arguments are negative.
3664
3665 [clinic start generated code]*/
3666
3667 static PyObject *
math_comb_impl(PyObject * module,PyObject * n,PyObject * k)3668 math_comb_impl(PyObject *module, PyObject *n, PyObject *k)
3669 /*[clinic end generated code: output=bd2cec8d854f3493 input=9a05315af2518709]*/
3670 {
3671 PyObject *result = NULL, *temp;
3672 int overflow, cmp;
3673 long long ki, ni;
3674
3675 n = PyNumber_Index(n);
3676 if (n == NULL) {
3677 return NULL;
3678 }
3679 k = PyNumber_Index(k);
3680 if (k == NULL) {
3681 Py_DECREF(n);
3682 return NULL;
3683 }
3684 assert(PyLong_CheckExact(n) && PyLong_CheckExact(k));
3685
3686 if (Py_SIZE(n) < 0) {
3687 PyErr_SetString(PyExc_ValueError,
3688 "n must be a non-negative integer");
3689 goto error;
3690 }
3691 if (Py_SIZE(k) < 0) {
3692 PyErr_SetString(PyExc_ValueError,
3693 "k must be a non-negative integer");
3694 goto error;
3695 }
3696
3697 ni = PyLong_AsLongLongAndOverflow(n, &overflow);
3698 assert(overflow >= 0 && !PyErr_Occurred());
3699 if (!overflow) {
3700 assert(ni >= 0);
3701 ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3702 assert(overflow >= 0 && !PyErr_Occurred());
3703 if (overflow || ki > ni) {
3704 result = PyLong_FromLong(0);
3705 goto done;
3706 }
3707 assert(ki >= 0);
3708
3709 ki = Py_MIN(ki, ni - ki);
3710 if (ki > 1) {
3711 result = perm_comb_small((unsigned long long)ni,
3712 (unsigned long long)ki, 1);
3713 goto done;
3714 }
3715 /* For k == 1 just return the original n in perm_comb(). */
3716 }
3717 else {
3718 /* k = min(k, n - k) */
3719 temp = PyNumber_Subtract(n, k);
3720 if (temp == NULL) {
3721 goto error;
3722 }
3723 if (Py_SIZE(temp) < 0) {
3724 Py_DECREF(temp);
3725 result = PyLong_FromLong(0);
3726 goto done;
3727 }
3728 cmp = PyObject_RichCompareBool(temp, k, Py_LT);
3729 if (cmp > 0) {
3730 Py_SETREF(k, temp);
3731 }
3732 else {
3733 Py_DECREF(temp);
3734 if (cmp < 0) {
3735 goto error;
3736 }
3737 }
3738
3739 ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3740 assert(overflow >= 0 && !PyErr_Occurred());
3741 if (overflow) {
3742 PyErr_Format(PyExc_OverflowError,
3743 "min(n - k, k) must not exceed %lld",
3744 LLONG_MAX);
3745 goto error;
3746 }
3747 assert(ki >= 0);
3748 }
3749
3750 result = perm_comb(n, (unsigned long long)ki, 1);
3751
3752 done:
3753 Py_DECREF(n);
3754 Py_DECREF(k);
3755 return result;
3756
3757 error:
3758 Py_DECREF(n);
3759 Py_DECREF(k);
3760 return NULL;
3761 }
3762
3763
3764 /*[clinic input]
3765 math.nextafter
3766
3767 x: double
3768 y: double
3769 /
3770
3771 Return the next floating-point value after x towards y.
3772 [clinic start generated code]*/
3773
3774 static PyObject *
math_nextafter_impl(PyObject * module,double x,double y)3775 math_nextafter_impl(PyObject *module, double x, double y)
3776 /*[clinic end generated code: output=750c8266c1c540ce input=02b2d50cd1d9f9b6]*/
3777 {
3778 #if defined(_AIX)
3779 if (x == y) {
3780 /* On AIX 7.1, libm nextafter(-0.0, +0.0) returns -0.0.
3781 Bug fixed in bos.adt.libm 7.2.2.0 by APAR IV95512. */
3782 return PyFloat_FromDouble(y);
3783 }
3784 if (Py_IS_NAN(x)) {
3785 return PyFloat_FromDouble(x);
3786 }
3787 if (Py_IS_NAN(y)) {
3788 return PyFloat_FromDouble(y);
3789 }
3790 #endif
3791 return PyFloat_FromDouble(nextafter(x, y));
3792 }
3793
3794
3795 /*[clinic input]
3796 math.ulp -> double
3797
3798 x: double
3799 /
3800
3801 Return the value of the least significant bit of the float x.
3802 [clinic start generated code]*/
3803
3804 static double
math_ulp_impl(PyObject * module,double x)3805 math_ulp_impl(PyObject *module, double x)
3806 /*[clinic end generated code: output=f5207867a9384dd4 input=31f9bfbbe373fcaa]*/
3807 {
3808 if (Py_IS_NAN(x)) {
3809 return x;
3810 }
3811 x = fabs(x);
3812 if (Py_IS_INFINITY(x)) {
3813 return x;
3814 }
3815 double inf = m_inf();
3816 double x2 = nextafter(x, inf);
3817 if (Py_IS_INFINITY(x2)) {
3818 /* special case: x is the largest positive representable float */
3819 x2 = nextafter(x, -inf);
3820 return x - x2;
3821 }
3822 return x2 - x;
3823 }
3824
3825 static int
math_exec(PyObject * module)3826 math_exec(PyObject *module)
3827 {
3828 if (PyModule_AddObject(module, "pi", PyFloat_FromDouble(Py_MATH_PI)) < 0) {
3829 return -1;
3830 }
3831 if (PyModule_AddObject(module, "e", PyFloat_FromDouble(Py_MATH_E)) < 0) {
3832 return -1;
3833 }
3834 // 2pi
3835 if (PyModule_AddObject(module, "tau", PyFloat_FromDouble(Py_MATH_TAU)) < 0) {
3836 return -1;
3837 }
3838 if (PyModule_AddObject(module, "inf", PyFloat_FromDouble(m_inf())) < 0) {
3839 return -1;
3840 }
3841 #if _PY_SHORT_FLOAT_REPR == 1
3842 if (PyModule_AddObject(module, "nan", PyFloat_FromDouble(m_nan())) < 0) {
3843 return -1;
3844 }
3845 #endif
3846 return 0;
3847 }
3848
3849 static PyMethodDef math_methods[] = {
3850 {"acos", math_acos, METH_O, math_acos_doc},
3851 {"acosh", math_acosh, METH_O, math_acosh_doc},
3852 {"asin", math_asin, METH_O, math_asin_doc},
3853 {"asinh", math_asinh, METH_O, math_asinh_doc},
3854 {"atan", math_atan, METH_O, math_atan_doc},
3855 {"atan2", _PyCFunction_CAST(math_atan2), METH_FASTCALL, math_atan2_doc},
3856 {"atanh", math_atanh, METH_O, math_atanh_doc},
3857 {"cbrt", math_cbrt, METH_O, math_cbrt_doc},
3858 MATH_CEIL_METHODDEF
3859 {"copysign", _PyCFunction_CAST(math_copysign), METH_FASTCALL, math_copysign_doc},
3860 {"cos", math_cos, METH_O, math_cos_doc},
3861 {"cosh", math_cosh, METH_O, math_cosh_doc},
3862 MATH_DEGREES_METHODDEF
3863 MATH_DIST_METHODDEF
3864 {"erf", math_erf, METH_O, math_erf_doc},
3865 {"erfc", math_erfc, METH_O, math_erfc_doc},
3866 {"exp", math_exp, METH_O, math_exp_doc},
3867 {"exp2", math_exp2, METH_O, math_exp2_doc},
3868 {"expm1", math_expm1, METH_O, math_expm1_doc},
3869 {"fabs", math_fabs, METH_O, math_fabs_doc},
3870 MATH_FACTORIAL_METHODDEF
3871 MATH_FLOOR_METHODDEF
3872 MATH_FMOD_METHODDEF
3873 MATH_FREXP_METHODDEF
3874 MATH_FSUM_METHODDEF
3875 {"gamma", math_gamma, METH_O, math_gamma_doc},
3876 {"gcd", _PyCFunction_CAST(math_gcd), METH_FASTCALL, math_gcd_doc},
3877 {"hypot", _PyCFunction_CAST(math_hypot), METH_FASTCALL, math_hypot_doc},
3878 MATH_ISCLOSE_METHODDEF
3879 MATH_ISFINITE_METHODDEF
3880 MATH_ISINF_METHODDEF
3881 MATH_ISNAN_METHODDEF
3882 MATH_ISQRT_METHODDEF
3883 {"lcm", _PyCFunction_CAST(math_lcm), METH_FASTCALL, math_lcm_doc},
3884 MATH_LDEXP_METHODDEF
3885 {"lgamma", math_lgamma, METH_O, math_lgamma_doc},
3886 MATH_LOG_METHODDEF
3887 {"log1p", math_log1p, METH_O, math_log1p_doc},
3888 MATH_LOG10_METHODDEF
3889 MATH_LOG2_METHODDEF
3890 MATH_MODF_METHODDEF
3891 MATH_POW_METHODDEF
3892 MATH_RADIANS_METHODDEF
3893 {"remainder", _PyCFunction_CAST(math_remainder), METH_FASTCALL, math_remainder_doc},
3894 {"sin", math_sin, METH_O, math_sin_doc},
3895 {"sinh", math_sinh, METH_O, math_sinh_doc},
3896 {"sqrt", math_sqrt, METH_O, math_sqrt_doc},
3897 {"tan", math_tan, METH_O, math_tan_doc},
3898 {"tanh", math_tanh, METH_O, math_tanh_doc},
3899 MATH_TRUNC_METHODDEF
3900 MATH_PROD_METHODDEF
3901 MATH_PERM_METHODDEF
3902 MATH_COMB_METHODDEF
3903 MATH_NEXTAFTER_METHODDEF
3904 MATH_ULP_METHODDEF
3905 {NULL, NULL} /* sentinel */
3906 };
3907
3908 static PyModuleDef_Slot math_slots[] = {
3909 {Py_mod_exec, math_exec},
3910 {0, NULL}
3911 };
3912
3913 PyDoc_STRVAR(module_doc,
3914 "This module provides access to the mathematical functions\n"
3915 "defined by the C standard.");
3916
3917 static struct PyModuleDef mathmodule = {
3918 PyModuleDef_HEAD_INIT,
3919 .m_name = "math",
3920 .m_doc = module_doc,
3921 .m_size = 0,
3922 .m_methods = math_methods,
3923 .m_slots = math_slots,
3924 };
3925
3926 PyMODINIT_FUNC
PyInit_math(void)3927 PyInit_math(void)
3928 {
3929 return PyModuleDef_Init(&mathmodule);
3930 }
3931