1 /* Math module -- standard C math library functions, pi and e */
2 
3 /* Here are some comments from Tim Peters, extracted from the
4    discussion attached to http://bugs.python.org/issue1640.  They
5    describe the general aims of the math module with respect to
6    special values, IEEE-754 floating-point exceptions, and Python
7    exceptions.
8 
9 These are the "spirit of 754" rules:
10 
11 1. If the mathematical result is a real number, but of magnitude too
12 large to approximate by a machine float, overflow is signaled and the
13 result is an infinity (with the appropriate sign).
14 
15 2. If the mathematical result is a real number, but of magnitude too
16 small to approximate by a machine float, underflow is signaled and the
17 result is a zero (with the appropriate sign).
18 
19 3. At a singularity (a value x such that the limit of f(y) as y
20 approaches x exists and is an infinity), "divide by zero" is signaled
21 and the result is an infinity (with the appropriate sign).  This is
22 complicated a little by that the left-side and right-side limits may
23 not be the same; e.g., 1/x approaches +inf or -inf as x approaches 0
24 from the positive or negative directions.  In that specific case, the
25 sign of the zero determines the result of 1/0.
26 
27 4. At a point where a function has no defined result in the extended
28 reals (i.e., the reals plus an infinity or two), invalid operation is
29 signaled and a NaN is returned.
30 
31 And these are what Python has historically /tried/ to do (but not
32 always successfully, as platform libm behavior varies a lot):
33 
34 For #1, raise OverflowError.
35 
36 For #2, return a zero (with the appropriate sign if that happens by
37 accident ;-)).
38 
39 For #3 and #4, raise ValueError.  It may have made sense to raise
40 Python's ZeroDivisionError in #3, but historically that's only been
41 raised for division by zero and mod by zero.
42 
43 */
44 
45 /*
46    In general, on an IEEE-754 platform the aim is to follow the C99
47    standard, including Annex 'F', whenever possible.  Where the
48    standard recommends raising the 'divide-by-zero' or 'invalid'
49    floating-point exceptions, Python should raise a ValueError.  Where
50    the standard recommends raising 'overflow', Python should raise an
51    OverflowError.  In all other circumstances a value should be
52    returned.
53  */
54 
55 #ifndef Py_BUILD_CORE_BUILTIN
56 #  define Py_BUILD_CORE_MODULE 1
57 #endif
58 #define NEEDS_PY_IDENTIFIER
59 
60 #include "Python.h"
61 #include "pycore_bitutils.h"      // _Py_bit_length()
62 #include "pycore_call.h"          // _PyObject_CallNoArgs()
63 #include "pycore_dtoa.h"          // _Py_dg_infinity()
64 #include "pycore_long.h"          // _PyLong_GetZero()
65 #include "pycore_pymath.h"        // _PY_SHORT_FLOAT_REPR
66 /* For DBL_EPSILON in _math.h */
67 #include <float.h>
68 /* For _Py_log1p with workarounds for buggy handling of zeros. */
69 #include "_math.h"
70 
71 #include "clinic/mathmodule.c.h"
72 
73 /*[clinic input]
74 module math
75 [clinic start generated code]*/
76 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=76bc7002685dd942]*/
77 
78 
79 /*
80    sin(pi*x), giving accurate results for all finite x (especially x
81    integral or close to an integer).  This is here for use in the
82    reflection formula for the gamma function.  It conforms to IEEE
83    754-2008 for finite arguments, but not for infinities or nans.
84 */
85 
86 static const double pi = 3.141592653589793238462643383279502884197;
87 static const double logpi = 1.144729885849400174143427351353058711647;
88 #if !defined(HAVE_ERF) || !defined(HAVE_ERFC)
89 static const double sqrtpi = 1.772453850905516027298167483341145182798;
90 #endif /* !defined(HAVE_ERF) || !defined(HAVE_ERFC) */
91 
92 
93 /* Version of PyFloat_AsDouble() with in-line fast paths
94    for exact floats and integers.  Gives a substantial
95    speed improvement for extracting float arguments.
96 */
97 
98 #define ASSIGN_DOUBLE(target_var, obj, error_label)        \
99     if (PyFloat_CheckExact(obj)) {                         \
100         target_var = PyFloat_AS_DOUBLE(obj);               \
101     }                                                      \
102     else if (PyLong_CheckExact(obj)) {                     \
103         target_var = PyLong_AsDouble(obj);                 \
104         if (target_var == -1.0 && PyErr_Occurred()) {      \
105             goto error_label;                              \
106         }                                                  \
107     }                                                      \
108     else {                                                 \
109         target_var = PyFloat_AsDouble(obj);                \
110         if (target_var == -1.0 && PyErr_Occurred()) {      \
111             goto error_label;                              \
112         }                                                  \
113     }
114 
115 static double
m_sinpi(double x)116 m_sinpi(double x)
117 {
118     double y, r;
119     int n;
120     /* this function should only ever be called for finite arguments */
121     assert(Py_IS_FINITE(x));
122     y = fmod(fabs(x), 2.0);
123     n = (int)round(2.0*y);
124     assert(0 <= n && n <= 4);
125     switch (n) {
126     case 0:
127         r = sin(pi*y);
128         break;
129     case 1:
130         r = cos(pi*(y-0.5));
131         break;
132     case 2:
133         /* N.B. -sin(pi*(y-1.0)) is *not* equivalent: it would give
134            -0.0 instead of 0.0 when y == 1.0. */
135         r = sin(pi*(1.0-y));
136         break;
137     case 3:
138         r = -cos(pi*(y-1.5));
139         break;
140     case 4:
141         r = sin(pi*(y-2.0));
142         break;
143     default:
144         Py_UNREACHABLE();
145     }
146     return copysign(1.0, x)*r;
147 }
148 
149 /* Implementation of the real gamma function.  In extensive but non-exhaustive
150    random tests, this function proved accurate to within <= 10 ulps across the
151    entire float domain.  Note that accuracy may depend on the quality of the
152    system math functions, the pow function in particular.  Special cases
153    follow C99 annex F.  The parameters and method are tailored to platforms
154    whose double format is the IEEE 754 binary64 format.
155 
156    Method: for x > 0.0 we use the Lanczos approximation with parameters N=13
157    and g=6.024680040776729583740234375; these parameters are amongst those
158    used by the Boost library.  Following Boost (again), we re-express the
159    Lanczos sum as a rational function, and compute it that way.  The
160    coefficients below were computed independently using MPFR, and have been
161    double-checked against the coefficients in the Boost source code.
162 
163    For x < 0.0 we use the reflection formula.
164 
165    There's one minor tweak that deserves explanation: Lanczos' formula for
166    Gamma(x) involves computing pow(x+g-0.5, x-0.5) / exp(x+g-0.5).  For many x
167    values, x+g-0.5 can be represented exactly.  However, in cases where it
168    can't be represented exactly the small error in x+g-0.5 can be magnified
169    significantly by the pow and exp calls, especially for large x.  A cheap
170    correction is to multiply by (1 + e*g/(x+g-0.5)), where e is the error
171    involved in the computation of x+g-0.5 (that is, e = computed value of
172    x+g-0.5 - exact value of x+g-0.5).  Here's the proof:
173 
174    Correction factor
175    -----------------
176    Write x+g-0.5 = y-e, where y is exactly representable as an IEEE 754
177    double, and e is tiny.  Then:
178 
179      pow(x+g-0.5,x-0.5)/exp(x+g-0.5) = pow(y-e, x-0.5)/exp(y-e)
180      = pow(y, x-0.5)/exp(y) * C,
181 
182    where the correction_factor C is given by
183 
184      C = pow(1-e/y, x-0.5) * exp(e)
185 
186    Since e is tiny, pow(1-e/y, x-0.5) ~ 1-(x-0.5)*e/y, and exp(x) ~ 1+e, so:
187 
188      C ~ (1-(x-0.5)*e/y) * (1+e) ~ 1 + e*(y-(x-0.5))/y
189 
190    But y-(x-0.5) = g+e, and g+e ~ g.  So we get C ~ 1 + e*g/y, and
191 
192      pow(x+g-0.5,x-0.5)/exp(x+g-0.5) ~ pow(y, x-0.5)/exp(y) * (1 + e*g/y),
193 
194    Note that for accuracy, when computing r*C it's better to do
195 
196      r + e*g/y*r;
197 
198    than
199 
200      r * (1 + e*g/y);
201 
202    since the addition in the latter throws away most of the bits of
203    information in e*g/y.
204 */
205 
206 #define LANCZOS_N 13
207 static const double lanczos_g = 6.024680040776729583740234375;
208 static const double lanczos_g_minus_half = 5.524680040776729583740234375;
209 static const double lanczos_num_coeffs[LANCZOS_N] = {
210     23531376880.410759688572007674451636754734846804940,
211     42919803642.649098768957899047001988850926355848959,
212     35711959237.355668049440185451547166705960488635843,
213     17921034426.037209699919755754458931112671403265390,
214     6039542586.3520280050642916443072979210699388420708,
215     1439720407.3117216736632230727949123939715485786772,
216     248874557.86205415651146038641322942321632125127801,
217     31426415.585400194380614231628318205362874684987640,
218     2876370.6289353724412254090516208496135991145378768,
219     186056.26539522349504029498971604569928220784236328,
220     8071.6720023658162106380029022722506138218516325024,
221     210.82427775157934587250973392071336271166969580291,
222     2.5066282746310002701649081771338373386264310793408
223 };
224 
225 /* denominator is x*(x+1)*...*(x+LANCZOS_N-2) */
226 static const double lanczos_den_coeffs[LANCZOS_N] = {
227     0.0, 39916800.0, 120543840.0, 150917976.0, 105258076.0, 45995730.0,
228     13339535.0, 2637558.0, 357423.0, 32670.0, 1925.0, 66.0, 1.0};
229 
230 /* gamma values for small positive integers, 1 though NGAMMA_INTEGRAL */
231 #define NGAMMA_INTEGRAL 23
232 static const double gamma_integral[NGAMMA_INTEGRAL] = {
233     1.0, 1.0, 2.0, 6.0, 24.0, 120.0, 720.0, 5040.0, 40320.0, 362880.0,
234     3628800.0, 39916800.0, 479001600.0, 6227020800.0, 87178291200.0,
235     1307674368000.0, 20922789888000.0, 355687428096000.0,
236     6402373705728000.0, 121645100408832000.0, 2432902008176640000.0,
237     51090942171709440000.0, 1124000727777607680000.0,
238 };
239 
240 /* Lanczos' sum L_g(x), for positive x */
241 
242 static double
lanczos_sum(double x)243 lanczos_sum(double x)
244 {
245     double num = 0.0, den = 0.0;
246     int i;
247     assert(x > 0.0);
248     /* evaluate the rational function lanczos_sum(x).  For large
249        x, the obvious algorithm risks overflow, so we instead
250        rescale the denominator and numerator of the rational
251        function by x**(1-LANCZOS_N) and treat this as a
252        rational function in 1/x.  This also reduces the error for
253        larger x values.  The choice of cutoff point (5.0 below) is
254        somewhat arbitrary; in tests, smaller cutoff values than
255        this resulted in lower accuracy. */
256     if (x < 5.0) {
257         for (i = LANCZOS_N; --i >= 0; ) {
258             num = num * x + lanczos_num_coeffs[i];
259             den = den * x + lanczos_den_coeffs[i];
260         }
261     }
262     else {
263         for (i = 0; i < LANCZOS_N; i++) {
264             num = num / x + lanczos_num_coeffs[i];
265             den = den / x + lanczos_den_coeffs[i];
266         }
267     }
268     return num/den;
269 }
270 
271 /* Constant for +infinity, generated in the same way as float('inf'). */
272 
273 static double
m_inf(void)274 m_inf(void)
275 {
276 #if _PY_SHORT_FLOAT_REPR == 1
277     return _Py_dg_infinity(0);
278 #else
279     return Py_HUGE_VAL;
280 #endif
281 }
282 
283 /* Constant nan value, generated in the same way as float('nan'). */
284 /* We don't currently assume that Py_NAN is defined everywhere. */
285 
286 #if _PY_SHORT_FLOAT_REPR == 1
287 
288 static double
m_nan(void)289 m_nan(void)
290 {
291 #if _PY_SHORT_FLOAT_REPR == 1
292     return _Py_dg_stdnan(0);
293 #else
294     return Py_NAN;
295 #endif
296 }
297 
298 #endif
299 
300 static double
m_tgamma(double x)301 m_tgamma(double x)
302 {
303     double absx, r, y, z, sqrtpow;
304 
305     /* special cases */
306     if (!Py_IS_FINITE(x)) {
307         if (Py_IS_NAN(x) || x > 0.0)
308             return x;  /* tgamma(nan) = nan, tgamma(inf) = inf */
309         else {
310             errno = EDOM;
311             return Py_NAN;  /* tgamma(-inf) = nan, invalid */
312         }
313     }
314     if (x == 0.0) {
315         errno = EDOM;
316         /* tgamma(+-0.0) = +-inf, divide-by-zero */
317         return copysign(Py_HUGE_VAL, x);
318     }
319 
320     /* integer arguments */
321     if (x == floor(x)) {
322         if (x < 0.0) {
323             errno = EDOM;  /* tgamma(n) = nan, invalid for */
324             return Py_NAN; /* negative integers n */
325         }
326         if (x <= NGAMMA_INTEGRAL)
327             return gamma_integral[(int)x - 1];
328     }
329     absx = fabs(x);
330 
331     /* tiny arguments:  tgamma(x) ~ 1/x for x near 0 */
332     if (absx < 1e-20) {
333         r = 1.0/x;
334         if (Py_IS_INFINITY(r))
335             errno = ERANGE;
336         return r;
337     }
338 
339     /* large arguments: assuming IEEE 754 doubles, tgamma(x) overflows for
340        x > 200, and underflows to +-0.0 for x < -200, not a negative
341        integer. */
342     if (absx > 200.0) {
343         if (x < 0.0) {
344             return 0.0/m_sinpi(x);
345         }
346         else {
347             errno = ERANGE;
348             return Py_HUGE_VAL;
349         }
350     }
351 
352     y = absx + lanczos_g_minus_half;
353     /* compute error in sum */
354     if (absx > lanczos_g_minus_half) {
355         /* note: the correction can be foiled by an optimizing
356            compiler that (incorrectly) thinks that an expression like
357            a + b - a - b can be optimized to 0.0.  This shouldn't
358            happen in a standards-conforming compiler. */
359         double q = y - absx;
360         z = q - lanczos_g_minus_half;
361     }
362     else {
363         double q = y - lanczos_g_minus_half;
364         z = q - absx;
365     }
366     z = z * lanczos_g / y;
367     if (x < 0.0) {
368         r = -pi / m_sinpi(absx) / absx * exp(y) / lanczos_sum(absx);
369         r -= z * r;
370         if (absx < 140.0) {
371             r /= pow(y, absx - 0.5);
372         }
373         else {
374             sqrtpow = pow(y, absx / 2.0 - 0.25);
375             r /= sqrtpow;
376             r /= sqrtpow;
377         }
378     }
379     else {
380         r = lanczos_sum(absx) / exp(y);
381         r += z * r;
382         if (absx < 140.0) {
383             r *= pow(y, absx - 0.5);
384         }
385         else {
386             sqrtpow = pow(y, absx / 2.0 - 0.25);
387             r *= sqrtpow;
388             r *= sqrtpow;
389         }
390     }
391     if (Py_IS_INFINITY(r))
392         errno = ERANGE;
393     return r;
394 }
395 
396 /*
397    lgamma:  natural log of the absolute value of the Gamma function.
398    For large arguments, Lanczos' formula works extremely well here.
399 */
400 
401 static double
m_lgamma(double x)402 m_lgamma(double x)
403 {
404     double r;
405     double absx;
406 
407     /* special cases */
408     if (!Py_IS_FINITE(x)) {
409         if (Py_IS_NAN(x))
410             return x;  /* lgamma(nan) = nan */
411         else
412             return Py_HUGE_VAL; /* lgamma(+-inf) = +inf */
413     }
414 
415     /* integer arguments */
416     if (x == floor(x) && x <= 2.0) {
417         if (x <= 0.0) {
418             errno = EDOM;  /* lgamma(n) = inf, divide-by-zero for */
419             return Py_HUGE_VAL; /* integers n <= 0 */
420         }
421         else {
422             return 0.0; /* lgamma(1) = lgamma(2) = 0.0 */
423         }
424     }
425 
426     absx = fabs(x);
427     /* tiny arguments: lgamma(x) ~ -log(fabs(x)) for small x */
428     if (absx < 1e-20)
429         return -log(absx);
430 
431     /* Lanczos' formula.  We could save a fraction of a ulp in accuracy by
432        having a second set of numerator coefficients for lanczos_sum that
433        absorbed the exp(-lanczos_g) term, and throwing out the lanczos_g
434        subtraction below; it's probably not worth it. */
435     r = log(lanczos_sum(absx)) - lanczos_g;
436     r += (absx - 0.5) * (log(absx + lanczos_g - 0.5) - 1);
437     if (x < 0.0)
438         /* Use reflection formula to get value for negative x. */
439         r = logpi - log(fabs(m_sinpi(absx))) - log(absx) - r;
440     if (Py_IS_INFINITY(r))
441         errno = ERANGE;
442     return r;
443 }
444 
445 #if !defined(HAVE_ERF) || !defined(HAVE_ERFC)
446 
447 /*
448    Implementations of the error function erf(x) and the complementary error
449    function erfc(x).
450 
451    Method: we use a series approximation for erf for small x, and a continued
452    fraction approximation for erfc(x) for larger x;
453    combined with the relations erf(-x) = -erf(x) and erfc(x) = 1.0 - erf(x),
454    this gives us erf(x) and erfc(x) for all x.
455 
456    The series expansion used is:
457 
458       erf(x) = x*exp(-x*x)/sqrt(pi) * [
459                      2/1 + 4/3 x**2 + 8/15 x**4 + 16/105 x**6 + ...]
460 
461    The coefficient of x**(2k-2) here is 4**k*factorial(k)/factorial(2*k).
462    This series converges well for smallish x, but slowly for larger x.
463 
464    The continued fraction expansion used is:
465 
466       erfc(x) = x*exp(-x*x)/sqrt(pi) * [1/(0.5 + x**2 -) 0.5/(2.5 + x**2 - )
467                               3.0/(4.5 + x**2 - ) 7.5/(6.5 + x**2 - ) ...]
468 
469    after the first term, the general term has the form:
470 
471       k*(k-0.5)/(2*k+0.5 + x**2 - ...).
472 
473    This expansion converges fast for larger x, but convergence becomes
474    infinitely slow as x approaches 0.0.  The (somewhat naive) continued
475    fraction evaluation algorithm used below also risks overflow for large x;
476    but for large x, erfc(x) == 0.0 to within machine precision.  (For
477    example, erfc(30.0) is approximately 2.56e-393).
478 
479    Parameters: use series expansion for abs(x) < ERF_SERIES_CUTOFF and
480    continued fraction expansion for ERF_SERIES_CUTOFF <= abs(x) <
481    ERFC_CONTFRAC_CUTOFF.  ERFC_SERIES_TERMS and ERFC_CONTFRAC_TERMS are the
482    numbers of terms to use for the relevant expansions.  */
483 
484 #define ERF_SERIES_CUTOFF 1.5
485 #define ERF_SERIES_TERMS 25
486 #define ERFC_CONTFRAC_CUTOFF 30.0
487 #define ERFC_CONTFRAC_TERMS 50
488 
489 /*
490    Error function, via power series.
491 
492    Given a finite float x, return an approximation to erf(x).
493    Converges reasonably fast for small x.
494 */
495 
496 static double
m_erf_series(double x)497 m_erf_series(double x)
498 {
499     double x2, acc, fk, result;
500     int i, saved_errno;
501 
502     x2 = x * x;
503     acc = 0.0;
504     fk = (double)ERF_SERIES_TERMS + 0.5;
505     for (i = 0; i < ERF_SERIES_TERMS; i++) {
506         acc = 2.0 + x2 * acc / fk;
507         fk -= 1.0;
508     }
509     /* Make sure the exp call doesn't affect errno;
510        see m_erfc_contfrac for more. */
511     saved_errno = errno;
512     result = acc * x * exp(-x2) / sqrtpi;
513     errno = saved_errno;
514     return result;
515 }
516 
517 /*
518    Complementary error function, via continued fraction expansion.
519 
520    Given a positive float x, return an approximation to erfc(x).  Converges
521    reasonably fast for x large (say, x > 2.0), and should be safe from
522    overflow if x and nterms are not too large.  On an IEEE 754 machine, with x
523    <= 30.0, we're safe up to nterms = 100.  For x >= 30.0, erfc(x) is smaller
524    than the smallest representable nonzero float.  */
525 
526 static double
m_erfc_contfrac(double x)527 m_erfc_contfrac(double x)
528 {
529     double x2, a, da, p, p_last, q, q_last, b, result;
530     int i, saved_errno;
531 
532     if (x >= ERFC_CONTFRAC_CUTOFF)
533         return 0.0;
534 
535     x2 = x*x;
536     a = 0.0;
537     da = 0.5;
538     p = 1.0; p_last = 0.0;
539     q = da + x2; q_last = 1.0;
540     for (i = 0; i < ERFC_CONTFRAC_TERMS; i++) {
541         double temp;
542         a += da;
543         da += 2.0;
544         b = da + x2;
545         temp = p; p = b*p - a*p_last; p_last = temp;
546         temp = q; q = b*q - a*q_last; q_last = temp;
547     }
548     /* Issue #8986: On some platforms, exp sets errno on underflow to zero;
549        save the current errno value so that we can restore it later. */
550     saved_errno = errno;
551     result = p / q * x * exp(-x2) / sqrtpi;
552     errno = saved_errno;
553     return result;
554 }
555 
556 #endif /* !defined(HAVE_ERF) || !defined(HAVE_ERFC) */
557 
558 /* Error function erf(x), for general x */
559 
560 static double
m_erf(double x)561 m_erf(double x)
562 {
563 #ifdef HAVE_ERF
564     return erf(x);
565 #else
566     double absx, cf;
567 
568     if (Py_IS_NAN(x))
569         return x;
570     absx = fabs(x);
571     if (absx < ERF_SERIES_CUTOFF)
572         return m_erf_series(x);
573     else {
574         cf = m_erfc_contfrac(absx);
575         return x > 0.0 ? 1.0 - cf : cf - 1.0;
576     }
577 #endif
578 }
579 
580 /* Complementary error function erfc(x), for general x. */
581 
582 static double
m_erfc(double x)583 m_erfc(double x)
584 {
585 #ifdef HAVE_ERFC
586     return erfc(x);
587 #else
588     double absx, cf;
589 
590     if (Py_IS_NAN(x))
591         return x;
592     absx = fabs(x);
593     if (absx < ERF_SERIES_CUTOFF)
594         return 1.0 - m_erf_series(x);
595     else {
596         cf = m_erfc_contfrac(absx);
597         return x > 0.0 ? cf : 2.0 - cf;
598     }
599 #endif
600 }
601 
602 /*
603    wrapper for atan2 that deals directly with special cases before
604    delegating to the platform libm for the remaining cases.  This
605    is necessary to get consistent behaviour across platforms.
606    Windows, FreeBSD and alpha Tru64 are amongst platforms that don't
607    always follow C99.
608 */
609 
610 static double
m_atan2(double y,double x)611 m_atan2(double y, double x)
612 {
613     if (Py_IS_NAN(x) || Py_IS_NAN(y))
614         return Py_NAN;
615     if (Py_IS_INFINITY(y)) {
616         if (Py_IS_INFINITY(x)) {
617             if (copysign(1., x) == 1.)
618                 /* atan2(+-inf, +inf) == +-pi/4 */
619                 return copysign(0.25*Py_MATH_PI, y);
620             else
621                 /* atan2(+-inf, -inf) == +-pi*3/4 */
622                 return copysign(0.75*Py_MATH_PI, y);
623         }
624         /* atan2(+-inf, x) == +-pi/2 for finite x */
625         return copysign(0.5*Py_MATH_PI, y);
626     }
627     if (Py_IS_INFINITY(x) || y == 0.) {
628         if (copysign(1., x) == 1.)
629             /* atan2(+-y, +inf) = atan2(+-0, +x) = +-0. */
630             return copysign(0., y);
631         else
632             /* atan2(+-y, -inf) = atan2(+-0., -x) = +-pi. */
633             return copysign(Py_MATH_PI, y);
634     }
635     return atan2(y, x);
636 }
637 
638 
639 /* IEEE 754-style remainder operation: x - n*y where n*y is the nearest
640    multiple of y to x, taking n even in the case of a tie. Assuming an IEEE 754
641    binary floating-point format, the result is always exact. */
642 
643 static double
m_remainder(double x,double y)644 m_remainder(double x, double y)
645 {
646     /* Deal with most common case first. */
647     if (Py_IS_FINITE(x) && Py_IS_FINITE(y)) {
648         double absx, absy, c, m, r;
649 
650         if (y == 0.0) {
651             return Py_NAN;
652         }
653 
654         absx = fabs(x);
655         absy = fabs(y);
656         m = fmod(absx, absy);
657 
658         /*
659            Warning: some subtlety here. What we *want* to know at this point is
660            whether the remainder m is less than, equal to, or greater than half
661            of absy. However, we can't do that comparison directly because we
662            can't be sure that 0.5*absy is representable (the multiplication
663            might incur precision loss due to underflow). So instead we compare
664            m with the complement c = absy - m: m < 0.5*absy if and only if m <
665            c, and so on. The catch is that absy - m might also not be
666            representable, but it turns out that it doesn't matter:
667 
668            - if m > 0.5*absy then absy - m is exactly representable, by
669              Sterbenz's lemma, so m > c
670            - if m == 0.5*absy then again absy - m is exactly representable
671              and m == c
672            - if m < 0.5*absy then either (i) 0.5*absy is exactly representable,
673              in which case 0.5*absy < absy - m, so 0.5*absy <= c and hence m <
674              c, or (ii) absy is tiny, either subnormal or in the lowest normal
675              binade. Then absy - m is exactly representable and again m < c.
676         */
677 
678         c = absy - m;
679         if (m < c) {
680             r = m;
681         }
682         else if (m > c) {
683             r = -c;
684         }
685         else {
686             /*
687                Here absx is exactly halfway between two multiples of absy,
688                and we need to choose the even multiple. x now has the form
689 
690                    absx = n * absy + m
691 
692                for some integer n (recalling that m = 0.5*absy at this point).
693                If n is even we want to return m; if n is odd, we need to
694                return -m.
695 
696                So
697 
698                    0.5 * (absx - m) = (n/2) * absy
699 
700                and now reducing modulo absy gives us:
701 
702                                                   | m, if n is odd
703                    fmod(0.5 * (absx - m), absy) = |
704                                                   | 0, if n is even
705 
706                Now m - 2.0 * fmod(...) gives the desired result: m
707                if n is even, -m if m is odd.
708 
709                Note that all steps in fmod(0.5 * (absx - m), absy)
710                will be computed exactly, with no rounding error
711                introduced.
712             */
713             assert(m == c);
714             r = m - 2.0 * fmod(0.5 * (absx - m), absy);
715         }
716         return copysign(1.0, x) * r;
717     }
718 
719     /* Special values. */
720     if (Py_IS_NAN(x)) {
721         return x;
722     }
723     if (Py_IS_NAN(y)) {
724         return y;
725     }
726     if (Py_IS_INFINITY(x)) {
727         return Py_NAN;
728     }
729     assert(Py_IS_INFINITY(y));
730     return x;
731 }
732 
733 
734 /*
735     Various platforms (Solaris, OpenBSD) do nonstandard things for log(0),
736     log(-ve), log(NaN).  Here are wrappers for log and log10 that deal with
737     special values directly, passing positive non-special values through to
738     the system log/log10.
739  */
740 
741 static double
m_log(double x)742 m_log(double x)
743 {
744     if (Py_IS_FINITE(x)) {
745         if (x > 0.0)
746             return log(x);
747         errno = EDOM;
748         if (x == 0.0)
749             return -Py_HUGE_VAL; /* log(0) = -inf */
750         else
751             return Py_NAN; /* log(-ve) = nan */
752     }
753     else if (Py_IS_NAN(x))
754         return x; /* log(nan) = nan */
755     else if (x > 0.0)
756         return x; /* log(inf) = inf */
757     else {
758         errno = EDOM;
759         return Py_NAN; /* log(-inf) = nan */
760     }
761 }
762 
763 /*
764    log2: log to base 2.
765 
766    Uses an algorithm that should:
767 
768      (a) produce exact results for powers of 2, and
769      (b) give a monotonic log2 (for positive finite floats),
770          assuming that the system log is monotonic.
771 */
772 
773 static double
m_log2(double x)774 m_log2(double x)
775 {
776     if (!Py_IS_FINITE(x)) {
777         if (Py_IS_NAN(x))
778             return x; /* log2(nan) = nan */
779         else if (x > 0.0)
780             return x; /* log2(+inf) = +inf */
781         else {
782             errno = EDOM;
783             return Py_NAN; /* log2(-inf) = nan, invalid-operation */
784         }
785     }
786 
787     if (x > 0.0) {
788 #ifdef HAVE_LOG2
789         return log2(x);
790 #else
791         double m;
792         int e;
793         m = frexp(x, &e);
794         /* We want log2(m * 2**e) == log(m) / log(2) + e.  Care is needed when
795          * x is just greater than 1.0: in that case e is 1, log(m) is negative,
796          * and we get significant cancellation error from the addition of
797          * log(m) / log(2) to e.  The slight rewrite of the expression below
798          * avoids this problem.
799          */
800         if (x >= 1.0) {
801             return log(2.0 * m) / log(2.0) + (e - 1);
802         }
803         else {
804             return log(m) / log(2.0) + e;
805         }
806 #endif
807     }
808     else if (x == 0.0) {
809         errno = EDOM;
810         return -Py_HUGE_VAL; /* log2(0) = -inf, divide-by-zero */
811     }
812     else {
813         errno = EDOM;
814         return Py_NAN; /* log2(-inf) = nan, invalid-operation */
815     }
816 }
817 
818 static double
m_log10(double x)819 m_log10(double x)
820 {
821     if (Py_IS_FINITE(x)) {
822         if (x > 0.0)
823             return log10(x);
824         errno = EDOM;
825         if (x == 0.0)
826             return -Py_HUGE_VAL; /* log10(0) = -inf */
827         else
828             return Py_NAN; /* log10(-ve) = nan */
829     }
830     else if (Py_IS_NAN(x))
831         return x; /* log10(nan) = nan */
832     else if (x > 0.0)
833         return x; /* log10(inf) = inf */
834     else {
835         errno = EDOM;
836         return Py_NAN; /* log10(-inf) = nan */
837     }
838 }
839 
840 
841 static PyObject *
math_gcd(PyObject * module,PyObject * const * args,Py_ssize_t nargs)842 math_gcd(PyObject *module, PyObject * const *args, Py_ssize_t nargs)
843 {
844     PyObject *res, *x;
845     Py_ssize_t i;
846 
847     if (nargs == 0) {
848         return PyLong_FromLong(0);
849     }
850     res = PyNumber_Index(args[0]);
851     if (res == NULL) {
852         return NULL;
853     }
854     if (nargs == 1) {
855         Py_SETREF(res, PyNumber_Absolute(res));
856         return res;
857     }
858 
859     PyObject *one = _PyLong_GetOne();  // borrowed ref
860     for (i = 1; i < nargs; i++) {
861         x = _PyNumber_Index(args[i]);
862         if (x == NULL) {
863             Py_DECREF(res);
864             return NULL;
865         }
866         if (res == one) {
867             /* Fast path: just check arguments.
868                It is okay to use identity comparison here. */
869             Py_DECREF(x);
870             continue;
871         }
872         Py_SETREF(res, _PyLong_GCD(res, x));
873         Py_DECREF(x);
874         if (res == NULL) {
875             return NULL;
876         }
877     }
878     return res;
879 }
880 
881 PyDoc_STRVAR(math_gcd_doc,
882 "gcd($module, *integers)\n"
883 "--\n"
884 "\n"
885 "Greatest Common Divisor.");
886 
887 
888 static PyObject *
long_lcm(PyObject * a,PyObject * b)889 long_lcm(PyObject *a, PyObject *b)
890 {
891     PyObject *g, *m, *f, *ab;
892 
893     if (Py_SIZE(a) == 0 || Py_SIZE(b) == 0) {
894         return PyLong_FromLong(0);
895     }
896     g = _PyLong_GCD(a, b);
897     if (g == NULL) {
898         return NULL;
899     }
900     f = PyNumber_FloorDivide(a, g);
901     Py_DECREF(g);
902     if (f == NULL) {
903         return NULL;
904     }
905     m = PyNumber_Multiply(f, b);
906     Py_DECREF(f);
907     if (m == NULL) {
908         return NULL;
909     }
910     ab = PyNumber_Absolute(m);
911     Py_DECREF(m);
912     return ab;
913 }
914 
915 
916 static PyObject *
math_lcm(PyObject * module,PyObject * const * args,Py_ssize_t nargs)917 math_lcm(PyObject *module, PyObject * const *args, Py_ssize_t nargs)
918 {
919     PyObject *res, *x;
920     Py_ssize_t i;
921 
922     if (nargs == 0) {
923         return PyLong_FromLong(1);
924     }
925     res = PyNumber_Index(args[0]);
926     if (res == NULL) {
927         return NULL;
928     }
929     if (nargs == 1) {
930         Py_SETREF(res, PyNumber_Absolute(res));
931         return res;
932     }
933 
934     PyObject *zero = _PyLong_GetZero();  // borrowed ref
935     for (i = 1; i < nargs; i++) {
936         x = PyNumber_Index(args[i]);
937         if (x == NULL) {
938             Py_DECREF(res);
939             return NULL;
940         }
941         if (res == zero) {
942             /* Fast path: just check arguments.
943                It is okay to use identity comparison here. */
944             Py_DECREF(x);
945             continue;
946         }
947         Py_SETREF(res, long_lcm(res, x));
948         Py_DECREF(x);
949         if (res == NULL) {
950             return NULL;
951         }
952     }
953     return res;
954 }
955 
956 
957 PyDoc_STRVAR(math_lcm_doc,
958 "lcm($module, *integers)\n"
959 "--\n"
960 "\n"
961 "Least Common Multiple.");
962 
963 
964 /* Call is_error when errno != 0, and where x is the result libm
965  * returned.  is_error will usually set up an exception and return
966  * true (1), but may return false (0) without setting up an exception.
967  */
968 static int
is_error(double x)969 is_error(double x)
970 {
971     int result = 1;     /* presumption of guilt */
972     assert(errno);      /* non-zero errno is a precondition for calling */
973     if (errno == EDOM)
974         PyErr_SetString(PyExc_ValueError, "math domain error");
975 
976     else if (errno == ERANGE) {
977         /* ANSI C generally requires libm functions to set ERANGE
978          * on overflow, but also generally *allows* them to set
979          * ERANGE on underflow too.  There's no consistency about
980          * the latter across platforms.
981          * Alas, C99 never requires that errno be set.
982          * Here we suppress the underflow errors (libm functions
983          * should return a zero on underflow, and +- HUGE_VAL on
984          * overflow, so testing the result for zero suffices to
985          * distinguish the cases).
986          *
987          * On some platforms (Ubuntu/ia64) it seems that errno can be
988          * set to ERANGE for subnormal results that do *not* underflow
989          * to zero.  So to be safe, we'll ignore ERANGE whenever the
990          * function result is less than 1.5 in absolute value.
991          *
992          * bpo-46018: Changed to 1.5 to ensure underflows in expm1()
993          * are correctly detected, since the function may underflow
994          * toward -1.0 rather than 0.0.
995          */
996         if (fabs(x) < 1.5)
997             result = 0;
998         else
999             PyErr_SetString(PyExc_OverflowError,
1000                             "math range error");
1001     }
1002     else
1003         /* Unexpected math error */
1004         PyErr_SetFromErrno(PyExc_ValueError);
1005     return result;
1006 }
1007 
1008 /*
1009    math_1 is used to wrap a libm function f that takes a double
1010    argument and returns a double.
1011 
1012    The error reporting follows these rules, which are designed to do
1013    the right thing on C89/C99 platforms and IEEE 754/non IEEE 754
1014    platforms.
1015 
1016    - a NaN result from non-NaN inputs causes ValueError to be raised
1017    - an infinite result from finite inputs causes OverflowError to be
1018      raised if can_overflow is 1, or raises ValueError if can_overflow
1019      is 0.
1020    - if the result is finite and errno == EDOM then ValueError is
1021      raised
1022    - if the result is finite and nonzero and errno == ERANGE then
1023      OverflowError is raised
1024 
1025    The last rule is used to catch overflow on platforms which follow
1026    C89 but for which HUGE_VAL is not an infinity.
1027 
1028    For the majority of one-argument functions these rules are enough
1029    to ensure that Python's functions behave as specified in 'Annex F'
1030    of the C99 standard, with the 'invalid' and 'divide-by-zero'
1031    floating-point exceptions mapping to Python's ValueError and the
1032    'overflow' floating-point exception mapping to OverflowError.
1033    math_1 only works for functions that don't have singularities *and*
1034    the possibility of overflow; fortunately, that covers everything we
1035    care about right now.
1036 */
1037 
1038 static PyObject *
math_1_to_whatever(PyObject * arg,double (* func)(double),PyObject * (* from_double_func)(double),int can_overflow)1039 math_1_to_whatever(PyObject *arg, double (*func) (double),
1040                    PyObject *(*from_double_func) (double),
1041                    int can_overflow)
1042 {
1043     double x, r;
1044     x = PyFloat_AsDouble(arg);
1045     if (x == -1.0 && PyErr_Occurred())
1046         return NULL;
1047     errno = 0;
1048     r = (*func)(x);
1049     if (Py_IS_NAN(r) && !Py_IS_NAN(x)) {
1050         PyErr_SetString(PyExc_ValueError,
1051                         "math domain error"); /* invalid arg */
1052         return NULL;
1053     }
1054     if (Py_IS_INFINITY(r) && Py_IS_FINITE(x)) {
1055         if (can_overflow)
1056             PyErr_SetString(PyExc_OverflowError,
1057                             "math range error"); /* overflow */
1058         else
1059             PyErr_SetString(PyExc_ValueError,
1060                             "math domain error"); /* singularity */
1061         return NULL;
1062     }
1063     if (Py_IS_FINITE(r) && errno && is_error(r))
1064         /* this branch unnecessary on most platforms */
1065         return NULL;
1066 
1067     return (*from_double_func)(r);
1068 }
1069 
1070 /* variant of math_1, to be used when the function being wrapped is known to
1071    set errno properly (that is, errno = EDOM for invalid or divide-by-zero,
1072    errno = ERANGE for overflow). */
1073 
1074 static PyObject *
math_1a(PyObject * arg,double (* func)(double))1075 math_1a(PyObject *arg, double (*func) (double))
1076 {
1077     double x, r;
1078     x = PyFloat_AsDouble(arg);
1079     if (x == -1.0 && PyErr_Occurred())
1080         return NULL;
1081     errno = 0;
1082     r = (*func)(x);
1083     if (errno && is_error(r))
1084         return NULL;
1085     return PyFloat_FromDouble(r);
1086 }
1087 
1088 /*
1089    math_2 is used to wrap a libm function f that takes two double
1090    arguments and returns a double.
1091 
1092    The error reporting follows these rules, which are designed to do
1093    the right thing on C89/C99 platforms and IEEE 754/non IEEE 754
1094    platforms.
1095 
1096    - a NaN result from non-NaN inputs causes ValueError to be raised
1097    - an infinite result from finite inputs causes OverflowError to be
1098      raised.
1099    - if the result is finite and errno == EDOM then ValueError is
1100      raised
1101    - if the result is finite and nonzero and errno == ERANGE then
1102      OverflowError is raised
1103 
1104    The last rule is used to catch overflow on platforms which follow
1105    C89 but for which HUGE_VAL is not an infinity.
1106 
1107    For most two-argument functions (copysign, fmod, hypot, atan2)
1108    these rules are enough to ensure that Python's functions behave as
1109    specified in 'Annex F' of the C99 standard, with the 'invalid' and
1110    'divide-by-zero' floating-point exceptions mapping to Python's
1111    ValueError and the 'overflow' floating-point exception mapping to
1112    OverflowError.
1113 */
1114 
1115 static PyObject *
math_1(PyObject * arg,double (* func)(double),int can_overflow)1116 math_1(PyObject *arg, double (*func) (double), int can_overflow)
1117 {
1118     return math_1_to_whatever(arg, func, PyFloat_FromDouble, can_overflow);
1119 }
1120 
1121 static PyObject *
math_2(PyObject * const * args,Py_ssize_t nargs,double (* func)(double,double),const char * funcname)1122 math_2(PyObject *const *args, Py_ssize_t nargs,
1123        double (*func) (double, double), const char *funcname)
1124 {
1125     double x, y, r;
1126     if (!_PyArg_CheckPositional(funcname, nargs, 2, 2))
1127         return NULL;
1128     x = PyFloat_AsDouble(args[0]);
1129     if (x == -1.0 && PyErr_Occurred()) {
1130         return NULL;
1131     }
1132     y = PyFloat_AsDouble(args[1]);
1133     if (y == -1.0 && PyErr_Occurred()) {
1134         return NULL;
1135     }
1136     errno = 0;
1137     r = (*func)(x, y);
1138     if (Py_IS_NAN(r)) {
1139         if (!Py_IS_NAN(x) && !Py_IS_NAN(y))
1140             errno = EDOM;
1141         else
1142             errno = 0;
1143     }
1144     else if (Py_IS_INFINITY(r)) {
1145         if (Py_IS_FINITE(x) && Py_IS_FINITE(y))
1146             errno = ERANGE;
1147         else
1148             errno = 0;
1149     }
1150     if (errno && is_error(r))
1151         return NULL;
1152     else
1153         return PyFloat_FromDouble(r);
1154 }
1155 
1156 #define FUNC1(funcname, func, can_overflow, docstring)                  \
1157     static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
1158         return math_1(args, func, can_overflow);                            \
1159     }\
1160     PyDoc_STRVAR(math_##funcname##_doc, docstring);
1161 
1162 #define FUNC1A(funcname, func, docstring)                               \
1163     static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
1164         return math_1a(args, func);                                     \
1165     }\
1166     PyDoc_STRVAR(math_##funcname##_doc, docstring);
1167 
1168 #define FUNC2(funcname, func, docstring) \
1169     static PyObject * math_##funcname(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { \
1170         return math_2(args, nargs, func, #funcname); \
1171     }\
1172     PyDoc_STRVAR(math_##funcname##_doc, docstring);
1173 
1174 FUNC1(acos, acos, 0,
1175       "acos($module, x, /)\n--\n\n"
1176       "Return the arc cosine (measured in radians) of x.\n\n"
1177       "The result is between 0 and pi.")
1178 FUNC1(acosh, acosh, 0,
1179       "acosh($module, x, /)\n--\n\n"
1180       "Return the inverse hyperbolic cosine of x.")
1181 FUNC1(asin, asin, 0,
1182       "asin($module, x, /)\n--\n\n"
1183       "Return the arc sine (measured in radians) of x.\n\n"
1184       "The result is between -pi/2 and pi/2.")
1185 FUNC1(asinh, asinh, 0,
1186       "asinh($module, x, /)\n--\n\n"
1187       "Return the inverse hyperbolic sine of x.")
1188 FUNC1(atan, atan, 0,
1189       "atan($module, x, /)\n--\n\n"
1190       "Return the arc tangent (measured in radians) of x.\n\n"
1191       "The result is between -pi/2 and pi/2.")
1192 FUNC2(atan2, m_atan2,
1193       "atan2($module, y, x, /)\n--\n\n"
1194       "Return the arc tangent (measured in radians) of y/x.\n\n"
1195       "Unlike atan(y/x), the signs of both x and y are considered.")
1196 FUNC1(atanh, atanh, 0,
1197       "atanh($module, x, /)\n--\n\n"
1198       "Return the inverse hyperbolic tangent of x.")
1199 FUNC1(cbrt, cbrt, 0,
1200       "cbrt($module, x, /)\n--\n\n"
1201       "Return the cube root of x.")
1202 
1203 /*[clinic input]
1204 math.ceil
1205 
1206     x as number: object
1207     /
1208 
1209 Return the ceiling of x as an Integral.
1210 
1211 This is the smallest integer >= x.
1212 [clinic start generated code]*/
1213 
1214 static PyObject *
math_ceil(PyObject * module,PyObject * number)1215 math_ceil(PyObject *module, PyObject *number)
1216 /*[clinic end generated code: output=6c3b8a78bc201c67 input=2725352806399cab]*/
1217 {
1218     _Py_IDENTIFIER(__ceil__);
1219 
1220     if (!PyFloat_CheckExact(number)) {
1221         PyObject *method = _PyObject_LookupSpecialId(number, &PyId___ceil__);
1222         if (method != NULL) {
1223             PyObject *result = _PyObject_CallNoArgs(method);
1224             Py_DECREF(method);
1225             return result;
1226         }
1227         if (PyErr_Occurred())
1228             return NULL;
1229     }
1230     double x = PyFloat_AsDouble(number);
1231     if (x == -1.0 && PyErr_Occurred())
1232         return NULL;
1233 
1234     return PyLong_FromDouble(ceil(x));
1235 }
1236 
1237 FUNC2(copysign, copysign,
1238       "copysign($module, x, y, /)\n--\n\n"
1239        "Return a float with the magnitude (absolute value) of x but the sign of y.\n\n"
1240       "On platforms that support signed zeros, copysign(1.0, -0.0)\n"
1241       "returns -1.0.\n")
1242 FUNC1(cos, cos, 0,
1243       "cos($module, x, /)\n--\n\n"
1244       "Return the cosine of x (measured in radians).")
1245 FUNC1(cosh, cosh, 1,
1246       "cosh($module, x, /)\n--\n\n"
1247       "Return the hyperbolic cosine of x.")
1248 FUNC1A(erf, m_erf,
1249        "erf($module, x, /)\n--\n\n"
1250        "Error function at x.")
1251 FUNC1A(erfc, m_erfc,
1252        "erfc($module, x, /)\n--\n\n"
1253        "Complementary error function at x.")
1254 FUNC1(exp, exp, 1,
1255       "exp($module, x, /)\n--\n\n"
1256       "Return e raised to the power of x.")
1257 FUNC1(exp2, exp2, 1,
1258       "exp2($module, x, /)\n--\n\n"
1259       "Return 2 raised to the power of x.")
1260 FUNC1(expm1, expm1, 1,
1261       "expm1($module, x, /)\n--\n\n"
1262       "Return exp(x)-1.\n\n"
1263       "This function avoids the loss of precision involved in the direct "
1264       "evaluation of exp(x)-1 for small x.")
1265 FUNC1(fabs, fabs, 0,
1266       "fabs($module, x, /)\n--\n\n"
1267       "Return the absolute value of the float x.")
1268 
1269 /*[clinic input]
1270 math.floor
1271 
1272     x as number: object
1273     /
1274 
1275 Return the floor of x as an Integral.
1276 
1277 This is the largest integer <= x.
1278 [clinic start generated code]*/
1279 
1280 static PyObject *
math_floor(PyObject * module,PyObject * number)1281 math_floor(PyObject *module, PyObject *number)
1282 /*[clinic end generated code: output=c6a65c4884884b8a input=63af6b5d7ebcc3d6]*/
1283 {
1284     double x;
1285 
1286     _Py_IDENTIFIER(__floor__);
1287 
1288     if (PyFloat_CheckExact(number)) {
1289         x = PyFloat_AS_DOUBLE(number);
1290     }
1291     else
1292     {
1293         PyObject *method = _PyObject_LookupSpecialId(number, &PyId___floor__);
1294         if (method != NULL) {
1295             PyObject *result = _PyObject_CallNoArgs(method);
1296             Py_DECREF(method);
1297             return result;
1298         }
1299         if (PyErr_Occurred())
1300             return NULL;
1301         x = PyFloat_AsDouble(number);
1302         if (x == -1.0 && PyErr_Occurred())
1303             return NULL;
1304     }
1305     return PyLong_FromDouble(floor(x));
1306 }
1307 
1308 FUNC1A(gamma, m_tgamma,
1309       "gamma($module, x, /)\n--\n\n"
1310       "Gamma function at x.")
1311 FUNC1A(lgamma, m_lgamma,
1312       "lgamma($module, x, /)\n--\n\n"
1313       "Natural logarithm of absolute value of Gamma function at x.")
1314 FUNC1(log1p, m_log1p, 0,
1315       "log1p($module, x, /)\n--\n\n"
1316       "Return the natural logarithm of 1+x (base e).\n\n"
1317       "The result is computed in a way which is accurate for x near zero.")
1318 FUNC2(remainder, m_remainder,
1319       "remainder($module, x, y, /)\n--\n\n"
1320       "Difference between x and the closest integer multiple of y.\n\n"
1321       "Return x - n*y where n*y is the closest integer multiple of y.\n"
1322       "In the case where x is exactly halfway between two multiples of\n"
1323       "y, the nearest even value of n is used. The result is always exact.")
1324 FUNC1(sin, sin, 0,
1325       "sin($module, x, /)\n--\n\n"
1326       "Return the sine of x (measured in radians).")
1327 FUNC1(sinh, sinh, 1,
1328       "sinh($module, x, /)\n--\n\n"
1329       "Return the hyperbolic sine of x.")
1330 FUNC1(sqrt, sqrt, 0,
1331       "sqrt($module, x, /)\n--\n\n"
1332       "Return the square root of x.")
1333 FUNC1(tan, tan, 0,
1334       "tan($module, x, /)\n--\n\n"
1335       "Return the tangent of x (measured in radians).")
1336 FUNC1(tanh, tanh, 0,
1337       "tanh($module, x, /)\n--\n\n"
1338       "Return the hyperbolic tangent of x.")
1339 
1340 /* Precision summation function as msum() by Raymond Hettinger in
1341    <http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/393090>,
1342    enhanced with the exact partials sum and roundoff from Mark
1343    Dickinson's post at <http://bugs.python.org/file10357/msum4.py>.
1344    See those links for more details, proofs and other references.
1345 
1346    Note 1: IEEE 754R floating point semantics are assumed,
1347    but the current implementation does not re-establish special
1348    value semantics across iterations (i.e. handling -Inf + Inf).
1349 
1350    Note 2:  No provision is made for intermediate overflow handling;
1351    therefore, sum([1e+308, 1e-308, 1e+308]) returns 1e+308 while
1352    sum([1e+308, 1e+308, 1e-308]) raises an OverflowError due to the
1353    overflow of the first partial sum.
1354 
1355    Note 3: The intermediate values lo, yr, and hi are declared volatile so
1356    aggressive compilers won't algebraically reduce lo to always be exactly 0.0.
1357    Also, the volatile declaration forces the values to be stored in memory as
1358    regular doubles instead of extended long precision (80-bit) values.  This
1359    prevents double rounding because any addition or subtraction of two doubles
1360    can be resolved exactly into double-sized hi and lo values.  As long as the
1361    hi value gets forced into a double before yr and lo are computed, the extra
1362    bits in downstream extended precision operations (x87 for example) will be
1363    exactly zero and therefore can be losslessly stored back into a double,
1364    thereby preventing double rounding.
1365 
1366    Note 4: A similar implementation is in Modules/cmathmodule.c.
1367    Be sure to update both when making changes.
1368 
1369    Note 5: The signature of math.fsum() differs from builtins.sum()
1370    because the start argument doesn't make sense in the context of
1371    accurate summation.  Since the partials table is collapsed before
1372    returning a result, sum(seq2, start=sum(seq1)) may not equal the
1373    accurate result returned by sum(itertools.chain(seq1, seq2)).
1374 */
1375 
1376 #define NUM_PARTIALS  32  /* initial partials array size, on stack */
1377 
1378 /* Extend the partials array p[] by doubling its size. */
1379 static int                          /* non-zero on error */
_fsum_realloc(double ** p_ptr,Py_ssize_t n,double * ps,Py_ssize_t * m_ptr)1380 _fsum_realloc(double **p_ptr, Py_ssize_t  n,
1381              double  *ps,    Py_ssize_t *m_ptr)
1382 {
1383     void *v = NULL;
1384     Py_ssize_t m = *m_ptr;
1385 
1386     m += m;  /* double */
1387     if (n < m && (size_t)m < ((size_t)PY_SSIZE_T_MAX / sizeof(double))) {
1388         double *p = *p_ptr;
1389         if (p == ps) {
1390             v = PyMem_Malloc(sizeof(double) * m);
1391             if (v != NULL)
1392                 memcpy(v, ps, sizeof(double) * n);
1393         }
1394         else
1395             v = PyMem_Realloc(p, sizeof(double) * m);
1396     }
1397     if (v == NULL) {        /* size overflow or no memory */
1398         PyErr_SetString(PyExc_MemoryError, "math.fsum partials");
1399         return 1;
1400     }
1401     *p_ptr = (double*) v;
1402     *m_ptr = m;
1403     return 0;
1404 }
1405 
1406 /* Full precision summation of a sequence of floats.
1407 
1408    def msum(iterable):
1409        partials = []  # sorted, non-overlapping partial sums
1410        for x in iterable:
1411            i = 0
1412            for y in partials:
1413                if abs(x) < abs(y):
1414                    x, y = y, x
1415                hi = x + y
1416                lo = y - (hi - x)
1417                if lo:
1418                    partials[i] = lo
1419                    i += 1
1420                x = hi
1421            partials[i:] = [x]
1422        return sum_exact(partials)
1423 
1424    Rounded x+y stored in hi with the roundoff stored in lo.  Together hi+lo
1425    are exactly equal to x+y.  The inner loop applies hi/lo summation to each
1426    partial so that the list of partial sums remains exact.
1427 
1428    Sum_exact() adds the partial sums exactly and correctly rounds the final
1429    result (using the round-half-to-even rule).  The items in partials remain
1430    non-zero, non-special, non-overlapping and strictly increasing in
1431    magnitude, but possibly not all having the same sign.
1432 
1433    Depends on IEEE 754 arithmetic guarantees and half-even rounding.
1434 */
1435 
1436 /*[clinic input]
1437 math.fsum
1438 
1439     seq: object
1440     /
1441 
1442 Return an accurate floating point sum of values in the iterable seq.
1443 
1444 Assumes IEEE-754 floating point arithmetic.
1445 [clinic start generated code]*/
1446 
1447 static PyObject *
math_fsum(PyObject * module,PyObject * seq)1448 math_fsum(PyObject *module, PyObject *seq)
1449 /*[clinic end generated code: output=ba5c672b87fe34fc input=c51b7d8caf6f6e82]*/
1450 {
1451     PyObject *item, *iter, *sum = NULL;
1452     Py_ssize_t i, j, n = 0, m = NUM_PARTIALS;
1453     double x, y, t, ps[NUM_PARTIALS], *p = ps;
1454     double xsave, special_sum = 0.0, inf_sum = 0.0;
1455     volatile double hi, yr, lo;
1456 
1457     iter = PyObject_GetIter(seq);
1458     if (iter == NULL)
1459         return NULL;
1460 
1461     for(;;) {           /* for x in iterable */
1462         assert(0 <= n && n <= m);
1463         assert((m == NUM_PARTIALS && p == ps) ||
1464                (m >  NUM_PARTIALS && p != NULL));
1465 
1466         item = PyIter_Next(iter);
1467         if (item == NULL) {
1468             if (PyErr_Occurred())
1469                 goto _fsum_error;
1470             break;
1471         }
1472         ASSIGN_DOUBLE(x, item, error_with_item);
1473         Py_DECREF(item);
1474 
1475         xsave = x;
1476         for (i = j = 0; j < n; j++) {       /* for y in partials */
1477             y = p[j];
1478             if (fabs(x) < fabs(y)) {
1479                 t = x; x = y; y = t;
1480             }
1481             hi = x + y;
1482             yr = hi - x;
1483             lo = y - yr;
1484             if (lo != 0.0)
1485                 p[i++] = lo;
1486             x = hi;
1487         }
1488 
1489         n = i;                              /* ps[i:] = [x] */
1490         if (x != 0.0) {
1491             if (! Py_IS_FINITE(x)) {
1492                 /* a nonfinite x could arise either as
1493                    a result of intermediate overflow, or
1494                    as a result of a nan or inf in the
1495                    summands */
1496                 if (Py_IS_FINITE(xsave)) {
1497                     PyErr_SetString(PyExc_OverflowError,
1498                           "intermediate overflow in fsum");
1499                     goto _fsum_error;
1500                 }
1501                 if (Py_IS_INFINITY(xsave))
1502                     inf_sum += xsave;
1503                 special_sum += xsave;
1504                 /* reset partials */
1505                 n = 0;
1506             }
1507             else if (n >= m && _fsum_realloc(&p, n, ps, &m))
1508                 goto _fsum_error;
1509             else
1510                 p[n++] = x;
1511         }
1512     }
1513 
1514     if (special_sum != 0.0) {
1515         if (Py_IS_NAN(inf_sum))
1516             PyErr_SetString(PyExc_ValueError,
1517                             "-inf + inf in fsum");
1518         else
1519             sum = PyFloat_FromDouble(special_sum);
1520         goto _fsum_error;
1521     }
1522 
1523     hi = 0.0;
1524     if (n > 0) {
1525         hi = p[--n];
1526         /* sum_exact(ps, hi) from the top, stop when the sum becomes
1527            inexact. */
1528         while (n > 0) {
1529             x = hi;
1530             y = p[--n];
1531             assert(fabs(y) < fabs(x));
1532             hi = x + y;
1533             yr = hi - x;
1534             lo = y - yr;
1535             if (lo != 0.0)
1536                 break;
1537         }
1538         /* Make half-even rounding work across multiple partials.
1539            Needed so that sum([1e-16, 1, 1e16]) will round-up the last
1540            digit to two instead of down to zero (the 1e-16 makes the 1
1541            slightly closer to two).  With a potential 1 ULP rounding
1542            error fixed-up, math.fsum() can guarantee commutativity. */
1543         if (n > 0 && ((lo < 0.0 && p[n-1] < 0.0) ||
1544                       (lo > 0.0 && p[n-1] > 0.0))) {
1545             y = lo * 2.0;
1546             x = hi + y;
1547             yr = x - hi;
1548             if (y == yr)
1549                 hi = x;
1550         }
1551     }
1552     sum = PyFloat_FromDouble(hi);
1553 
1554   _fsum_error:
1555     Py_DECREF(iter);
1556     if (p != ps)
1557         PyMem_Free(p);
1558     return sum;
1559 
1560   error_with_item:
1561     Py_DECREF(item);
1562     goto _fsum_error;
1563 }
1564 
1565 #undef NUM_PARTIALS
1566 
1567 
1568 static unsigned long
count_set_bits(unsigned long n)1569 count_set_bits(unsigned long n)
1570 {
1571     unsigned long count = 0;
1572     while (n != 0) {
1573         ++count;
1574         n &= n - 1; /* clear least significant bit */
1575     }
1576     return count;
1577 }
1578 
1579 /* Integer square root
1580 
1581 Given a nonnegative integer `n`, we want to compute the largest integer
1582 `a` for which `a * a <= n`, or equivalently the integer part of the exact
1583 square root of `n`.
1584 
1585 We use an adaptive-precision pure-integer version of Newton's iteration. Given
1586 a positive integer `n`, the algorithm produces at each iteration an integer
1587 approximation `a` to the square root of `n >> s` for some even integer `s`,
1588 with `s` decreasing as the iterations progress. On the final iteration, `s` is
1589 zero and we have an approximation to the square root of `n` itself.
1590 
1591 At every step, the approximation `a` is strictly within 1.0 of the true square
1592 root, so we have
1593 
1594     (a - 1)**2 < (n >> s) < (a + 1)**2
1595 
1596 After the final iteration, a check-and-correct step is needed to determine
1597 whether `a` or `a - 1` gives the desired integer square root of `n`.
1598 
1599 The algorithm is remarkable in its simplicity. There's no need for a
1600 per-iteration check-and-correct step, and termination is straightforward: the
1601 number of iterations is known in advance (it's exactly `floor(log2(log2(n)))`
1602 for `n > 1`). The only tricky part of the correctness proof is in establishing
1603 that the bound `(a - 1)**2 < (n >> s) < (a + 1)**2` is maintained from one
1604 iteration to the next. A sketch of the proof of this is given below.
1605 
1606 In addition to the proof sketch, a formal, computer-verified proof
1607 of correctness (using Lean) of an equivalent recursive algorithm can be found
1608 here:
1609 
1610     https://github.com/mdickinson/snippets/blob/master/proofs/isqrt/src/isqrt.lean
1611 
1612 
1613 Here's Python code equivalent to the C implementation below:
1614 
1615     def isqrt(n):
1616         """
1617         Return the integer part of the square root of the input.
1618         """
1619         n = operator.index(n)
1620 
1621         if n < 0:
1622             raise ValueError("isqrt() argument must be nonnegative")
1623         if n == 0:
1624             return 0
1625 
1626         c = (n.bit_length() - 1) // 2
1627         a = 1
1628         d = 0
1629         for s in reversed(range(c.bit_length())):
1630             # Loop invariant: (a-1)**2 < (n >> 2*(c - d)) < (a+1)**2
1631             e = d
1632             d = c >> s
1633             a = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
1634 
1635         return a - (a*a > n)
1636 
1637 
1638 Sketch of proof of correctness
1639 ------------------------------
1640 
1641 The delicate part of the correctness proof is showing that the loop invariant
1642 is preserved from one iteration to the next. That is, just before the line
1643 
1644     a = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
1645 
1646 is executed in the above code, we know that
1647 
1648     (1)  (a - 1)**2 < (n >> 2*(c - e)) < (a + 1)**2.
1649 
1650 (since `e` is always the value of `d` from the previous iteration). We must
1651 prove that after that line is executed, we have
1652 
1653     (a - 1)**2 < (n >> 2*(c - d)) < (a + 1)**2
1654 
1655 To facilitate the proof, we make some changes of notation. Write `m` for
1656 `n >> 2*(c-d)`, and write `b` for the new value of `a`, so
1657 
1658     b = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
1659 
1660 or equivalently:
1661 
1662     (2)  b = (a << d - e - 1) + (m >> d - e + 1) // a
1663 
1664 Then we can rewrite (1) as:
1665 
1666     (3)  (a - 1)**2 < (m >> 2*(d - e)) < (a + 1)**2
1667 
1668 and we must show that (b - 1)**2 < m < (b + 1)**2.
1669 
1670 From this point on, we switch to mathematical notation, so `/` means exact
1671 division rather than integer division and `^` is used for exponentiation. We
1672 use the `√` symbol for the exact square root. In (3), we can remove the
1673 implicit floor operation to give:
1674 
1675     (4)  (a - 1)^2 < m / 4^(d - e) < (a + 1)^2
1676 
1677 Taking square roots throughout (4), scaling by `2^(d-e)`, and rearranging gives
1678 
1679     (5)  0 <= | 2^(d-e)a - √m | < 2^(d-e)
1680 
1681 Squaring and dividing through by `2^(d-e+1) a` gives
1682 
1683     (6)  0 <= 2^(d-e-1) a + m / (2^(d-e+1) a) - √m < 2^(d-e-1) / a
1684 
1685 We'll show below that `2^(d-e-1) <= a`. Given that, we can replace the
1686 right-hand side of (6) with `1`, and now replacing the central
1687 term `m / (2^(d-e+1) a)` with its floor in (6) gives
1688 
1689     (7) -1 < 2^(d-e-1) a + m // 2^(d-e+1) a - √m < 1
1690 
1691 Or equivalently, from (2):
1692 
1693     (7) -1 < b - √m < 1
1694 
1695 and rearranging gives that `(b-1)^2 < m < (b+1)^2`, which is what we needed
1696 to prove.
1697 
1698 We're not quite done: we still have to prove the inequality `2^(d - e - 1) <=
1699 a` that was used to get line (7) above. From the definition of `c`, we have
1700 `4^c <= n`, which implies
1701 
1702     (8)  4^d <= m
1703 
1704 also, since `e == d >> 1`, `d` is at most `2e + 1`, from which it follows
1705 that `2d - 2e - 1 <= d` and hence that
1706 
1707     (9)  4^(2d - 2e - 1) <= m
1708 
1709 Dividing both sides by `4^(d - e)` gives
1710 
1711     (10)  4^(d - e - 1) <= m / 4^(d - e)
1712 
1713 But we know from (4) that `m / 4^(d-e) < (a + 1)^2`, hence
1714 
1715     (11)  4^(d - e - 1) < (a + 1)^2
1716 
1717 Now taking square roots of both sides and observing that both `2^(d-e-1)` and
1718 `a` are integers gives `2^(d - e - 1) <= a`, which is what we needed. This
1719 completes the proof sketch.
1720 
1721 */
1722 
1723 /*
1724     The _approximate_isqrt_tab table provides approximate square roots for
1725     16-bit integers. For any n in the range 2**14 <= n < 2**16, the value
1726 
1727         a = _approximate_isqrt_tab[(n >> 8) - 64]
1728 
1729     is an approximate square root of n, satisfying (a - 1)**2 < n < (a + 1)**2.
1730 
1731     The table was computed in Python using the expression:
1732 
1733         [min(round(sqrt(256*n + 128)), 255) for n in range(64, 256)]
1734 */
1735 
1736 static const uint8_t _approximate_isqrt_tab[192] = {
1737     128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
1738     140, 141, 142, 143, 144, 144, 145, 146, 147, 148, 149, 150,
1739     151, 151, 152, 153, 154, 155, 156, 156, 157, 158, 159, 160,
1740     160, 161, 162, 163, 164, 164, 165, 166, 167, 167, 168, 169,
1741     170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178,
1742     179, 179, 180, 181, 181, 182, 183, 183, 184, 185, 186, 186,
1743     187, 188, 188, 189, 190, 190, 191, 192, 192, 193, 194, 194,
1744     195, 196, 196, 197, 198, 198, 199, 200, 200, 201, 201, 202,
1745     203, 203, 204, 205, 205, 206, 206, 207, 208, 208, 209, 210,
1746     210, 211, 211, 212, 213, 213, 214, 214, 215, 216, 216, 217,
1747     217, 218, 219, 219, 220, 220, 221, 221, 222, 223, 223, 224,
1748     224, 225, 225, 226, 227, 227, 228, 228, 229, 229, 230, 230,
1749     231, 232, 232, 233, 233, 234, 234, 235, 235, 236, 237, 237,
1750     238, 238, 239, 239, 240, 240, 241, 241, 242, 242, 243, 243,
1751     244, 244, 245, 246, 246, 247, 247, 248, 248, 249, 249, 250,
1752     250, 251, 251, 252, 252, 253, 253, 254, 254, 255, 255, 255,
1753 };
1754 
1755 /* Approximate square root of a large 64-bit integer.
1756 
1757    Given `n` satisfying `2**62 <= n < 2**64`, return `a`
1758    satisfying `(a - 1)**2 < n < (a + 1)**2`. */
1759 
1760 static inline uint32_t
_approximate_isqrt(uint64_t n)1761 _approximate_isqrt(uint64_t n)
1762 {
1763     uint32_t u = _approximate_isqrt_tab[(n >> 56) - 64];
1764     u = (u << 7) + (uint32_t)(n >> 41) / u;
1765     return (u << 15) + (uint32_t)((n >> 17) / u);
1766 }
1767 
1768 /*[clinic input]
1769 math.isqrt
1770 
1771     n: object
1772     /
1773 
1774 Return the integer part of the square root of the input.
1775 [clinic start generated code]*/
1776 
1777 static PyObject *
math_isqrt(PyObject * module,PyObject * n)1778 math_isqrt(PyObject *module, PyObject *n)
1779 /*[clinic end generated code: output=35a6f7f980beab26 input=5b6e7ae4fa6c43d6]*/
1780 {
1781     int a_too_large, c_bit_length;
1782     size_t c, d;
1783     uint64_t m;
1784     uint32_t u;
1785     PyObject *a = NULL, *b;
1786 
1787     n = _PyNumber_Index(n);
1788     if (n == NULL) {
1789         return NULL;
1790     }
1791 
1792     if (_PyLong_Sign(n) < 0) {
1793         PyErr_SetString(
1794             PyExc_ValueError,
1795             "isqrt() argument must be nonnegative");
1796         goto error;
1797     }
1798     if (_PyLong_Sign(n) == 0) {
1799         Py_DECREF(n);
1800         return PyLong_FromLong(0);
1801     }
1802 
1803     /* c = (n.bit_length() - 1) // 2 */
1804     c = _PyLong_NumBits(n);
1805     if (c == (size_t)(-1)) {
1806         goto error;
1807     }
1808     c = (c - 1U) / 2U;
1809 
1810     /* Fast path: if c <= 31 then n < 2**64 and we can compute directly with a
1811        fast, almost branch-free algorithm. */
1812     if (c <= 31U) {
1813         int shift = 31 - (int)c;
1814         m = (uint64_t)PyLong_AsUnsignedLongLong(n);
1815         Py_DECREF(n);
1816         if (m == (uint64_t)(-1) && PyErr_Occurred()) {
1817             return NULL;
1818         }
1819         u = _approximate_isqrt(m << 2*shift) >> shift;
1820         u -= (uint64_t)u * u > m;
1821         return PyLong_FromUnsignedLong(u);
1822     }
1823 
1824     /* Slow path: n >= 2**64. We perform the first five iterations in C integer
1825        arithmetic, then switch to using Python long integers. */
1826 
1827     /* From n >= 2**64 it follows that c.bit_length() >= 6. */
1828     c_bit_length = 6;
1829     while ((c >> c_bit_length) > 0U) {
1830         ++c_bit_length;
1831     }
1832 
1833     /* Initialise d and a. */
1834     d = c >> (c_bit_length - 5);
1835     b = _PyLong_Rshift(n, 2U*c - 62U);
1836     if (b == NULL) {
1837         goto error;
1838     }
1839     m = (uint64_t)PyLong_AsUnsignedLongLong(b);
1840     Py_DECREF(b);
1841     if (m == (uint64_t)(-1) && PyErr_Occurred()) {
1842         goto error;
1843     }
1844     u = _approximate_isqrt(m) >> (31U - d);
1845     a = PyLong_FromUnsignedLong(u);
1846     if (a == NULL) {
1847         goto error;
1848     }
1849 
1850     for (int s = c_bit_length - 6; s >= 0; --s) {
1851         PyObject *q;
1852         size_t e = d;
1853 
1854         d = c >> s;
1855 
1856         /* q = (n >> 2*c - e - d + 1) // a */
1857         q = _PyLong_Rshift(n, 2U*c - d - e + 1U);
1858         if (q == NULL) {
1859             goto error;
1860         }
1861         Py_SETREF(q, PyNumber_FloorDivide(q, a));
1862         if (q == NULL) {
1863             goto error;
1864         }
1865 
1866         /* a = (a << d - 1 - e) + q */
1867         Py_SETREF(a, _PyLong_Lshift(a, d - 1U - e));
1868         if (a == NULL) {
1869             Py_DECREF(q);
1870             goto error;
1871         }
1872         Py_SETREF(a, PyNumber_Add(a, q));
1873         Py_DECREF(q);
1874         if (a == NULL) {
1875             goto error;
1876         }
1877     }
1878 
1879     /* The correct result is either a or a - 1. Figure out which, and
1880        decrement a if necessary. */
1881 
1882     /* a_too_large = n < a * a */
1883     b = PyNumber_Multiply(a, a);
1884     if (b == NULL) {
1885         goto error;
1886     }
1887     a_too_large = PyObject_RichCompareBool(n, b, Py_LT);
1888     Py_DECREF(b);
1889     if (a_too_large == -1) {
1890         goto error;
1891     }
1892 
1893     if (a_too_large) {
1894         Py_SETREF(a, PyNumber_Subtract(a, _PyLong_GetOne()));
1895     }
1896     Py_DECREF(n);
1897     return a;
1898 
1899   error:
1900     Py_XDECREF(a);
1901     Py_DECREF(n);
1902     return NULL;
1903 }
1904 
1905 /* Divide-and-conquer factorial algorithm
1906  *
1907  * Based on the formula and pseudo-code provided at:
1908  * http://www.luschny.de/math/factorial/binarysplitfact.html
1909  *
1910  * Faster algorithms exist, but they're more complicated and depend on
1911  * a fast prime factorization algorithm.
1912  *
1913  * Notes on the algorithm
1914  * ----------------------
1915  *
1916  * factorial(n) is written in the form 2**k * m, with m odd.  k and m are
1917  * computed separately, and then combined using a left shift.
1918  *
1919  * The function factorial_odd_part computes the odd part m (i.e., the greatest
1920  * odd divisor) of factorial(n), using the formula:
1921  *
1922  *   factorial_odd_part(n) =
1923  *
1924  *        product_{i >= 0} product_{0 < j <= n / 2**i, j odd} j
1925  *
1926  * Example: factorial_odd_part(20) =
1927  *
1928  *        (1) *
1929  *        (1) *
1930  *        (1 * 3 * 5) *
1931  *        (1 * 3 * 5 * 7 * 9) *
1932  *        (1 * 3 * 5 * 7 * 9 * 11 * 13 * 15 * 17 * 19)
1933  *
1934  * Here i goes from large to small: the first term corresponds to i=4 (any
1935  * larger i gives an empty product), and the last term corresponds to i=0.
1936  * Each term can be computed from the last by multiplying by the extra odd
1937  * numbers required: e.g., to get from the penultimate term to the last one,
1938  * we multiply by (11 * 13 * 15 * 17 * 19).
1939  *
1940  * To see a hint of why this formula works, here are the same numbers as above
1941  * but with the even parts (i.e., the appropriate powers of 2) included.  For
1942  * each subterm in the product for i, we multiply that subterm by 2**i:
1943  *
1944  *   factorial(20) =
1945  *
1946  *        (16) *
1947  *        (8) *
1948  *        (4 * 12 * 20) *
1949  *        (2 * 6 * 10 * 14 * 18) *
1950  *        (1 * 3 * 5 * 7 * 9 * 11 * 13 * 15 * 17 * 19)
1951  *
1952  * The factorial_partial_product function computes the product of all odd j in
1953  * range(start, stop) for given start and stop.  It's used to compute the
1954  * partial products like (11 * 13 * 15 * 17 * 19) in the example above.  It
1955  * operates recursively, repeatedly splitting the range into two roughly equal
1956  * pieces until the subranges are small enough to be computed using only C
1957  * integer arithmetic.
1958  *
1959  * The two-valuation k (i.e., the exponent of the largest power of 2 dividing
1960  * the factorial) is computed independently in the main math_factorial
1961  * function.  By standard results, its value is:
1962  *
1963  *    two_valuation = n//2 + n//4 + n//8 + ....
1964  *
1965  * It can be shown (e.g., by complete induction on n) that two_valuation is
1966  * equal to n - count_set_bits(n), where count_set_bits(n) gives the number of
1967  * '1'-bits in the binary expansion of n.
1968  */
1969 
1970 /* factorial_partial_product: Compute product(range(start, stop, 2)) using
1971  * divide and conquer.  Assumes start and stop are odd and stop > start.
1972  * max_bits must be >= bit_length(stop - 2). */
1973 
1974 static PyObject *
factorial_partial_product(unsigned long start,unsigned long stop,unsigned long max_bits)1975 factorial_partial_product(unsigned long start, unsigned long stop,
1976                           unsigned long max_bits)
1977 {
1978     unsigned long midpoint, num_operands;
1979     PyObject *left = NULL, *right = NULL, *result = NULL;
1980 
1981     /* If the return value will fit an unsigned long, then we can
1982      * multiply in a tight, fast loop where each multiply is O(1).
1983      * Compute an upper bound on the number of bits required to store
1984      * the answer.
1985      *
1986      * Storing some integer z requires floor(lg(z))+1 bits, which is
1987      * conveniently the value returned by bit_length(z).  The
1988      * product x*y will require at most
1989      * bit_length(x) + bit_length(y) bits to store, based
1990      * on the idea that lg product = lg x + lg y.
1991      *
1992      * We know that stop - 2 is the largest number to be multiplied.  From
1993      * there, we have: bit_length(answer) <= num_operands *
1994      * bit_length(stop - 2)
1995      */
1996 
1997     num_operands = (stop - start) / 2;
1998     /* The "num_operands <= 8 * SIZEOF_LONG" check guards against the
1999      * unlikely case of an overflow in num_operands * max_bits. */
2000     if (num_operands <= 8 * SIZEOF_LONG &&
2001         num_operands * max_bits <= 8 * SIZEOF_LONG) {
2002         unsigned long j, total;
2003         for (total = start, j = start + 2; j < stop; j += 2)
2004             total *= j;
2005         return PyLong_FromUnsignedLong(total);
2006     }
2007 
2008     /* find midpoint of range(start, stop), rounded up to next odd number. */
2009     midpoint = (start + num_operands) | 1;
2010     left = factorial_partial_product(start, midpoint,
2011                                      _Py_bit_length(midpoint - 2));
2012     if (left == NULL)
2013         goto error;
2014     right = factorial_partial_product(midpoint, stop, max_bits);
2015     if (right == NULL)
2016         goto error;
2017     result = PyNumber_Multiply(left, right);
2018 
2019   error:
2020     Py_XDECREF(left);
2021     Py_XDECREF(right);
2022     return result;
2023 }
2024 
2025 /* factorial_odd_part:  compute the odd part of factorial(n). */
2026 
2027 static PyObject *
factorial_odd_part(unsigned long n)2028 factorial_odd_part(unsigned long n)
2029 {
2030     long i;
2031     unsigned long v, lower, upper;
2032     PyObject *partial, *tmp, *inner, *outer;
2033 
2034     inner = PyLong_FromLong(1);
2035     if (inner == NULL)
2036         return NULL;
2037     outer = inner;
2038     Py_INCREF(outer);
2039 
2040     upper = 3;
2041     for (i = _Py_bit_length(n) - 2; i >= 0; i--) {
2042         v = n >> i;
2043         if (v <= 2)
2044             continue;
2045         lower = upper;
2046         /* (v + 1) | 1 = least odd integer strictly larger than n / 2**i */
2047         upper = (v + 1) | 1;
2048         /* Here inner is the product of all odd integers j in the range (0,
2049            n/2**(i+1)].  The factorial_partial_product call below gives the
2050            product of all odd integers j in the range (n/2**(i+1), n/2**i]. */
2051         partial = factorial_partial_product(lower, upper, _Py_bit_length(upper-2));
2052         /* inner *= partial */
2053         if (partial == NULL)
2054             goto error;
2055         tmp = PyNumber_Multiply(inner, partial);
2056         Py_DECREF(partial);
2057         if (tmp == NULL)
2058             goto error;
2059         Py_DECREF(inner);
2060         inner = tmp;
2061         /* Now inner is the product of all odd integers j in the range (0,
2062            n/2**i], giving the inner product in the formula above. */
2063 
2064         /* outer *= inner; */
2065         tmp = PyNumber_Multiply(outer, inner);
2066         if (tmp == NULL)
2067             goto error;
2068         Py_DECREF(outer);
2069         outer = tmp;
2070     }
2071     Py_DECREF(inner);
2072     return outer;
2073 
2074   error:
2075     Py_DECREF(outer);
2076     Py_DECREF(inner);
2077     return NULL;
2078 }
2079 
2080 
2081 /* Lookup table for small factorial values */
2082 
2083 static const unsigned long SmallFactorials[] = {
2084     1, 1, 2, 6, 24, 120, 720, 5040, 40320,
2085     362880, 3628800, 39916800, 479001600,
2086 #if SIZEOF_LONG >= 8
2087     6227020800, 87178291200, 1307674368000,
2088     20922789888000, 355687428096000, 6402373705728000,
2089     121645100408832000, 2432902008176640000
2090 #endif
2091 };
2092 
2093 /*[clinic input]
2094 math.factorial
2095 
2096     n as arg: object
2097     /
2098 
2099 Find n!.
2100 
2101 Raise a ValueError if x is negative or non-integral.
2102 [clinic start generated code]*/
2103 
2104 static PyObject *
math_factorial(PyObject * module,PyObject * arg)2105 math_factorial(PyObject *module, PyObject *arg)
2106 /*[clinic end generated code: output=6686f26fae00e9ca input=713fb771677e8c31]*/
2107 {
2108     long x, two_valuation;
2109     int overflow;
2110     PyObject *result, *odd_part;
2111 
2112     x = PyLong_AsLongAndOverflow(arg, &overflow);
2113     if (x == -1 && PyErr_Occurred()) {
2114         return NULL;
2115     }
2116     else if (overflow == 1) {
2117         PyErr_Format(PyExc_OverflowError,
2118                      "factorial() argument should not exceed %ld",
2119                      LONG_MAX);
2120         return NULL;
2121     }
2122     else if (overflow == -1 || x < 0) {
2123         PyErr_SetString(PyExc_ValueError,
2124                         "factorial() not defined for negative values");
2125         return NULL;
2126     }
2127 
2128     /* use lookup table if x is small */
2129     if (x < (long)Py_ARRAY_LENGTH(SmallFactorials))
2130         return PyLong_FromUnsignedLong(SmallFactorials[x]);
2131 
2132     /* else express in the form odd_part * 2**two_valuation, and compute as
2133        odd_part << two_valuation. */
2134     odd_part = factorial_odd_part(x);
2135     if (odd_part == NULL)
2136         return NULL;
2137     two_valuation = x - count_set_bits(x);
2138     result = _PyLong_Lshift(odd_part, two_valuation);
2139     Py_DECREF(odd_part);
2140     return result;
2141 }
2142 
2143 
2144 /*[clinic input]
2145 math.trunc
2146 
2147     x: object
2148     /
2149 
2150 Truncates the Real x to the nearest Integral toward 0.
2151 
2152 Uses the __trunc__ magic method.
2153 [clinic start generated code]*/
2154 
2155 static PyObject *
math_trunc(PyObject * module,PyObject * x)2156 math_trunc(PyObject *module, PyObject *x)
2157 /*[clinic end generated code: output=34b9697b707e1031 input=2168b34e0a09134d]*/
2158 {
2159     _Py_IDENTIFIER(__trunc__);
2160     PyObject *trunc, *result;
2161 
2162     if (PyFloat_CheckExact(x)) {
2163         return PyFloat_Type.tp_as_number->nb_int(x);
2164     }
2165 
2166     if (Py_TYPE(x)->tp_dict == NULL) {
2167         if (PyType_Ready(Py_TYPE(x)) < 0)
2168             return NULL;
2169     }
2170 
2171     trunc = _PyObject_LookupSpecialId(x, &PyId___trunc__);
2172     if (trunc == NULL) {
2173         if (!PyErr_Occurred())
2174             PyErr_Format(PyExc_TypeError,
2175                          "type %.100s doesn't define __trunc__ method",
2176                          Py_TYPE(x)->tp_name);
2177         return NULL;
2178     }
2179     result = _PyObject_CallNoArgs(trunc);
2180     Py_DECREF(trunc);
2181     return result;
2182 }
2183 
2184 
2185 /*[clinic input]
2186 math.frexp
2187 
2188     x: double
2189     /
2190 
2191 Return the mantissa and exponent of x, as pair (m, e).
2192 
2193 m is a float and e is an int, such that x = m * 2.**e.
2194 If x is 0, m and e are both 0.  Else 0.5 <= abs(m) < 1.0.
2195 [clinic start generated code]*/
2196 
2197 static PyObject *
math_frexp_impl(PyObject * module,double x)2198 math_frexp_impl(PyObject *module, double x)
2199 /*[clinic end generated code: output=03e30d252a15ad4a input=96251c9e208bc6e9]*/
2200 {
2201     int i;
2202     /* deal with special cases directly, to sidestep platform
2203        differences */
2204     if (Py_IS_NAN(x) || Py_IS_INFINITY(x) || !x) {
2205         i = 0;
2206     }
2207     else {
2208         x = frexp(x, &i);
2209     }
2210     return Py_BuildValue("(di)", x, i);
2211 }
2212 
2213 
2214 /*[clinic input]
2215 math.ldexp
2216 
2217     x: double
2218     i: object
2219     /
2220 
2221 Return x * (2**i).
2222 
2223 This is essentially the inverse of frexp().
2224 [clinic start generated code]*/
2225 
2226 static PyObject *
math_ldexp_impl(PyObject * module,double x,PyObject * i)2227 math_ldexp_impl(PyObject *module, double x, PyObject *i)
2228 /*[clinic end generated code: output=b6892f3c2df9cc6a input=17d5970c1a40a8c1]*/
2229 {
2230     double r;
2231     long exp;
2232     int overflow;
2233 
2234     if (PyLong_Check(i)) {
2235         /* on overflow, replace exponent with either LONG_MAX
2236            or LONG_MIN, depending on the sign. */
2237         exp = PyLong_AsLongAndOverflow(i, &overflow);
2238         if (exp == -1 && PyErr_Occurred())
2239             return NULL;
2240         if (overflow)
2241             exp = overflow < 0 ? LONG_MIN : LONG_MAX;
2242     }
2243     else {
2244         PyErr_SetString(PyExc_TypeError,
2245                         "Expected an int as second argument to ldexp.");
2246         return NULL;
2247     }
2248 
2249     if (x == 0. || !Py_IS_FINITE(x)) {
2250         /* NaNs, zeros and infinities are returned unchanged */
2251         r = x;
2252         errno = 0;
2253     } else if (exp > INT_MAX) {
2254         /* overflow */
2255         r = copysign(Py_HUGE_VAL, x);
2256         errno = ERANGE;
2257     } else if (exp < INT_MIN) {
2258         /* underflow to +-0 */
2259         r = copysign(0., x);
2260         errno = 0;
2261     } else {
2262         errno = 0;
2263         r = ldexp(x, (int)exp);
2264         if (Py_IS_INFINITY(r))
2265             errno = ERANGE;
2266     }
2267 
2268     if (errno && is_error(r))
2269         return NULL;
2270     return PyFloat_FromDouble(r);
2271 }
2272 
2273 
2274 /*[clinic input]
2275 math.modf
2276 
2277     x: double
2278     /
2279 
2280 Return the fractional and integer parts of x.
2281 
2282 Both results carry the sign of x and are floats.
2283 [clinic start generated code]*/
2284 
2285 static PyObject *
math_modf_impl(PyObject * module,double x)2286 math_modf_impl(PyObject *module, double x)
2287 /*[clinic end generated code: output=90cee0260014c3c0 input=b4cfb6786afd9035]*/
2288 {
2289     double y;
2290     /* some platforms don't do the right thing for NaNs and
2291        infinities, so we take care of special cases directly. */
2292     if (!Py_IS_FINITE(x)) {
2293         if (Py_IS_INFINITY(x))
2294             return Py_BuildValue("(dd)", copysign(0., x), x);
2295         else if (Py_IS_NAN(x))
2296             return Py_BuildValue("(dd)", x, x);
2297     }
2298 
2299     errno = 0;
2300     x = modf(x, &y);
2301     return Py_BuildValue("(dd)", x, y);
2302 }
2303 
2304 
2305 /* A decent logarithm is easy to compute even for huge ints, but libm can't
2306    do that by itself -- loghelper can.  func is log or log10, and name is
2307    "log" or "log10".  Note that overflow of the result isn't possible: an int
2308    can contain no more than INT_MAX * SHIFT bits, so has value certainly less
2309    than 2**(2**64 * 2**16) == 2**2**80, and log2 of that is 2**80, which is
2310    small enough to fit in an IEEE single.  log and log10 are even smaller.
2311    However, intermediate overflow is possible for an int if the number of bits
2312    in that int is larger than PY_SSIZE_T_MAX. */
2313 
2314 static PyObject*
loghelper(PyObject * arg,double (* func)(double),const char * funcname)2315 loghelper(PyObject* arg, double (*func)(double), const char *funcname)
2316 {
2317     /* If it is int, do it ourselves. */
2318     if (PyLong_Check(arg)) {
2319         double x, result;
2320         Py_ssize_t e;
2321 
2322         /* Negative or zero inputs give a ValueError. */
2323         if (Py_SIZE(arg) <= 0) {
2324             PyErr_SetString(PyExc_ValueError,
2325                             "math domain error");
2326             return NULL;
2327         }
2328 
2329         x = PyLong_AsDouble(arg);
2330         if (x == -1.0 && PyErr_Occurred()) {
2331             if (!PyErr_ExceptionMatches(PyExc_OverflowError))
2332                 return NULL;
2333             /* Here the conversion to double overflowed, but it's possible
2334                to compute the log anyway.  Clear the exception and continue. */
2335             PyErr_Clear();
2336             x = _PyLong_Frexp((PyLongObject *)arg, &e);
2337             if (x == -1.0 && PyErr_Occurred())
2338                 return NULL;
2339             /* Value is ~= x * 2**e, so the log ~= log(x) + log(2) * e. */
2340             result = func(x) + func(2.0) * e;
2341         }
2342         else
2343             /* Successfully converted x to a double. */
2344             result = func(x);
2345         return PyFloat_FromDouble(result);
2346     }
2347 
2348     /* Else let libm handle it by itself. */
2349     return math_1(arg, func, 0);
2350 }
2351 
2352 
2353 /*[clinic input]
2354 math.log
2355 
2356     x:    object
2357     [
2358     base: object(c_default="NULL") = math.e
2359     ]
2360     /
2361 
2362 Return the logarithm of x to the given base.
2363 
2364 If the base not specified, returns the natural logarithm (base e) of x.
2365 [clinic start generated code]*/
2366 
2367 static PyObject *
math_log_impl(PyObject * module,PyObject * x,int group_right_1,PyObject * base)2368 math_log_impl(PyObject *module, PyObject *x, int group_right_1,
2369               PyObject *base)
2370 /*[clinic end generated code: output=7b5a39e526b73fc9 input=0f62d5726cbfebbd]*/
2371 {
2372     PyObject *num, *den;
2373     PyObject *ans;
2374 
2375     num = loghelper(x, m_log, "log");
2376     if (num == NULL || base == NULL)
2377         return num;
2378 
2379     den = loghelper(base, m_log, "log");
2380     if (den == NULL) {
2381         Py_DECREF(num);
2382         return NULL;
2383     }
2384 
2385     ans = PyNumber_TrueDivide(num, den);
2386     Py_DECREF(num);
2387     Py_DECREF(den);
2388     return ans;
2389 }
2390 
2391 
2392 /*[clinic input]
2393 math.log2
2394 
2395     x: object
2396     /
2397 
2398 Return the base 2 logarithm of x.
2399 [clinic start generated code]*/
2400 
2401 static PyObject *
math_log2(PyObject * module,PyObject * x)2402 math_log2(PyObject *module, PyObject *x)
2403 /*[clinic end generated code: output=5425899a4d5d6acb input=08321262bae4f39b]*/
2404 {
2405     return loghelper(x, m_log2, "log2");
2406 }
2407 
2408 
2409 /*[clinic input]
2410 math.log10
2411 
2412     x: object
2413     /
2414 
2415 Return the base 10 logarithm of x.
2416 [clinic start generated code]*/
2417 
2418 static PyObject *
math_log10(PyObject * module,PyObject * x)2419 math_log10(PyObject *module, PyObject *x)
2420 /*[clinic end generated code: output=be72a64617df9c6f input=b2469d02c6469e53]*/
2421 {
2422     return loghelper(x, m_log10, "log10");
2423 }
2424 
2425 
2426 /*[clinic input]
2427 math.fmod
2428 
2429     x: double
2430     y: double
2431     /
2432 
2433 Return fmod(x, y), according to platform C.
2434 
2435 x % y may differ.
2436 [clinic start generated code]*/
2437 
2438 static PyObject *
math_fmod_impl(PyObject * module,double x,double y)2439 math_fmod_impl(PyObject *module, double x, double y)
2440 /*[clinic end generated code: output=7559d794343a27b5 input=4f84caa8cfc26a03]*/
2441 {
2442     double r;
2443     /* fmod(x, +/-Inf) returns x for finite x. */
2444     if (Py_IS_INFINITY(y) && Py_IS_FINITE(x))
2445         return PyFloat_FromDouble(x);
2446     errno = 0;
2447     r = fmod(x, y);
2448     if (Py_IS_NAN(r)) {
2449         if (!Py_IS_NAN(x) && !Py_IS_NAN(y))
2450             errno = EDOM;
2451         else
2452             errno = 0;
2453     }
2454     if (errno && is_error(r))
2455         return NULL;
2456     else
2457         return PyFloat_FromDouble(r);
2458 }
2459 
2460 /*
2461 Given a *vec* of values, compute the vector norm:
2462 
2463     sqrt(sum(x ** 2 for x in vec))
2464 
2465 The *max* variable should be equal to the largest fabs(x).
2466 The *n* variable is the length of *vec*.
2467 If n==0, then *max* should be 0.0.
2468 If an infinity is present in the vec, *max* should be INF.
2469 The *found_nan* variable indicates whether some member of
2470 the *vec* is a NaN.
2471 
2472 To avoid overflow/underflow and to achieve high accuracy giving results
2473 that are almost always correctly rounded, four techniques are used:
2474 
2475 * lossless scaling using a power-of-two scaling factor
2476 * accurate squaring using Veltkamp-Dekker splitting [1]
2477 * compensated summation using a variant of the Neumaier algorithm [2]
2478 * differential correction of the square root [3]
2479 
2480 The usual presentation of the Neumaier summation algorithm has an
2481 expensive branch depending on which operand has the larger
2482 magnitude.  We avoid this cost by arranging the calculation so that
2483 fabs(csum) is always as large as fabs(x).
2484 
2485 To establish the invariant, *csum* is initialized to 1.0 which is
2486 always larger than x**2 after scaling or after division by *max*.
2487 After the loop is finished, the initial 1.0 is subtracted out for a
2488 net zero effect on the final sum.  Since *csum* will be greater than
2489 1.0, the subtraction of 1.0 will not cause fractional digits to be
2490 dropped from *csum*.
2491 
2492 To get the full benefit from compensated summation, the largest
2493 addend should be in the range: 0.5 <= |x| <= 1.0.  Accordingly,
2494 scaling or division by *max* should not be skipped even if not
2495 otherwise needed to prevent overflow or loss of precision.
2496 
2497 The assertion that hi*hi <= 1.0 is a bit subtle.  Each vector element
2498 gets scaled to a magnitude below 1.0.  The Veltkamp-Dekker splitting
2499 algorithm gives a *hi* value that is correctly rounded to half
2500 precision.  When a value at or below 1.0 is correctly rounded, it
2501 never goes above 1.0.  And when values at or below 1.0 are squared,
2502 they remain at or below 1.0, thus preserving the summation invariant.
2503 
2504 Another interesting assertion is that csum+lo*lo == csum. In the loop,
2505 each scaled vector element has a magnitude less than 1.0.  After the
2506 Veltkamp split, *lo* has a maximum value of 2**-27.  So the maximum
2507 value of *lo* squared is 2**-54.  The value of ulp(1.0)/2.0 is 2**-53.
2508 Given that csum >= 1.0, we have:
2509     lo**2 <= 2**-54 < 2**-53 == 1/2*ulp(1.0) <= ulp(csum)/2
2510 Since lo**2 is less than 1/2 ulp(csum), we have csum+lo*lo == csum.
2511 
2512 To minimize loss of information during the accumulation of fractional
2513 values, each term has a separate accumulator.  This also breaks up
2514 sequential dependencies in the inner loop so the CPU can maximize
2515 floating point throughput. [4]  On a 2.6 GHz Haswell, adding one
2516 dimension has an incremental cost of only 5ns -- for example when
2517 moving from hypot(x,y) to hypot(x,y,z).
2518 
2519 The square root differential correction is needed because a
2520 correctly rounded square root of a correctly rounded sum of
2521 squares can still be off by as much as one ulp.
2522 
2523 The differential correction starts with a value *x* that is
2524 the difference between the square of *h*, the possibly inaccurately
2525 rounded square root, and the accurately computed sum of squares.
2526 The correction is the first order term of the Maclaurin series
2527 expansion of sqrt(h**2 + x) == h + x/(2*h) + O(x**2). [5]
2528 
2529 Essentially, this differential correction is equivalent to one
2530 refinement step in Newton's divide-and-average square root
2531 algorithm, effectively doubling the number of accurate bits.
2532 This technique is used in Dekker's SQRT2 algorithm and again in
2533 Borges' ALGORITHM 4 and 5.
2534 
2535 Without proof for all cases, hypot() cannot claim to be always
2536 correctly rounded.  However for n <= 1000, prior to the final addition
2537 that rounds the overall result, the internal accuracy of "h" together
2538 with its correction of "x / (2.0 * h)" is at least 100 bits. [6]
2539 Also, hypot() was tested against a Decimal implementation with
2540 prec=300.  After 100 million trials, no incorrectly rounded examples
2541 were found.  In addition, perfect commutativity (all permutations are
2542 exactly equal) was verified for 1 billion random inputs with n=5. [7]
2543 
2544 References:
2545 
2546 1. Veltkamp-Dekker splitting: http://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
2547 2. Compensated summation:  http://www.ti3.tu-harburg.de/paper/rump/Ru08b.pdf
2548 3. Square root differential correction:  https://arxiv.org/pdf/1904.09481.pdf
2549 4. Data dependency graph:  https://bugs.python.org/file49439/hypot.png
2550 5. https://www.wolframalpha.com/input/?i=Maclaurin+series+sqrt%28h**2+%2B+x%29+at+x%3D0
2551 6. Analysis of internal accuracy:  https://bugs.python.org/file49484/best_frac.py
2552 7. Commutativity test:  https://bugs.python.org/file49448/test_hypot_commutativity.py
2553 
2554 */
2555 
2556 static inline double
vector_norm(Py_ssize_t n,double * vec,double max,int found_nan)2557 vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
2558 {
2559     const double T27 = 134217729.0;     /* ldexp(1.0, 27) + 1.0) */
2560     double x, scale, oldcsum, csum = 1.0, frac1 = 0.0, frac2 = 0.0, frac3 = 0.0;
2561     double t, hi, lo, h;
2562     int max_e;
2563     Py_ssize_t i;
2564 
2565     if (Py_IS_INFINITY(max)) {
2566         return max;
2567     }
2568     if (found_nan) {
2569         return Py_NAN;
2570     }
2571     if (max == 0.0 || n <= 1) {
2572         return max;
2573     }
2574     frexp(max, &max_e);
2575     if (max_e >= -1023) {
2576         scale = ldexp(1.0, -max_e);
2577         assert(max * scale >= 0.5);
2578         assert(max * scale < 1.0);
2579         for (i=0 ; i < n ; i++) {
2580             x = vec[i];
2581             assert(Py_IS_FINITE(x) && fabs(x) <= max);
2582 
2583             x *= scale;
2584             assert(fabs(x) < 1.0);
2585 
2586             t = x * T27;
2587             hi = t - (t - x);
2588             lo = x - hi;
2589             assert(hi + lo == x);
2590 
2591             x = hi * hi;
2592             assert(x <= 1.0);
2593             assert(fabs(csum) >= fabs(x));
2594             oldcsum = csum;
2595             csum += x;
2596             frac1 += (oldcsum - csum) + x;
2597 
2598             x = 2.0 * hi * lo;
2599             assert(fabs(csum) >= fabs(x));
2600             oldcsum = csum;
2601             csum += x;
2602             frac2 += (oldcsum - csum) + x;
2603 
2604             assert(csum + lo * lo == csum);
2605             frac3 += lo * lo;
2606         }
2607         h = sqrt(csum - 1.0 + (frac1 + frac2 + frac3));
2608 
2609         x = h;
2610         t = x * T27;
2611         hi = t - (t - x);
2612         lo = x - hi;
2613         assert (hi + lo == x);
2614 
2615         x = -hi * hi;
2616         assert(fabs(csum) >= fabs(x));
2617         oldcsum = csum;
2618         csum += x;
2619         frac1 += (oldcsum - csum) + x;
2620 
2621         x = -2.0 * hi * lo;
2622         assert(fabs(csum) >= fabs(x));
2623         oldcsum = csum;
2624         csum += x;
2625         frac2 += (oldcsum - csum) + x;
2626 
2627         x = -lo * lo;
2628         assert(fabs(csum) >= fabs(x));
2629         oldcsum = csum;
2630         csum += x;
2631         frac3 += (oldcsum - csum) + x;
2632 
2633         x = csum - 1.0 + (frac1 + frac2 + frac3);
2634         return (h + x / (2.0 * h)) / scale;
2635     }
2636     /* When max_e < -1023, ldexp(1.0, -max_e) overflows.
2637        So instead of multiplying by a scale, we just divide by *max*.
2638     */
2639     for (i=0 ; i < n ; i++) {
2640         x = vec[i];
2641         assert(Py_IS_FINITE(x) && fabs(x) <= max);
2642         x /= max;
2643         x = x*x;
2644         assert(x <= 1.0);
2645         assert(fabs(csum) >= fabs(x));
2646         oldcsum = csum;
2647         csum += x;
2648         frac1 += (oldcsum - csum) + x;
2649     }
2650     return max * sqrt(csum - 1.0 + frac1);
2651 }
2652 
2653 #define NUM_STACK_ELEMS 16
2654 
2655 /*[clinic input]
2656 math.dist
2657 
2658     p: object
2659     q: object
2660     /
2661 
2662 Return the Euclidean distance between two points p and q.
2663 
2664 The points should be specified as sequences (or iterables) of
2665 coordinates.  Both inputs must have the same dimension.
2666 
2667 Roughly equivalent to:
2668     sqrt(sum((px - qx) ** 2.0 for px, qx in zip(p, q)))
2669 [clinic start generated code]*/
2670 
2671 static PyObject *
math_dist_impl(PyObject * module,PyObject * p,PyObject * q)2672 math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
2673 /*[clinic end generated code: output=56bd9538d06bbcfe input=74e85e1b6092e68e]*/
2674 {
2675     PyObject *item;
2676     double max = 0.0;
2677     double x, px, qx, result;
2678     Py_ssize_t i, m, n;
2679     int found_nan = 0, p_allocated = 0, q_allocated = 0;
2680     double diffs_on_stack[NUM_STACK_ELEMS];
2681     double *diffs = diffs_on_stack;
2682 
2683     if (!PyTuple_Check(p)) {
2684         p = PySequence_Tuple(p);
2685         if (p == NULL) {
2686             return NULL;
2687         }
2688         p_allocated = 1;
2689     }
2690     if (!PyTuple_Check(q)) {
2691         q = PySequence_Tuple(q);
2692         if (q == NULL) {
2693             if (p_allocated) {
2694                 Py_DECREF(p);
2695             }
2696             return NULL;
2697         }
2698         q_allocated = 1;
2699     }
2700 
2701     m = PyTuple_GET_SIZE(p);
2702     n = PyTuple_GET_SIZE(q);
2703     if (m != n) {
2704         PyErr_SetString(PyExc_ValueError,
2705                         "both points must have the same number of dimensions");
2706         goto error_exit;
2707     }
2708     if (n > NUM_STACK_ELEMS) {
2709         diffs = (double *) PyObject_Malloc(n * sizeof(double));
2710         if (diffs == NULL) {
2711             PyErr_NoMemory();
2712             goto error_exit;
2713         }
2714     }
2715     for (i=0 ; i<n ; i++) {
2716         item = PyTuple_GET_ITEM(p, i);
2717         ASSIGN_DOUBLE(px, item, error_exit);
2718         item = PyTuple_GET_ITEM(q, i);
2719         ASSIGN_DOUBLE(qx, item, error_exit);
2720         x = fabs(px - qx);
2721         diffs[i] = x;
2722         found_nan |= Py_IS_NAN(x);
2723         if (x > max) {
2724             max = x;
2725         }
2726     }
2727     result = vector_norm(n, diffs, max, found_nan);
2728     if (diffs != diffs_on_stack) {
2729         PyObject_Free(diffs);
2730     }
2731     if (p_allocated) {
2732         Py_DECREF(p);
2733     }
2734     if (q_allocated) {
2735         Py_DECREF(q);
2736     }
2737     return PyFloat_FromDouble(result);
2738 
2739   error_exit:
2740     if (diffs != diffs_on_stack) {
2741         PyObject_Free(diffs);
2742     }
2743     if (p_allocated) {
2744         Py_DECREF(p);
2745     }
2746     if (q_allocated) {
2747         Py_DECREF(q);
2748     }
2749     return NULL;
2750 }
2751 
2752 /* AC: cannot convert yet, waiting for *args support */
2753 static PyObject *
math_hypot(PyObject * self,PyObject * const * args,Py_ssize_t nargs)2754 math_hypot(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
2755 {
2756     Py_ssize_t i;
2757     PyObject *item;
2758     double max = 0.0;
2759     double x, result;
2760     int found_nan = 0;
2761     double coord_on_stack[NUM_STACK_ELEMS];
2762     double *coordinates = coord_on_stack;
2763 
2764     if (nargs > NUM_STACK_ELEMS) {
2765         coordinates = (double *) PyObject_Malloc(nargs * sizeof(double));
2766         if (coordinates == NULL) {
2767             return PyErr_NoMemory();
2768         }
2769     }
2770     for (i = 0; i < nargs; i++) {
2771         item = args[i];
2772         ASSIGN_DOUBLE(x, item, error_exit);
2773         x = fabs(x);
2774         coordinates[i] = x;
2775         found_nan |= Py_IS_NAN(x);
2776         if (x > max) {
2777             max = x;
2778         }
2779     }
2780     result = vector_norm(nargs, coordinates, max, found_nan);
2781     if (coordinates != coord_on_stack) {
2782         PyObject_Free(coordinates);
2783     }
2784     return PyFloat_FromDouble(result);
2785 
2786   error_exit:
2787     if (coordinates != coord_on_stack) {
2788         PyObject_Free(coordinates);
2789     }
2790     return NULL;
2791 }
2792 
2793 #undef NUM_STACK_ELEMS
2794 
2795 PyDoc_STRVAR(math_hypot_doc,
2796              "hypot(*coordinates) -> value\n\n\
2797 Multidimensional Euclidean distance from the origin to a point.\n\
2798 \n\
2799 Roughly equivalent to:\n\
2800     sqrt(sum(x**2 for x in coordinates))\n\
2801 \n\
2802 For a two dimensional point (x, y), gives the hypotenuse\n\
2803 using the Pythagorean theorem:  sqrt(x*x + y*y).\n\
2804 \n\
2805 For example, the hypotenuse of a 3/4/5 right triangle is:\n\
2806 \n\
2807     >>> hypot(3.0, 4.0)\n\
2808     5.0\n\
2809 ");
2810 
2811 /* pow can't use math_2, but needs its own wrapper: the problem is
2812    that an infinite result can arise either as a result of overflow
2813    (in which case OverflowError should be raised) or as a result of
2814    e.g. 0.**-5. (for which ValueError needs to be raised.)
2815 */
2816 
2817 /*[clinic input]
2818 math.pow
2819 
2820     x: double
2821     y: double
2822     /
2823 
2824 Return x**y (x to the power of y).
2825 [clinic start generated code]*/
2826 
2827 static PyObject *
math_pow_impl(PyObject * module,double x,double y)2828 math_pow_impl(PyObject *module, double x, double y)
2829 /*[clinic end generated code: output=fff93e65abccd6b0 input=c26f1f6075088bfd]*/
2830 {
2831     double r;
2832     int odd_y;
2833 
2834     /* deal directly with IEEE specials, to cope with problems on various
2835        platforms whose semantics don't exactly match C99 */
2836     r = 0.; /* silence compiler warning */
2837     if (!Py_IS_FINITE(x) || !Py_IS_FINITE(y)) {
2838         errno = 0;
2839         if (Py_IS_NAN(x))
2840             r = y == 0. ? 1. : x; /* NaN**0 = 1 */
2841         else if (Py_IS_NAN(y))
2842             r = x == 1. ? 1. : y; /* 1**NaN = 1 */
2843         else if (Py_IS_INFINITY(x)) {
2844             odd_y = Py_IS_FINITE(y) && fmod(fabs(y), 2.0) == 1.0;
2845             if (y > 0.)
2846                 r = odd_y ? x : fabs(x);
2847             else if (y == 0.)
2848                 r = 1.;
2849             else /* y < 0. */
2850                 r = odd_y ? copysign(0., x) : 0.;
2851         }
2852         else if (Py_IS_INFINITY(y)) {
2853             if (fabs(x) == 1.0)
2854                 r = 1.;
2855             else if (y > 0. && fabs(x) > 1.0)
2856                 r = y;
2857             else if (y < 0. && fabs(x) < 1.0) {
2858                 r = -y; /* result is +inf */
2859             }
2860             else
2861                 r = 0.;
2862         }
2863     }
2864     else {
2865         /* let libm handle finite**finite */
2866         errno = 0;
2867         r = pow(x, y);
2868         /* a NaN result should arise only from (-ve)**(finite
2869            non-integer); in this case we want to raise ValueError. */
2870         if (!Py_IS_FINITE(r)) {
2871             if (Py_IS_NAN(r)) {
2872                 errno = EDOM;
2873             }
2874             /*
2875                an infinite result here arises either from:
2876                (A) (+/-0.)**negative (-> divide-by-zero)
2877                (B) overflow of x**y with x and y finite
2878             */
2879             else if (Py_IS_INFINITY(r)) {
2880                 if (x == 0.)
2881                     errno = EDOM;
2882                 else
2883                     errno = ERANGE;
2884             }
2885         }
2886     }
2887 
2888     if (errno && is_error(r))
2889         return NULL;
2890     else
2891         return PyFloat_FromDouble(r);
2892 }
2893 
2894 
2895 static const double degToRad = Py_MATH_PI / 180.0;
2896 static const double radToDeg = 180.0 / Py_MATH_PI;
2897 
2898 /*[clinic input]
2899 math.degrees
2900 
2901     x: double
2902     /
2903 
2904 Convert angle x from radians to degrees.
2905 [clinic start generated code]*/
2906 
2907 static PyObject *
math_degrees_impl(PyObject * module,double x)2908 math_degrees_impl(PyObject *module, double x)
2909 /*[clinic end generated code: output=7fea78b294acd12f input=81e016555d6e3660]*/
2910 {
2911     return PyFloat_FromDouble(x * radToDeg);
2912 }
2913 
2914 
2915 /*[clinic input]
2916 math.radians
2917 
2918     x: double
2919     /
2920 
2921 Convert angle x from degrees to radians.
2922 [clinic start generated code]*/
2923 
2924 static PyObject *
math_radians_impl(PyObject * module,double x)2925 math_radians_impl(PyObject *module, double x)
2926 /*[clinic end generated code: output=34daa47caf9b1590 input=91626fc489fe3d63]*/
2927 {
2928     return PyFloat_FromDouble(x * degToRad);
2929 }
2930 
2931 
2932 /*[clinic input]
2933 math.isfinite
2934 
2935     x: double
2936     /
2937 
2938 Return True if x is neither an infinity nor a NaN, and False otherwise.
2939 [clinic start generated code]*/
2940 
2941 static PyObject *
math_isfinite_impl(PyObject * module,double x)2942 math_isfinite_impl(PyObject *module, double x)
2943 /*[clinic end generated code: output=8ba1f396440c9901 input=46967d254812e54a]*/
2944 {
2945     return PyBool_FromLong((long)Py_IS_FINITE(x));
2946 }
2947 
2948 
2949 /*[clinic input]
2950 math.isnan
2951 
2952     x: double
2953     /
2954 
2955 Return True if x is a NaN (not a number), and False otherwise.
2956 [clinic start generated code]*/
2957 
2958 static PyObject *
math_isnan_impl(PyObject * module,double x)2959 math_isnan_impl(PyObject *module, double x)
2960 /*[clinic end generated code: output=f537b4d6df878c3e input=935891e66083f46a]*/
2961 {
2962     return PyBool_FromLong((long)Py_IS_NAN(x));
2963 }
2964 
2965 
2966 /*[clinic input]
2967 math.isinf
2968 
2969     x: double
2970     /
2971 
2972 Return True if x is a positive or negative infinity, and False otherwise.
2973 [clinic start generated code]*/
2974 
2975 static PyObject *
math_isinf_impl(PyObject * module,double x)2976 math_isinf_impl(PyObject *module, double x)
2977 /*[clinic end generated code: output=9f00cbec4de7b06b input=32630e4212cf961f]*/
2978 {
2979     return PyBool_FromLong((long)Py_IS_INFINITY(x));
2980 }
2981 
2982 
2983 /*[clinic input]
2984 math.isclose -> bool
2985 
2986     a: double
2987     b: double
2988     *
2989     rel_tol: double = 1e-09
2990         maximum difference for being considered "close", relative to the
2991         magnitude of the input values
2992     abs_tol: double = 0.0
2993         maximum difference for being considered "close", regardless of the
2994         magnitude of the input values
2995 
2996 Determine whether two floating point numbers are close in value.
2997 
2998 Return True if a is close in value to b, and False otherwise.
2999 
3000 For the values to be considered close, the difference between them
3001 must be smaller than at least one of the tolerances.
3002 
3003 -inf, inf and NaN behave similarly to the IEEE 754 Standard.  That
3004 is, NaN is not close to anything, even itself.  inf and -inf are
3005 only close to themselves.
3006 [clinic start generated code]*/
3007 
3008 static int
math_isclose_impl(PyObject * module,double a,double b,double rel_tol,double abs_tol)3009 math_isclose_impl(PyObject *module, double a, double b, double rel_tol,
3010                   double abs_tol)
3011 /*[clinic end generated code: output=b73070207511952d input=f28671871ea5bfba]*/
3012 {
3013     double diff = 0.0;
3014 
3015     /* sanity check on the inputs */
3016     if (rel_tol < 0.0 || abs_tol < 0.0 ) {
3017         PyErr_SetString(PyExc_ValueError,
3018                         "tolerances must be non-negative");
3019         return -1;
3020     }
3021 
3022     if ( a == b ) {
3023         /* short circuit exact equality -- needed to catch two infinities of
3024            the same sign. And perhaps speeds things up a bit sometimes.
3025         */
3026         return 1;
3027     }
3028 
3029     /* This catches the case of two infinities of opposite sign, or
3030        one infinity and one finite number. Two infinities of opposite
3031        sign would otherwise have an infinite relative tolerance.
3032        Two infinities of the same sign are caught by the equality check
3033        above.
3034     */
3035 
3036     if (Py_IS_INFINITY(a) || Py_IS_INFINITY(b)) {
3037         return 0;
3038     }
3039 
3040     /* now do the regular computation
3041        this is essentially the "weak" test from the Boost library
3042     */
3043 
3044     diff = fabs(b - a);
3045 
3046     return (((diff <= fabs(rel_tol * b)) ||
3047              (diff <= fabs(rel_tol * a))) ||
3048             (diff <= abs_tol));
3049 }
3050 
3051 static inline int
_check_long_mult_overflow(long a,long b)3052 _check_long_mult_overflow(long a, long b) {
3053 
3054     /* From Python2's int_mul code:
3055 
3056     Integer overflow checking for * is painful:  Python tried a couple ways, but
3057     they didn't work on all platforms, or failed in endcases (a product of
3058     -sys.maxint-1 has been a particular pain).
3059 
3060     Here's another way:
3061 
3062     The native long product x*y is either exactly right or *way* off, being
3063     just the last n bits of the true product, where n is the number of bits
3064     in a long (the delivered product is the true product plus i*2**n for
3065     some integer i).
3066 
3067     The native double product (double)x * (double)y is subject to three
3068     rounding errors:  on a sizeof(long)==8 box, each cast to double can lose
3069     info, and even on a sizeof(long)==4 box, the multiplication can lose info.
3070     But, unlike the native long product, it's not in *range* trouble:  even
3071     if sizeof(long)==32 (256-bit longs), the product easily fits in the
3072     dynamic range of a double.  So the leading 50 (or so) bits of the double
3073     product are correct.
3074 
3075     We check these two ways against each other, and declare victory if they're
3076     approximately the same.  Else, because the native long product is the only
3077     one that can lose catastrophic amounts of information, it's the native long
3078     product that must have overflowed.
3079 
3080     */
3081 
3082     long longprod = (long)((unsigned long)a * b);
3083     double doubleprod = (double)a * (double)b;
3084     double doubled_longprod = (double)longprod;
3085 
3086     if (doubled_longprod == doubleprod) {
3087         return 0;
3088     }
3089 
3090     const double diff = doubled_longprod - doubleprod;
3091     const double absdiff = diff >= 0.0 ? diff : -diff;
3092     const double absprod = doubleprod >= 0.0 ? doubleprod : -doubleprod;
3093 
3094     if (32.0 * absdiff <= absprod) {
3095         return 0;
3096     }
3097 
3098     return 1;
3099 }
3100 
3101 /*[clinic input]
3102 math.prod
3103 
3104     iterable: object
3105     /
3106     *
3107     start: object(c_default="NULL") = 1
3108 
3109 Calculate the product of all the elements in the input iterable.
3110 
3111 The default start value for the product is 1.
3112 
3113 When the iterable is empty, return the start value.  This function is
3114 intended specifically for use with numeric values and may reject
3115 non-numeric types.
3116 [clinic start generated code]*/
3117 
3118 static PyObject *
math_prod_impl(PyObject * module,PyObject * iterable,PyObject * start)3119 math_prod_impl(PyObject *module, PyObject *iterable, PyObject *start)
3120 /*[clinic end generated code: output=36153bedac74a198 input=4c5ab0682782ed54]*/
3121 {
3122     PyObject *result = start;
3123     PyObject *temp, *item, *iter;
3124 
3125     iter = PyObject_GetIter(iterable);
3126     if (iter == NULL) {
3127         return NULL;
3128     }
3129 
3130     if (result == NULL) {
3131         result = _PyLong_GetOne();
3132     }
3133     Py_INCREF(result);
3134 #ifndef SLOW_PROD
3135     /* Fast paths for integers keeping temporary products in C.
3136      * Assumes all inputs are the same type.
3137      * If the assumption fails, default to use PyObjects instead.
3138     */
3139     if (PyLong_CheckExact(result)) {
3140         int overflow;
3141         long i_result = PyLong_AsLongAndOverflow(result, &overflow);
3142         /* If this already overflowed, don't even enter the loop. */
3143         if (overflow == 0) {
3144             Py_DECREF(result);
3145             result = NULL;
3146         }
3147         /* Loop over all the items in the iterable until we finish, we overflow
3148          * or we found a non integer element */
3149         while (result == NULL) {
3150             item = PyIter_Next(iter);
3151             if (item == NULL) {
3152                 Py_DECREF(iter);
3153                 if (PyErr_Occurred()) {
3154                     return NULL;
3155                 }
3156                 return PyLong_FromLong(i_result);
3157             }
3158             if (PyLong_CheckExact(item)) {
3159                 long b = PyLong_AsLongAndOverflow(item, &overflow);
3160                 if (overflow == 0 && !_check_long_mult_overflow(i_result, b)) {
3161                     long x = i_result * b;
3162                     i_result = x;
3163                     Py_DECREF(item);
3164                     continue;
3165                 }
3166             }
3167             /* Either overflowed or is not an int.
3168              * Restore real objects and process normally */
3169             result = PyLong_FromLong(i_result);
3170             if (result == NULL) {
3171                 Py_DECREF(item);
3172                 Py_DECREF(iter);
3173                 return NULL;
3174             }
3175             temp = PyNumber_Multiply(result, item);
3176             Py_DECREF(result);
3177             Py_DECREF(item);
3178             result = temp;
3179             if (result == NULL) {
3180                 Py_DECREF(iter);
3181                 return NULL;
3182             }
3183         }
3184     }
3185 
3186     /* Fast paths for floats keeping temporary products in C.
3187      * Assumes all inputs are the same type.
3188      * If the assumption fails, default to use PyObjects instead.
3189     */
3190     if (PyFloat_CheckExact(result)) {
3191         double f_result = PyFloat_AS_DOUBLE(result);
3192         Py_DECREF(result);
3193         result = NULL;
3194         while(result == NULL) {
3195             item = PyIter_Next(iter);
3196             if (item == NULL) {
3197                 Py_DECREF(iter);
3198                 if (PyErr_Occurred()) {
3199                     return NULL;
3200                 }
3201                 return PyFloat_FromDouble(f_result);
3202             }
3203             if (PyFloat_CheckExact(item)) {
3204                 f_result *= PyFloat_AS_DOUBLE(item);
3205                 Py_DECREF(item);
3206                 continue;
3207             }
3208             if (PyLong_CheckExact(item)) {
3209                 long value;
3210                 int overflow;
3211                 value = PyLong_AsLongAndOverflow(item, &overflow);
3212                 if (!overflow) {
3213                     f_result *= (double)value;
3214                     Py_DECREF(item);
3215                     continue;
3216                 }
3217             }
3218             result = PyFloat_FromDouble(f_result);
3219             if (result == NULL) {
3220                 Py_DECREF(item);
3221                 Py_DECREF(iter);
3222                 return NULL;
3223             }
3224             temp = PyNumber_Multiply(result, item);
3225             Py_DECREF(result);
3226             Py_DECREF(item);
3227             result = temp;
3228             if (result == NULL) {
3229                 Py_DECREF(iter);
3230                 return NULL;
3231             }
3232         }
3233     }
3234 #endif
3235     /* Consume rest of the iterable (if any) that could not be handled
3236      * by specialized functions above.*/
3237     for(;;) {
3238         item = PyIter_Next(iter);
3239         if (item == NULL) {
3240             /* error, or end-of-sequence */
3241             if (PyErr_Occurred()) {
3242                 Py_DECREF(result);
3243                 result = NULL;
3244             }
3245             break;
3246         }
3247         temp = PyNumber_Multiply(result, item);
3248         Py_DECREF(result);
3249         Py_DECREF(item);
3250         result = temp;
3251         if (result == NULL)
3252             break;
3253     }
3254     Py_DECREF(iter);
3255     return result;
3256 }
3257 
3258 
3259 /* least significant 64 bits of the odd part of factorial(n), for n in range(128).
3260 
3261 Python code to generate the values:
3262 
3263     import math
3264 
3265     for n in range(128):
3266         fac = math.factorial(n)
3267         fac_odd_part = fac // (fac & -fac)
3268         reduced_fac_odd_part = fac_odd_part % (2**64)
3269         print(f"{reduced_fac_odd_part:#018x}u")
3270 */
3271 static const uint64_t reduced_factorial_odd_part[] = {
3272     0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000003u,
3273     0x0000000000000003u, 0x000000000000000fu, 0x000000000000002du, 0x000000000000013bu,
3274     0x000000000000013bu, 0x0000000000000b13u, 0x000000000000375fu, 0x0000000000026115u,
3275     0x000000000007233fu, 0x00000000005cca33u, 0x0000000002898765u, 0x00000000260eeeebu,
3276     0x00000000260eeeebu, 0x0000000286fddd9bu, 0x00000016beecca73u, 0x000001b02b930689u,
3277     0x00000870d9df20adu, 0x0000b141df4dae31u, 0x00079dd498567c1bu, 0x00af2e19afc5266du,
3278     0x020d8a4d0f4f7347u, 0x335281867ec241efu, 0x9b3093d46fdd5923u, 0x5e1f9767cc5866b1u,
3279     0x92dd23d6966aced7u, 0xa30d0f4f0a196e5bu, 0x8dc3e5a1977d7755u, 0x2ab8ce915831734bu,
3280     0x2ab8ce915831734bu, 0x81d2a0bc5e5fdcabu, 0x9efcac82445da75bu, 0xbc8b95cf58cde171u,
3281     0xa0e8444a1f3cecf9u, 0x4191deb683ce3ffdu, 0xddd3878bc84ebfc7u, 0xcb39a64b83ff3751u,
3282     0xf8203f7993fc1495u, 0xbd2a2a78b35f4bddu, 0x84757be6b6d13921u, 0x3fbbcfc0b524988bu,
3283     0xbd11ed47c8928df9u, 0x3c26b59e41c2f4c5u, 0x677a5137e883fdb3u, 0xff74e943b03b93ddu,
3284     0xfe5ebbcb10b2bb97u, 0xb021f1de3235e7e7u, 0x33509eb2e743a58fu, 0x390f9da41279fb7du,
3285     0xe5cb0154f031c559u, 0x93074695ba4ddb6du, 0x81c471caa636247fu, 0xe1347289b5a1d749u,
3286     0x286f21c3f76ce2ffu, 0x00be84a2173e8ac7u, 0x1595065ca215b88bu, 0xf95877595b018809u,
3287     0x9c2efe3c5516f887u, 0x373294604679382bu, 0xaf1ff7a888adcd35u, 0x18ddf279a2c5800bu,
3288     0x18ddf279a2c5800bu, 0x505a90e2542582cbu, 0x5bacad2cd8d5dc2bu, 0xfe3152bcbff89f41u,
3289     0xe1467e88bf829351u, 0xb8001adb9e31b4d5u, 0x2803ac06a0cbb91fu, 0x1904b5d698805799u,
3290     0xe12a648b5c831461u, 0x3516abbd6160cfa9u, 0xac46d25f12fe036du, 0x78bfa1da906b00efu,
3291     0xf6390338b7f111bdu, 0x0f25f80f538255d9u, 0x4ec8ca55b8db140fu, 0x4ff670740b9b30a1u,
3292     0x8fd032443a07f325u, 0x80dfe7965c83eeb5u, 0xa3dc1714d1213afdu, 0x205b7bbfcdc62007u,
3293     0xa78126bbe140a093u, 0x9de1dc61ca7550cfu, 0x84f0046d01b492c5u, 0x2d91810b945de0f3u,
3294     0xf5408b7f6008aa71u, 0x43707f4863034149u, 0xdac65fb9679279d5u, 0xc48406e7d1114eb7u,
3295     0xa7dc9ed3c88e1271u, 0xfb25b2efdb9cb30du, 0x1bebda0951c4df63u, 0x5c85e975580ee5bdu,
3296     0x1591bc60082cb137u, 0x2c38606318ef25d7u, 0x76ca72f7c5c63e27u, 0xf04a75d17baa0915u,
3297     0x77458175139ae30du, 0x0e6c1330bc1b9421u, 0xdf87d2b5797e8293u, 0xefa5c703e1e68925u,
3298     0x2b6b1b3278b4f6e1u, 0xceee27b382394249u, 0xd74e3829f5dab91du, 0xfdb17989c26b5f1fu,
3299     0xc1b7d18781530845u, 0x7b4436b2105a8561u, 0x7ba7c0418372a7d7u, 0x9dbc5c67feb6c639u,
3300     0x502686d7f6ff6b8fu, 0x6101855406be7a1fu, 0x9956afb5806930e7u, 0xe1f0ee88af40f7c5u,
3301     0x984b057bda5c1151u, 0x9a49819acc13ea05u, 0x8ef0dead0896ef27u, 0x71f7826efe292b21u,
3302     0xad80a480e46986efu, 0x01cdc0ebf5e0c6f7u, 0x6e06f839968f68dbu, 0xdd5943ab56e76139u,
3303     0xcdcf31bf8604c5e7u, 0x7e2b4a847054a1cbu, 0x0ca75697a4d3d0f5u, 0x4703f53ac514a98bu,
3304 };
3305 
3306 /* inverses of reduced_factorial_odd_part values modulo 2**64.
3307 
3308 Python code to generate the values:
3309 
3310     import math
3311 
3312     for n in range(128):
3313         fac = math.factorial(n)
3314         fac_odd_part = fac // (fac & -fac)
3315         inverted_fac_odd_part = pow(fac_odd_part, -1, 2**64)
3316         print(f"{inverted_fac_odd_part:#018x}u")
3317 */
3318 static const uint64_t inverted_factorial_odd_part[] = {
3319     0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0xaaaaaaaaaaaaaaabu,
3320     0xaaaaaaaaaaaaaaabu, 0xeeeeeeeeeeeeeeefu, 0x4fa4fa4fa4fa4fa5u, 0x2ff2ff2ff2ff2ff3u,
3321     0x2ff2ff2ff2ff2ff3u, 0x938cc70553e3771bu, 0xb71c27cddd93e49fu, 0xb38e3229fcdee63du,
3322     0xe684bb63544a4cbfu, 0xc2f684917ca340fbu, 0xf747c9cba417526du, 0xbb26eb51d7bd49c3u,
3323     0xbb26eb51d7bd49c3u, 0xb0a7efb985294093u, 0xbe4b8c69f259eabbu, 0x6854d17ed6dc4fb9u,
3324     0xe1aa904c915f4325u, 0x3b8206df131cead1u, 0x79c6009fea76fe13u, 0xd8c5d381633cd365u,
3325     0x4841f12b21144677u, 0x4a91ff68200b0d0fu, 0x8f9513a58c4f9e8bu, 0x2b3e690621a42251u,
3326     0x4f520f00e03c04e7u, 0x2edf84ee600211d3u, 0xadcaa2764aaacdfdu, 0x161f4f9033f4fe63u,
3327     0x161f4f9033f4fe63u, 0xbada2932ea4d3e03u, 0xcec189f3efaa30d3u, 0xf7475bb68330bf91u,
3328     0x37eb7bf7d5b01549u, 0x46b35660a4e91555u, 0xa567c12d81f151f7u, 0x4c724007bb2071b1u,
3329     0x0f4a0cce58a016bdu, 0xfa21068e66106475u, 0x244ab72b5a318ae1u, 0x366ce67e080d0f23u,
3330     0xd666fdae5dd2a449u, 0xd740ddd0acc06a0du, 0xb050bbbb28e6f97bu, 0x70b003fe890a5c75u,
3331     0xd03aabff83037427u, 0x13ec4ca72c783bd7u, 0x90282c06afdbd96fu, 0x4414ddb9db4a95d5u,
3332     0xa2c68735ae6832e9u, 0xbf72d71455676665u, 0xa8469fab6b759b7fu, 0xc1e55b56e606caf9u,
3333     0x40455630fc4a1cffu, 0x0120a7b0046d16f7u, 0xa7c3553b08faef23u, 0x9f0bfd1b08d48639u,
3334     0xa433ffce9a304d37u, 0xa22ad1d53915c683u, 0xcb6cbc723ba5dd1du, 0x547fb1b8ab9d0ba3u,
3335     0x547fb1b8ab9d0ba3u, 0x8f15a826498852e3u, 0x32e1a03f38880283u, 0x3de4cce63283f0c1u,
3336     0x5dfe6667e4da95b1u, 0xfda6eeeef479e47du, 0xf14de991cc7882dfu, 0xe68db79247630ca9u,
3337     0xa7d6db8207ee8fa1u, 0x255e1f0fcf034499u, 0xc9a8990e43dd7e65u, 0x3279b6f289702e0fu,
3338     0xe7b5905d9b71b195u, 0x03025ba41ff0da69u, 0xb7df3d6d3be55aefu, 0xf89b212ebff2b361u,
3339     0xfe856d095996f0adu, 0xd6e533e9fdf20f9du, 0xf8c0e84a63da3255u, 0xa677876cd91b4db7u,
3340     0x07ed4f97780d7d9bu, 0x90a8705f258db62fu, 0xa41bbb2be31b1c0du, 0x6ec28690b038383bu,
3341     0xdb860c3bb2edd691u, 0x0838286838a980f9u, 0x558417a74b36f77du, 0x71779afc3646ef07u,
3342     0x743cda377ccb6e91u, 0x7fdf9f3fe89153c5u, 0xdc97d25df49b9a4bu, 0x76321a778eb37d95u,
3343     0x7cbb5e27da3bd487u, 0x9cff4ade1a009de7u, 0x70eb166d05c15197u, 0xdcf0460b71d5fe3du,
3344     0x5ac1ee5260b6a3c5u, 0xc922dedfdd78efe1u, 0xe5d381dc3b8eeb9bu, 0xd57e5347bafc6aadu,
3345     0x86939040983acd21u, 0x395b9d69740a4ff9u, 0x1467299c8e43d135u, 0x5fe440fcad975cdfu,
3346     0xcaa9a39794a6ca8du, 0xf61dbd640868dea1u, 0xac09d98d74843be7u, 0x2b103b9e1a6b4809u,
3347     0x2ab92d16960f536fu, 0x6653323d5e3681dfu, 0xefd48c1c0624e2d7u, 0xa496fefe04816f0du,
3348     0x1754a7b07bbdd7b1u, 0x23353c829a3852cdu, 0xbf831261abd59097u, 0x57a8e656df0618e1u,
3349     0x16e9206c3100680fu, 0xadad4c6ee921dac7u, 0x635f2b3860265353u, 0xdd6d0059f44b3d09u,
3350     0xac4dd6b894447dd7u, 0x42ea183eeaa87be3u, 0x15612d1550ee5b5du, 0x226fa19d656cb623u,
3351 };
3352 
3353 /* exponent of the largest power of 2 dividing factorial(n), for n in range(68)
3354 
3355 Python code to generate the values:
3356 
3357 import math
3358 
3359 for n in range(128):
3360     fac = math.factorial(n)
3361     fac_trailing_zeros = (fac & -fac).bit_length() - 1
3362     print(fac_trailing_zeros)
3363 */
3364 
3365 static const uint8_t factorial_trailing_zeros[] = {
3366      0,  0,  1,  1,  3,  3,  4,  4,  7,  7,  8,  8, 10, 10, 11, 11,  //  0-15
3367     15, 15, 16, 16, 18, 18, 19, 19, 22, 22, 23, 23, 25, 25, 26, 26,  // 16-31
3368     31, 31, 32, 32, 34, 34, 35, 35, 38, 38, 39, 39, 41, 41, 42, 42,  // 32-47
3369     46, 46, 47, 47, 49, 49, 50, 50, 53, 53, 54, 54, 56, 56, 57, 57,  // 48-63
3370     63, 63, 64, 64, 66, 66, 67, 67, 70, 70, 71, 71, 73, 73, 74, 74,  // 64-79
3371     78, 78, 79, 79, 81, 81, 82, 82, 85, 85, 86, 86, 88, 88, 89, 89,  // 80-95
3372     94, 94, 95, 95, 97, 97, 98, 98, 101, 101, 102, 102, 104, 104, 105, 105,  // 96-111
3373     109, 109, 110, 110, 112, 112, 113, 113, 116, 116, 117, 117, 119, 119, 120, 120,  // 112-127
3374 };
3375 
3376 /* Number of permutations and combinations.
3377  * P(n, k) = n! / (n-k)!
3378  * C(n, k) = P(n, k) / k!
3379  */
3380 
3381 /* Calculate C(n, k) for n in the 63-bit range. */
3382 static PyObject *
perm_comb_small(unsigned long long n,unsigned long long k,int iscomb)3383 perm_comb_small(unsigned long long n, unsigned long long k, int iscomb)
3384 {
3385     if (k == 0) {
3386         return PyLong_FromLong(1);
3387     }
3388 
3389     /* For small enough n and k the result fits in the 64-bit range and can
3390      * be calculated without allocating intermediate PyLong objects. */
3391     if (iscomb) {
3392         /* Maps k to the maximal n so that 2*k-1 <= n <= 127 and C(n, k)
3393          * fits into a uint64_t.  Exclude k = 1, because the second fast
3394          * path is faster for this case.*/
3395         static const unsigned char fast_comb_limits1[] = {
3396             0, 0, 127, 127, 127, 127, 127, 127,  // 0-7
3397             127, 127, 127, 127, 127, 127, 127, 127,  // 8-15
3398             116, 105, 97, 91, 86, 82, 78, 76,  // 16-23
3399             74, 72, 71, 70, 69, 68, 68, 67,  // 24-31
3400             67, 67, 67,  // 32-34
3401         };
3402         if (k < Py_ARRAY_LENGTH(fast_comb_limits1) && n <= fast_comb_limits1[k]) {
3403             /*
3404                 comb(n, k) fits into a uint64_t. We compute it as
3405 
3406                     comb_odd_part << shift
3407 
3408                 where 2**shift is the largest power of two dividing comb(n, k)
3409                 and comb_odd_part is comb(n, k) >> shift. comb_odd_part can be
3410                 calculated efficiently via arithmetic modulo 2**64, using three
3411                 lookups and two uint64_t multiplications.
3412             */
3413             uint64_t comb_odd_part = reduced_factorial_odd_part[n]
3414                                    * inverted_factorial_odd_part[k]
3415                                    * inverted_factorial_odd_part[n - k];
3416             int shift = factorial_trailing_zeros[n]
3417                       - factorial_trailing_zeros[k]
3418                       - factorial_trailing_zeros[n - k];
3419             return PyLong_FromUnsignedLongLong(comb_odd_part << shift);
3420         }
3421 
3422         /* Maps k to the maximal n so that 2*k-1 <= n <= 127 and C(n, k)*k
3423          * fits into a long long (which is at least 64 bit).  Only contains
3424          * items larger than in fast_comb_limits1. */
3425         static const unsigned long long fast_comb_limits2[] = {
3426             0, ULLONG_MAX, 4294967296ULL, 3329022, 102570, 13467, 3612, 1449,  // 0-7
3427             746, 453, 308, 227, 178, 147,  // 8-13
3428         };
3429         if (k < Py_ARRAY_LENGTH(fast_comb_limits2) && n <= fast_comb_limits2[k]) {
3430             /* C(n, k) = C(n, k-1) * (n-k+1) / k */
3431             unsigned long long result = n;
3432             for (unsigned long long i = 1; i < k;) {
3433                 result *= --n;
3434                 result /= ++i;
3435             }
3436             return PyLong_FromUnsignedLongLong(result);
3437         }
3438     }
3439     else {
3440         /* Maps k to the maximal n so that k <= n and P(n, k)
3441          * fits into a long long (which is at least 64 bit). */
3442         static const unsigned long long fast_perm_limits[] = {
3443             0, ULLONG_MAX, 4294967296ULL, 2642246, 65537, 7133, 1627, 568,  // 0-7
3444             259, 142, 88, 61, 45, 36, 30, 26,  // 8-15
3445             24, 22, 21, 20, 20,  // 16-20
3446         };
3447         if (k < Py_ARRAY_LENGTH(fast_perm_limits) && n <= fast_perm_limits[k]) {
3448             if (n <= 127) {
3449                 /* P(n, k) fits into a uint64_t. */
3450                 uint64_t perm_odd_part = reduced_factorial_odd_part[n]
3451                                        * inverted_factorial_odd_part[n - k];
3452                 int shift = factorial_trailing_zeros[n]
3453                           - factorial_trailing_zeros[n - k];
3454                 return PyLong_FromUnsignedLongLong(perm_odd_part << shift);
3455             }
3456 
3457             /* P(n, k) = P(n, k-1) * (n-k+1) */
3458             unsigned long long result = n;
3459             for (unsigned long long i = 1; i < k;) {
3460                 result *= --n;
3461                 ++i;
3462             }
3463             return PyLong_FromUnsignedLongLong(result);
3464         }
3465     }
3466 
3467     /* For larger n use recursive formulas:
3468      *
3469      *   P(n, k) = P(n, j) * P(n-j, k-j)
3470      *   C(n, k) = C(n, j) * C(n-j, k-j) // C(k, j)
3471      */
3472     unsigned long long j = k / 2;
3473     PyObject *a, *b;
3474     a = perm_comb_small(n, j, iscomb);
3475     if (a == NULL) {
3476         return NULL;
3477     }
3478     b = perm_comb_small(n - j, k - j, iscomb);
3479     if (b == NULL) {
3480         goto error;
3481     }
3482     Py_SETREF(a, PyNumber_Multiply(a, b));
3483     Py_DECREF(b);
3484     if (iscomb && a != NULL) {
3485         b = perm_comb_small(k, j, 1);
3486         if (b == NULL) {
3487             goto error;
3488         }
3489         Py_SETREF(a, PyNumber_FloorDivide(a, b));
3490         Py_DECREF(b);
3491     }
3492     return a;
3493 
3494 error:
3495     Py_DECREF(a);
3496     return NULL;
3497 }
3498 
3499 /* Calculate P(n, k) or C(n, k) using recursive formulas.
3500  * It is more efficient than sequential multiplication thanks to
3501  * Karatsuba multiplication.
3502  */
3503 static PyObject *
perm_comb(PyObject * n,unsigned long long k,int iscomb)3504 perm_comb(PyObject *n, unsigned long long k, int iscomb)
3505 {
3506     if (k == 0) {
3507         return PyLong_FromLong(1);
3508     }
3509     if (k == 1) {
3510         Py_INCREF(n);
3511         return n;
3512     }
3513 
3514     /* P(n, k) = P(n, j) * P(n-j, k-j) */
3515     /* C(n, k) = C(n, j) * C(n-j, k-j) // C(k, j) */
3516     unsigned long long j = k / 2;
3517     PyObject *a, *b;
3518     a = perm_comb(n, j, iscomb);
3519     if (a == NULL) {
3520         return NULL;
3521     }
3522     PyObject *t = PyLong_FromUnsignedLongLong(j);
3523     if (t == NULL) {
3524         goto error;
3525     }
3526     n = PyNumber_Subtract(n, t);
3527     Py_DECREF(t);
3528     if (n == NULL) {
3529         goto error;
3530     }
3531     b = perm_comb(n, k - j, iscomb);
3532     Py_DECREF(n);
3533     if (b == NULL) {
3534         goto error;
3535     }
3536     Py_SETREF(a, PyNumber_Multiply(a, b));
3537     Py_DECREF(b);
3538     if (iscomb && a != NULL) {
3539         b = perm_comb_small(k, j, 1);
3540         if (b == NULL) {
3541             goto error;
3542         }
3543         Py_SETREF(a, PyNumber_FloorDivide(a, b));
3544         Py_DECREF(b);
3545     }
3546     return a;
3547 
3548 error:
3549     Py_DECREF(a);
3550     return NULL;
3551 }
3552 
3553 /*[clinic input]
3554 math.perm
3555 
3556     n: object
3557     k: object = None
3558     /
3559 
3560 Number of ways to choose k items from n items without repetition and with order.
3561 
3562 Evaluates to n! / (n - k)! when k <= n and evaluates
3563 to zero when k > n.
3564 
3565 If k is not specified or is None, then k defaults to n
3566 and the function returns n!.
3567 
3568 Raises TypeError if either of the arguments are not integers.
3569 Raises ValueError if either of the arguments are negative.
3570 [clinic start generated code]*/
3571 
3572 static PyObject *
math_perm_impl(PyObject * module,PyObject * n,PyObject * k)3573 math_perm_impl(PyObject *module, PyObject *n, PyObject *k)
3574 /*[clinic end generated code: output=e021a25469653e23 input=5311c5a00f359b53]*/
3575 {
3576     PyObject *result = NULL;
3577     int overflow, cmp;
3578     long long ki, ni;
3579 
3580     if (k == Py_None) {
3581         return math_factorial(module, n);
3582     }
3583     n = PyNumber_Index(n);
3584     if (n == NULL) {
3585         return NULL;
3586     }
3587     k = PyNumber_Index(k);
3588     if (k == NULL) {
3589         Py_DECREF(n);
3590         return NULL;
3591     }
3592     assert(PyLong_CheckExact(n) && PyLong_CheckExact(k));
3593 
3594     if (Py_SIZE(n) < 0) {
3595         PyErr_SetString(PyExc_ValueError,
3596                         "n must be a non-negative integer");
3597         goto error;
3598     }
3599     if (Py_SIZE(k) < 0) {
3600         PyErr_SetString(PyExc_ValueError,
3601                         "k must be a non-negative integer");
3602         goto error;
3603     }
3604 
3605     cmp = PyObject_RichCompareBool(n, k, Py_LT);
3606     if (cmp != 0) {
3607         if (cmp > 0) {
3608             result = PyLong_FromLong(0);
3609             goto done;
3610         }
3611         goto error;
3612     }
3613 
3614     ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3615     assert(overflow >= 0 && !PyErr_Occurred());
3616     if (overflow > 0) {
3617         PyErr_Format(PyExc_OverflowError,
3618                      "k must not exceed %lld",
3619                      LLONG_MAX);
3620         goto error;
3621     }
3622     assert(ki >= 0);
3623 
3624     ni = PyLong_AsLongLongAndOverflow(n, &overflow);
3625     assert(overflow >= 0 && !PyErr_Occurred());
3626     if (!overflow && ki > 1) {
3627         assert(ni >= 0);
3628         result = perm_comb_small((unsigned long long)ni,
3629                                  (unsigned long long)ki, 0);
3630     }
3631     else {
3632         result = perm_comb(n, (unsigned long long)ki, 0);
3633     }
3634 
3635 done:
3636     Py_DECREF(n);
3637     Py_DECREF(k);
3638     return result;
3639 
3640 error:
3641     Py_DECREF(n);
3642     Py_DECREF(k);
3643     return NULL;
3644 }
3645 
3646 /*[clinic input]
3647 math.comb
3648 
3649     n: object
3650     k: object
3651     /
3652 
3653 Number of ways to choose k items from n items without repetition and without order.
3654 
3655 Evaluates to n! / (k! * (n - k)!) when k <= n and evaluates
3656 to zero when k > n.
3657 
3658 Also called the binomial coefficient because it is equivalent
3659 to the coefficient of k-th term in polynomial expansion of the
3660 expression (1 + x)**n.
3661 
3662 Raises TypeError if either of the arguments are not integers.
3663 Raises ValueError if either of the arguments are negative.
3664 
3665 [clinic start generated code]*/
3666 
3667 static PyObject *
math_comb_impl(PyObject * module,PyObject * n,PyObject * k)3668 math_comb_impl(PyObject *module, PyObject *n, PyObject *k)
3669 /*[clinic end generated code: output=bd2cec8d854f3493 input=9a05315af2518709]*/
3670 {
3671     PyObject *result = NULL, *temp;
3672     int overflow, cmp;
3673     long long ki, ni;
3674 
3675     n = PyNumber_Index(n);
3676     if (n == NULL) {
3677         return NULL;
3678     }
3679     k = PyNumber_Index(k);
3680     if (k == NULL) {
3681         Py_DECREF(n);
3682         return NULL;
3683     }
3684     assert(PyLong_CheckExact(n) && PyLong_CheckExact(k));
3685 
3686     if (Py_SIZE(n) < 0) {
3687         PyErr_SetString(PyExc_ValueError,
3688                         "n must be a non-negative integer");
3689         goto error;
3690     }
3691     if (Py_SIZE(k) < 0) {
3692         PyErr_SetString(PyExc_ValueError,
3693                         "k must be a non-negative integer");
3694         goto error;
3695     }
3696 
3697     ni = PyLong_AsLongLongAndOverflow(n, &overflow);
3698     assert(overflow >= 0 && !PyErr_Occurred());
3699     if (!overflow) {
3700         assert(ni >= 0);
3701         ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3702         assert(overflow >= 0 && !PyErr_Occurred());
3703         if (overflow || ki > ni) {
3704             result = PyLong_FromLong(0);
3705             goto done;
3706         }
3707         assert(ki >= 0);
3708 
3709         ki = Py_MIN(ki, ni - ki);
3710         if (ki > 1) {
3711             result = perm_comb_small((unsigned long long)ni,
3712                                      (unsigned long long)ki, 1);
3713             goto done;
3714         }
3715         /* For k == 1 just return the original n in perm_comb(). */
3716     }
3717     else {
3718         /* k = min(k, n - k) */
3719         temp = PyNumber_Subtract(n, k);
3720         if (temp == NULL) {
3721             goto error;
3722         }
3723         if (Py_SIZE(temp) < 0) {
3724             Py_DECREF(temp);
3725             result = PyLong_FromLong(0);
3726             goto done;
3727         }
3728         cmp = PyObject_RichCompareBool(temp, k, Py_LT);
3729         if (cmp > 0) {
3730             Py_SETREF(k, temp);
3731         }
3732         else {
3733             Py_DECREF(temp);
3734             if (cmp < 0) {
3735                 goto error;
3736             }
3737         }
3738 
3739         ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3740         assert(overflow >= 0 && !PyErr_Occurred());
3741         if (overflow) {
3742             PyErr_Format(PyExc_OverflowError,
3743                          "min(n - k, k) must not exceed %lld",
3744                          LLONG_MAX);
3745             goto error;
3746         }
3747         assert(ki >= 0);
3748     }
3749 
3750     result = perm_comb(n, (unsigned long long)ki, 1);
3751 
3752 done:
3753     Py_DECREF(n);
3754     Py_DECREF(k);
3755     return result;
3756 
3757 error:
3758     Py_DECREF(n);
3759     Py_DECREF(k);
3760     return NULL;
3761 }
3762 
3763 
3764 /*[clinic input]
3765 math.nextafter
3766 
3767     x: double
3768     y: double
3769     /
3770 
3771 Return the next floating-point value after x towards y.
3772 [clinic start generated code]*/
3773 
3774 static PyObject *
math_nextafter_impl(PyObject * module,double x,double y)3775 math_nextafter_impl(PyObject *module, double x, double y)
3776 /*[clinic end generated code: output=750c8266c1c540ce input=02b2d50cd1d9f9b6]*/
3777 {
3778 #if defined(_AIX)
3779     if (x == y) {
3780         /* On AIX 7.1, libm nextafter(-0.0, +0.0) returns -0.0.
3781            Bug fixed in bos.adt.libm 7.2.2.0 by APAR IV95512. */
3782         return PyFloat_FromDouble(y);
3783     }
3784     if (Py_IS_NAN(x)) {
3785         return PyFloat_FromDouble(x);
3786     }
3787     if (Py_IS_NAN(y)) {
3788         return PyFloat_FromDouble(y);
3789     }
3790 #endif
3791     return PyFloat_FromDouble(nextafter(x, y));
3792 }
3793 
3794 
3795 /*[clinic input]
3796 math.ulp -> double
3797 
3798     x: double
3799     /
3800 
3801 Return the value of the least significant bit of the float x.
3802 [clinic start generated code]*/
3803 
3804 static double
math_ulp_impl(PyObject * module,double x)3805 math_ulp_impl(PyObject *module, double x)
3806 /*[clinic end generated code: output=f5207867a9384dd4 input=31f9bfbbe373fcaa]*/
3807 {
3808     if (Py_IS_NAN(x)) {
3809         return x;
3810     }
3811     x = fabs(x);
3812     if (Py_IS_INFINITY(x)) {
3813         return x;
3814     }
3815     double inf = m_inf();
3816     double x2 = nextafter(x, inf);
3817     if (Py_IS_INFINITY(x2)) {
3818         /* special case: x is the largest positive representable float */
3819         x2 = nextafter(x, -inf);
3820         return x - x2;
3821     }
3822     return x2 - x;
3823 }
3824 
3825 static int
math_exec(PyObject * module)3826 math_exec(PyObject *module)
3827 {
3828     if (PyModule_AddObject(module, "pi", PyFloat_FromDouble(Py_MATH_PI)) < 0) {
3829         return -1;
3830     }
3831     if (PyModule_AddObject(module, "e", PyFloat_FromDouble(Py_MATH_E)) < 0) {
3832         return -1;
3833     }
3834     // 2pi
3835     if (PyModule_AddObject(module, "tau", PyFloat_FromDouble(Py_MATH_TAU)) < 0) {
3836         return -1;
3837     }
3838     if (PyModule_AddObject(module, "inf", PyFloat_FromDouble(m_inf())) < 0) {
3839         return -1;
3840     }
3841 #if _PY_SHORT_FLOAT_REPR == 1
3842     if (PyModule_AddObject(module, "nan", PyFloat_FromDouble(m_nan())) < 0) {
3843         return -1;
3844     }
3845 #endif
3846     return 0;
3847 }
3848 
3849 static PyMethodDef math_methods[] = {
3850     {"acos",            math_acos,      METH_O,         math_acos_doc},
3851     {"acosh",           math_acosh,     METH_O,         math_acosh_doc},
3852     {"asin",            math_asin,      METH_O,         math_asin_doc},
3853     {"asinh",           math_asinh,     METH_O,         math_asinh_doc},
3854     {"atan",            math_atan,      METH_O,         math_atan_doc},
3855     {"atan2",           _PyCFunction_CAST(math_atan2),     METH_FASTCALL,  math_atan2_doc},
3856     {"atanh",           math_atanh,     METH_O,         math_atanh_doc},
3857     {"cbrt",            math_cbrt,      METH_O,         math_cbrt_doc},
3858     MATH_CEIL_METHODDEF
3859     {"copysign",        _PyCFunction_CAST(math_copysign),  METH_FASTCALL,  math_copysign_doc},
3860     {"cos",             math_cos,       METH_O,         math_cos_doc},
3861     {"cosh",            math_cosh,      METH_O,         math_cosh_doc},
3862     MATH_DEGREES_METHODDEF
3863     MATH_DIST_METHODDEF
3864     {"erf",             math_erf,       METH_O,         math_erf_doc},
3865     {"erfc",            math_erfc,      METH_O,         math_erfc_doc},
3866     {"exp",             math_exp,       METH_O,         math_exp_doc},
3867     {"exp2",            math_exp2,      METH_O,         math_exp2_doc},
3868     {"expm1",           math_expm1,     METH_O,         math_expm1_doc},
3869     {"fabs",            math_fabs,      METH_O,         math_fabs_doc},
3870     MATH_FACTORIAL_METHODDEF
3871     MATH_FLOOR_METHODDEF
3872     MATH_FMOD_METHODDEF
3873     MATH_FREXP_METHODDEF
3874     MATH_FSUM_METHODDEF
3875     {"gamma",           math_gamma,     METH_O,         math_gamma_doc},
3876     {"gcd",             _PyCFunction_CAST(math_gcd),       METH_FASTCALL,  math_gcd_doc},
3877     {"hypot",           _PyCFunction_CAST(math_hypot),     METH_FASTCALL,  math_hypot_doc},
3878     MATH_ISCLOSE_METHODDEF
3879     MATH_ISFINITE_METHODDEF
3880     MATH_ISINF_METHODDEF
3881     MATH_ISNAN_METHODDEF
3882     MATH_ISQRT_METHODDEF
3883     {"lcm",             _PyCFunction_CAST(math_lcm),       METH_FASTCALL,  math_lcm_doc},
3884     MATH_LDEXP_METHODDEF
3885     {"lgamma",          math_lgamma,    METH_O,         math_lgamma_doc},
3886     MATH_LOG_METHODDEF
3887     {"log1p",           math_log1p,     METH_O,         math_log1p_doc},
3888     MATH_LOG10_METHODDEF
3889     MATH_LOG2_METHODDEF
3890     MATH_MODF_METHODDEF
3891     MATH_POW_METHODDEF
3892     MATH_RADIANS_METHODDEF
3893     {"remainder",       _PyCFunction_CAST(math_remainder), METH_FASTCALL,  math_remainder_doc},
3894     {"sin",             math_sin,       METH_O,         math_sin_doc},
3895     {"sinh",            math_sinh,      METH_O,         math_sinh_doc},
3896     {"sqrt",            math_sqrt,      METH_O,         math_sqrt_doc},
3897     {"tan",             math_tan,       METH_O,         math_tan_doc},
3898     {"tanh",            math_tanh,      METH_O,         math_tanh_doc},
3899     MATH_TRUNC_METHODDEF
3900     MATH_PROD_METHODDEF
3901     MATH_PERM_METHODDEF
3902     MATH_COMB_METHODDEF
3903     MATH_NEXTAFTER_METHODDEF
3904     MATH_ULP_METHODDEF
3905     {NULL,              NULL}           /* sentinel */
3906 };
3907 
3908 static PyModuleDef_Slot math_slots[] = {
3909     {Py_mod_exec, math_exec},
3910     {0, NULL}
3911 };
3912 
3913 PyDoc_STRVAR(module_doc,
3914 "This module provides access to the mathematical functions\n"
3915 "defined by the C standard.");
3916 
3917 static struct PyModuleDef mathmodule = {
3918     PyModuleDef_HEAD_INIT,
3919     .m_name = "math",
3920     .m_doc = module_doc,
3921     .m_size = 0,
3922     .m_methods = math_methods,
3923     .m_slots = math_slots,
3924 };
3925 
3926 PyMODINIT_FUNC
PyInit_math(void)3927 PyInit_math(void)
3928 {
3929     return PyModuleDef_Init(&mathmodule);
3930 }
3931