1// Copyright 2017 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5#define	Ln2Hi	6.93147180369123816490e-01
6#define	Ln2Lo	1.90821492927058770002e-10
7#define	Log2e	1.44269504088896338700e+00
8#define	Overflow	7.09782712893383973096e+02
9#define	Underflow	-7.45133219101941108420e+02
10#define	Overflow2	1.0239999999999999e+03
11#define	Underflow2	-1.0740e+03
12#define	NearZero	0x3e30000000000000	// 2**-28
13#define	PosInf	0x7ff0000000000000
14#define	FracMask	0x000fffffffffffff
15#define	C1	0x3cb0000000000000	// 2**-52
16#define	P1	1.66666666666666657415e-01	// 0x3FC55555; 0x55555555
17#define	P2	-2.77777777770155933842e-03	// 0xBF66C16C; 0x16BEBD93
18#define	P3	6.61375632143793436117e-05	// 0x3F11566A; 0xAF25DE2C
19#define	P4	-1.65339022054652515390e-06	// 0xBEBBBD41; 0xC5D26BF1
20#define	P5	4.13813679705723846039e-08	// 0x3E663769; 0x72BEA4D0
21
22// Exp returns e**x, the base-e exponential of x.
23// This is an assembly implementation of the method used for function Exp in file exp.go.
24//
25// func Exp(x float64) float64
26TEXT ·archExp(SB),$0-16
27	FMOVD	x+0(FP), F0	// F0 = x
28	FCMPD	F0, F0
29	BNE	isNaN		// x = NaN, return NaN
30	FMOVD	$Overflow, F1
31	FCMPD	F1, F0
32	BGT	overflow	// x > Overflow, return PosInf
33	FMOVD	$Underflow, F1
34	FCMPD	F1, F0
35	BLT	underflow	// x < Underflow, return 0
36	MOVD	$NearZero, R0
37	FMOVD	R0, F2
38	FABSD	F0, F3
39	FMOVD	$1.0, F1	// F1 = 1.0
40	FCMPD	F2, F3
41	BLT	nearzero	// fabs(x) < NearZero, return 1 + x
42	// argument reduction, x = k*ln2 + r,  |r| <= 0.5*ln2
43	// computed as r = hi - lo for extra precision.
44	FMOVD	$Log2e, F2
45	FMOVD	$0.5, F3
46	FNMSUBD	F0, F3, F2, F4	// Log2e*x - 0.5
47	FMADDD	F0, F3, F2, F3	// Log2e*x + 0.5
48	FCMPD	$0.0, F0
49	FCSELD	LT, F4, F3, F3	// F3 = k
50	FCVTZSD	F3, R1		// R1 = int(k)
51	SCVTFD	R1, F3		// F3 = float64(int(k))
52	FMOVD	$Ln2Hi, F4	// F4 = Ln2Hi
53	FMOVD	$Ln2Lo, F5	// F5 = Ln2Lo
54	FMSUBD	F3, F0, F4, F4	// F4 = hi = x - float64(int(k))*Ln2Hi
55	FMULD	F3, F5		// F5 = lo = float64(int(k)) * Ln2Lo
56	FSUBD	F5, F4, F6	// F6 = r = hi - lo
57	FMULD	F6, F6, F7	// F7 = t = r * r
58	// compute y
59	FMOVD	$P5, F8		// F8 = P5
60	FMOVD	$P4, F9		// F9 = P4
61	FMADDD	F7, F9, F8, F13	// P4+t*P5
62	FMOVD	$P3, F10	// F10 = P3
63	FMADDD	F7, F10, F13, F13	// P3+t*(P4+t*P5)
64	FMOVD	$P2, F11	// F11 = P2
65	FMADDD	F7, F11, F13, F13	// P2+t*(P3+t*(P4+t*P5))
66	FMOVD	$P1, F12	// F12 = P1
67	FMADDD	F7, F12, F13, F13	// P1+t*(P2+t*(P3+t*(P4+t*P5)))
68	FMSUBD	F7, F6, F13, F13	// F13 = c = r - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))))
69	FMOVD	$2.0, F14
70	FSUBD	F13, F14
71	FMULD	F6, F13, F15
72	FDIVD	F14, F15	// F15 = (r*c)/(2-c)
73	FSUBD	F15, F5, F15	// lo-(r*c)/(2-c)
74	FSUBD	F4, F15, F15	// (lo-(r*c)/(2-c))-hi
75	FSUBD	F15, F1, F16	// F16 = y = 1-((lo-(r*c)/(2-c))-hi)
76	// inline Ldexp(y, k), benefit:
77	// 1, no parameter pass overhead.
78	// 2, skip unnecessary checks for Inf/NaN/Zero
79	FMOVD	F16, R0
80	AND	$FracMask, R0, R2	// fraction
81	LSR	$52, R0, R5	// exponent
82	ADD	R1, R5		// R1 = int(k)
83	CMP	$1, R5
84	BGE	normal
85	ADD	$52, R5		// denormal
86	MOVD	$C1, R8
87	FMOVD	R8, F1		// m = 2**-52
88normal:
89	ORR	R5<<52, R2, R0
90	FMOVD	R0, F0
91	FMULD	F1, F0		// return m * x
92	FMOVD	F0, ret+8(FP)
93	RET
94nearzero:
95	FADDD	F1, F0
96isNaN:
97	FMOVD	F0, ret+8(FP)
98	RET
99underflow:
100	MOVD	ZR, ret+8(FP)
101	RET
102overflow:
103	MOVD	$PosInf, R0
104	MOVD	R0, ret+8(FP)
105	RET
106
107
108// Exp2 returns 2**x, the base-2 exponential of x.
109// This is an assembly implementation of the method used for function Exp2 in file exp.go.
110//
111// func Exp2(x float64) float64
112TEXT ·archExp2(SB),$0-16
113	FMOVD	x+0(FP), F0	// F0 = x
114	FCMPD	F0, F0
115	BNE	isNaN		// x = NaN, return NaN
116	FMOVD	$Overflow2, F1
117	FCMPD	F1, F0
118	BGT	overflow	// x > Overflow, return PosInf
119	FMOVD	$Underflow2, F1
120	FCMPD	F1, F0
121	BLT	underflow	// x < Underflow, return 0
122	// argument reduction; x = r*lg(e) + k with |r| <= ln(2)/2
123	// computed as r = hi - lo for extra precision.
124	FMOVD	$0.5, F2
125	FSUBD	F2, F0, F3	// x + 0.5
126	FADDD	F2, F0, F4	// x - 0.5
127	FCMPD	$0.0, F0
128	FCSELD	LT, F3, F4, F3	// F3 = k
129	FCVTZSD	F3, R1		// R1 = int(k)
130	SCVTFD	R1, F3		// F3 = float64(int(k))
131	FSUBD	F3, F0, F3	// t = x - float64(int(k))
132	FMOVD	$Ln2Hi, F4	// F4 = Ln2Hi
133	FMOVD	$Ln2Lo, F5	// F5 = Ln2Lo
134	FMULD	F3, F4		// F4 = hi = t * Ln2Hi
135	FNMULD	F3, F5		// F5 = lo = -t * Ln2Lo
136	FSUBD	F5, F4, F6	// F6 = r = hi - lo
137	FMULD	F6, F6, F7	// F7 = t = r * r
138	// compute y
139	FMOVD	$P5, F8		// F8 = P5
140	FMOVD	$P4, F9		// F9 = P4
141	FMADDD	F7, F9, F8, F13	// P4+t*P5
142	FMOVD	$P3, F10	// F10 = P3
143	FMADDD	F7, F10, F13, F13	// P3+t*(P4+t*P5)
144	FMOVD	$P2, F11	// F11 = P2
145	FMADDD	F7, F11, F13, F13	// P2+t*(P3+t*(P4+t*P5))
146	FMOVD	$P1, F12	// F12 = P1
147	FMADDD	F7, F12, F13, F13	// P1+t*(P2+t*(P3+t*(P4+t*P5)))
148	FMSUBD	F7, F6, F13, F13	// F13 = c = r - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))))
149	FMOVD	$2.0, F14
150	FSUBD	F13, F14
151	FMULD	F6, F13, F15
152	FDIVD	F14, F15	// F15 = (r*c)/(2-c)
153	FMOVD	$1.0, F1	// F1 = 1.0
154	FSUBD	F15, F5, F15	// lo-(r*c)/(2-c)
155	FSUBD	F4, F15, F15	// (lo-(r*c)/(2-c))-hi
156	FSUBD	F15, F1, F16	// F16 = y = 1-((lo-(r*c)/(2-c))-hi)
157	// inline Ldexp(y, k), benefit:
158	// 1, no parameter pass overhead.
159	// 2, skip unnecessary checks for Inf/NaN/Zero
160	FMOVD	F16, R0
161	AND	$FracMask, R0, R2	// fraction
162	LSR	$52, R0, R5	// exponent
163	ADD	R1, R5		// R1 = int(k)
164	CMP	$1, R5
165	BGE	normal
166	ADD	$52, R5		// denormal
167	MOVD	$C1, R8
168	FMOVD	R8, F1		// m = 2**-52
169normal:
170	ORR	R5<<52, R2, R0
171	FMOVD	R0, F0
172	FMULD	F1, F0		// return m * x
173isNaN:
174	FMOVD	F0, ret+8(FP)
175	RET
176underflow:
177	MOVD	ZR, ret+8(FP)
178	RET
179overflow:
180	MOVD	$PosInf, R0
181	MOVD	R0, ret+8(FP)
182	RET
183