/*
 * Helpers for evaluating polynomials with various schemes - specific to SVE
 * but precision-agnostic.
 *
 * Copyright (c) 2023, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
8*412f47f9SXin Li
9*412f47f9SXin Li #ifndef VTYPE
10*412f47f9SXin Li # error Cannot use poly_generic without defining VTYPE
11*412f47f9SXin Li #endif
12*412f47f9SXin Li #ifndef STYPE
13*412f47f9SXin Li # error Cannot use poly_generic without defining STYPE
14*412f47f9SXin Li #endif
15*412f47f9SXin Li #ifndef VWRAP
16*412f47f9SXin Li # error Cannot use poly_generic without defining VWRAP
17*412f47f9SXin Li #endif
18*412f47f9SXin Li #ifndef DUP
19*412f47f9SXin Li # error Cannot use poly_generic without defining DUP
20*412f47f9SXin Li #endif
21*412f47f9SXin Li
VWRAP(pairwise_poly_3)22*412f47f9SXin Li static inline VTYPE VWRAP (pairwise_poly_3) (svbool_t pg, VTYPE x, VTYPE x2,
23*412f47f9SXin Li const STYPE *poly)
24*412f47f9SXin Li {
25*412f47f9SXin Li /* At order 3, Estrin and Pairwise Horner are identical. */
26*412f47f9SXin Li VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
27*412f47f9SXin Li VTYPE p23 = svmla_x (pg, DUP (poly[2]), x, poly[3]);
28*412f47f9SXin Li return svmla_x (pg, p01, p23, x2);
29*412f47f9SXin Li }
30*412f47f9SXin Li
VWRAP(estrin_4)31*412f47f9SXin Li static inline VTYPE VWRAP (estrin_4) (svbool_t pg, VTYPE x, VTYPE x2, VTYPE x4,
32*412f47f9SXin Li const STYPE *poly)
33*412f47f9SXin Li {
34*412f47f9SXin Li VTYPE p03 = VWRAP (pairwise_poly_3) (pg, x, x2, poly);
35*412f47f9SXin Li return svmla_x (pg, p03, x4, poly[4]);
36*412f47f9SXin Li }
VWRAP(estrin_5)37*412f47f9SXin Li static inline VTYPE VWRAP (estrin_5) (svbool_t pg, VTYPE x, VTYPE x2, VTYPE x4,
38*412f47f9SXin Li const STYPE *poly)
39*412f47f9SXin Li {
40*412f47f9SXin Li VTYPE p03 = VWRAP (pairwise_poly_3) (pg, x, x2, poly);
41*412f47f9SXin Li VTYPE p45 = svmla_x (pg, DUP (poly[4]), x, poly[5]);
42*412f47f9SXin Li return svmla_x (pg, p03, p45, x4);
43*412f47f9SXin Li }
VWRAP(estrin_6)44*412f47f9SXin Li static inline VTYPE VWRAP (estrin_6) (svbool_t pg, VTYPE x, VTYPE x2, VTYPE x4,
45*412f47f9SXin Li const STYPE *poly)
46*412f47f9SXin Li {
47*412f47f9SXin Li VTYPE p03 = VWRAP (pairwise_poly_3) (pg, x, x2, poly);
48*412f47f9SXin Li VTYPE p45 = svmla_x (pg, DUP (poly[4]), x, poly[5]);
49*412f47f9SXin Li VTYPE p46 = svmla_x (pg, p45, x, poly[6]);
50*412f47f9SXin Li return svmla_x (pg, p03, p46, x4);
51*412f47f9SXin Li }
VWRAP(estrin_7)52*412f47f9SXin Li static inline VTYPE VWRAP (estrin_7) (svbool_t pg, VTYPE x, VTYPE x2, VTYPE x4,
53*412f47f9SXin Li const STYPE *poly)
54*412f47f9SXin Li {
55*412f47f9SXin Li VTYPE p03 = VWRAP (pairwise_poly_3) (pg, x, x2, poly);
56*412f47f9SXin Li VTYPE p47 = VWRAP (pairwise_poly_3) (pg, x, x2, poly + 4);
57*412f47f9SXin Li return svmla_x (pg, p03, p47, x4);
58*412f47f9SXin Li }
VWRAP(estrin_8)59*412f47f9SXin Li static inline VTYPE VWRAP (estrin_8) (svbool_t pg, VTYPE x, VTYPE x2, VTYPE x4,
60*412f47f9SXin Li VTYPE x8, const STYPE *poly)
61*412f47f9SXin Li {
62*412f47f9SXin Li return svmla_x (pg, VWRAP (estrin_7) (pg, x, x2, x4, poly), x8, poly[8]);
63*412f47f9SXin Li }
VWRAP(estrin_9)64*412f47f9SXin Li static inline VTYPE VWRAP (estrin_9) (svbool_t pg, VTYPE x, VTYPE x2, VTYPE x4,
65*412f47f9SXin Li VTYPE x8, const STYPE *poly)
66*412f47f9SXin Li {
67*412f47f9SXin Li VTYPE p89 = svmla_x (pg, DUP (poly[8]), x, poly[9]);
68*412f47f9SXin Li return svmla_x (pg, VWRAP (estrin_7) (pg, x, x2, x4, poly), p89, x8);
69*412f47f9SXin Li }
VWRAP(estrin_10)70*412f47f9SXin Li static inline VTYPE VWRAP (estrin_10) (svbool_t pg, VTYPE x, VTYPE x2,
71*412f47f9SXin Li VTYPE x4, VTYPE x8, const STYPE *poly)
72*412f47f9SXin Li {
73*412f47f9SXin Li VTYPE p89 = svmla_x (pg, DUP (poly[8]), x, poly[9]);
74*412f47f9SXin Li VTYPE p8_10 = svmla_x (pg, p89, x2, poly[10]);
75*412f47f9SXin Li return svmla_x (pg, VWRAP (estrin_7) (pg, x, x2, x4, poly), p8_10, x8);
76*412f47f9SXin Li }
VWRAP(estrin_11)77*412f47f9SXin Li static inline VTYPE VWRAP (estrin_11) (svbool_t pg, VTYPE x, VTYPE x2,
78*412f47f9SXin Li VTYPE x4, VTYPE x8, const STYPE *poly)
79*412f47f9SXin Li {
80*412f47f9SXin Li VTYPE p8_11 = VWRAP (pairwise_poly_3) (pg, x, x2, poly + 8);
81*412f47f9SXin Li return svmla_x (pg, VWRAP (estrin_7) (pg, x, x2, x4, poly), p8_11, x8);
82*412f47f9SXin Li }
VWRAP(estrin_12)83*412f47f9SXin Li static inline VTYPE VWRAP (estrin_12) (svbool_t pg, VTYPE x, VTYPE x2,
84*412f47f9SXin Li VTYPE x4, VTYPE x8, const STYPE *poly)
85*412f47f9SXin Li {
86*412f47f9SXin Li return svmla_x (pg, VWRAP (estrin_7) (pg, x, x2, x4, poly),
87*412f47f9SXin Li VWRAP (estrin_4) (pg, x, x2, x4, poly + 8), x8);
88*412f47f9SXin Li }
VWRAP(estrin_13)89*412f47f9SXin Li static inline VTYPE VWRAP (estrin_13) (svbool_t pg, VTYPE x, VTYPE x2,
90*412f47f9SXin Li VTYPE x4, VTYPE x8, const STYPE *poly)
91*412f47f9SXin Li {
92*412f47f9SXin Li return svmla_x (pg, VWRAP (estrin_7) (pg, x, x2, x4, poly),
93*412f47f9SXin Li VWRAP (estrin_5) (pg, x, x2, x4, poly + 8), x8);
94*412f47f9SXin Li }
VWRAP(estrin_14)95*412f47f9SXin Li static inline VTYPE VWRAP (estrin_14) (svbool_t pg, VTYPE x, VTYPE x2,
96*412f47f9SXin Li VTYPE x4, VTYPE x8, const STYPE *poly)
97*412f47f9SXin Li {
98*412f47f9SXin Li return svmla_x (pg, VWRAP (estrin_7) (pg, x, x2, x4, poly),
99*412f47f9SXin Li VWRAP (estrin_6) (pg, x, x2, x4, poly + 8), x8);
100*412f47f9SXin Li }
VWRAP(estrin_15)101*412f47f9SXin Li static inline VTYPE VWRAP (estrin_15) (svbool_t pg, VTYPE x, VTYPE x2,
102*412f47f9SXin Li VTYPE x4, VTYPE x8, const STYPE *poly)
103*412f47f9SXin Li {
104*412f47f9SXin Li return svmla_x (pg, VWRAP (estrin_7) (pg, x, x2, x4, poly),
105*412f47f9SXin Li VWRAP (estrin_7) (pg, x, x2, x4, poly + 8), x8);
106*412f47f9SXin Li }
VWRAP(estrin_16)107*412f47f9SXin Li static inline VTYPE VWRAP (estrin_16) (svbool_t pg, VTYPE x, VTYPE x2,
108*412f47f9SXin Li VTYPE x4, VTYPE x8, VTYPE x16,
109*412f47f9SXin Li const STYPE *poly)
110*412f47f9SXin Li {
111*412f47f9SXin Li return svmla_x (pg, VWRAP (estrin_15) (pg, x, x2, x4, x8, poly), x16,
112*412f47f9SXin Li poly[16]);
113*412f47f9SXin Li }
VWRAP(estrin_17)114*412f47f9SXin Li static inline VTYPE VWRAP (estrin_17) (svbool_t pg, VTYPE x, VTYPE x2,
115*412f47f9SXin Li VTYPE x4, VTYPE x8, VTYPE x16,
116*412f47f9SXin Li const STYPE *poly)
117*412f47f9SXin Li {
118*412f47f9SXin Li VTYPE p16_17 = svmla_x (pg, DUP (poly[16]), x, poly[17]);
119*412f47f9SXin Li return svmla_x (pg, VWRAP (estrin_15) (pg, x, x2, x4, x8, poly), p16_17,
120*412f47f9SXin Li x16);
121*412f47f9SXin Li }
VWRAP(estrin_18)122*412f47f9SXin Li static inline VTYPE VWRAP (estrin_18) (svbool_t pg, VTYPE x, VTYPE x2,
123*412f47f9SXin Li VTYPE x4, VTYPE x8, VTYPE x16,
124*412f47f9SXin Li const STYPE *poly)
125*412f47f9SXin Li {
126*412f47f9SXin Li VTYPE p16_17 = svmla_x (pg, DUP (poly[16]), x, poly[17]);
127*412f47f9SXin Li VTYPE p16_18 = svmla_x (pg, p16_17, x2, poly[18]);
128*412f47f9SXin Li return svmla_x (pg, VWRAP (estrin_15) (pg, x, x2, x4, x8, poly), p16_18,
129*412f47f9SXin Li x16);
130*412f47f9SXin Li }
VWRAP(estrin_19)131*412f47f9SXin Li static inline VTYPE VWRAP (estrin_19) (svbool_t pg, VTYPE x, VTYPE x2,
132*412f47f9SXin Li VTYPE x4, VTYPE x8, VTYPE x16,
133*412f47f9SXin Li const STYPE *poly)
134*412f47f9SXin Li {
135*412f47f9SXin Li return svmla_x (pg, VWRAP (estrin_15) (pg, x, x2, x4, x8, poly),
136*412f47f9SXin Li VWRAP (pairwise_poly_3) (pg, x, x2, poly + 16), x16);
137*412f47f9SXin Li }
138*412f47f9SXin Li
VWRAP(horner_3)139*412f47f9SXin Li static inline VTYPE VWRAP (horner_3) (svbool_t pg, VTYPE x, const STYPE *poly)
140*412f47f9SXin Li {
141*412f47f9SXin Li VTYPE p = svmla_x (pg, DUP (poly[2]), x, poly[3]);
142*412f47f9SXin Li p = svmad_x (pg, x, p, poly[1]);
143*412f47f9SXin Li p = svmad_x (pg, x, p, poly[0]);
144*412f47f9SXin Li return p;
145*412f47f9SXin Li }
VWRAP(horner_4)146*412f47f9SXin Li static inline VTYPE VWRAP (horner_4) (svbool_t pg, VTYPE x, const STYPE *poly)
147*412f47f9SXin Li {
148*412f47f9SXin Li VTYPE p = svmla_x (pg, DUP (poly[3]), x, poly[4]);
149*412f47f9SXin Li p = svmad_x (pg, x, p, poly[2]);
150*412f47f9SXin Li p = svmad_x (pg, x, p, poly[1]);
151*412f47f9SXin Li p = svmad_x (pg, x, p, poly[0]);
152*412f47f9SXin Li return p;
153*412f47f9SXin Li }
VWRAP(horner_5)154*412f47f9SXin Li static inline VTYPE VWRAP (horner_5) (svbool_t pg, VTYPE x, const STYPE *poly)
155*412f47f9SXin Li {
156*412f47f9SXin Li return svmad_x (pg, x, VWRAP (horner_4) (pg, x, poly + 1), poly[0]);
157*412f47f9SXin Li }
VWRAP(horner_6)158*412f47f9SXin Li static inline VTYPE VWRAP (horner_6) (svbool_t pg, VTYPE x, const STYPE *poly)
159*412f47f9SXin Li {
160*412f47f9SXin Li return svmad_x (pg, x, VWRAP (horner_5) (pg, x, poly + 1), poly[0]);
161*412f47f9SXin Li }
VWRAP(horner_7)162*412f47f9SXin Li static inline VTYPE VWRAP (horner_7) (svbool_t pg, VTYPE x, const STYPE *poly)
163*412f47f9SXin Li {
164*412f47f9SXin Li return svmad_x (pg, x, VWRAP (horner_6) (pg, x, poly + 1), poly[0]);
165*412f47f9SXin Li }
VWRAP(horner_8)166*412f47f9SXin Li static inline VTYPE VWRAP (horner_8) (svbool_t pg, VTYPE x, const STYPE *poly)
167*412f47f9SXin Li {
168*412f47f9SXin Li return svmad_x (pg, x, VWRAP (horner_7) (pg, x, poly + 1), poly[0]);
169*412f47f9SXin Li }
VWRAP(horner_9)170*412f47f9SXin Li static inline VTYPE VWRAP (horner_9) (svbool_t pg, VTYPE x, const STYPE *poly)
171*412f47f9SXin Li {
172*412f47f9SXin Li return svmad_x (pg, x, VWRAP (horner_8) (pg, x, poly + 1), poly[0]);
173*412f47f9SXin Li }
174*412f47f9SXin Li static inline VTYPE
sv_horner_10_f32_x(svbool_t pg,VTYPE x,const STYPE * poly)175*412f47f9SXin Li sv_horner_10_f32_x (svbool_t pg, VTYPE x, const STYPE *poly)
176*412f47f9SXin Li {
177*412f47f9SXin Li return svmad_x (pg, x, VWRAP (horner_9) (pg, x, poly + 1), poly[0]);
178*412f47f9SXin Li }
179*412f47f9SXin Li static inline VTYPE
sv_horner_11_f32_x(svbool_t pg,VTYPE x,const STYPE * poly)180*412f47f9SXin Li sv_horner_11_f32_x (svbool_t pg, VTYPE x, const STYPE *poly)
181*412f47f9SXin Li {
182*412f47f9SXin Li return svmad_x (pg, x, sv_horner_10_f32_x (pg, x, poly + 1), poly[0]);
183*412f47f9SXin Li }
184*412f47f9SXin Li static inline VTYPE
sv_horner_12_f32_x(svbool_t pg,VTYPE x,const STYPE * poly)185*412f47f9SXin Li sv_horner_12_f32_x (svbool_t pg, VTYPE x, const STYPE *poly)
186*412f47f9SXin Li {
187*412f47f9SXin Li return svmad_x (pg, x, sv_horner_11_f32_x (pg, x, poly + 1), poly[0]);
188*412f47f9SXin Li }
189*412f47f9SXin Li
VWRAP(pw_horner_4)190*412f47f9SXin Li static inline VTYPE VWRAP (pw_horner_4) (svbool_t pg, VTYPE x, VTYPE x2,
191*412f47f9SXin Li const STYPE *poly)
192*412f47f9SXin Li {
193*412f47f9SXin Li VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
194*412f47f9SXin Li VTYPE p23 = svmla_x (pg, DUP (poly[2]), x, poly[3]);
195*412f47f9SXin Li VTYPE p;
196*412f47f9SXin Li p = svmla_x (pg, p23, x2, poly[4]);
197*412f47f9SXin Li p = svmla_x (pg, p01, x2, p);
198*412f47f9SXin Li return p;
199*412f47f9SXin Li }
VWRAP(pw_horner_5)200*412f47f9SXin Li static inline VTYPE VWRAP (pw_horner_5) (svbool_t pg, VTYPE x, VTYPE x2,
201*412f47f9SXin Li const STYPE *poly)
202*412f47f9SXin Li {
203*412f47f9SXin Li VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
204*412f47f9SXin Li VTYPE p23 = svmla_x (pg, DUP (poly[2]), x, poly[3]);
205*412f47f9SXin Li VTYPE p45 = svmla_x (pg, DUP (poly[4]), x, poly[5]);
206*412f47f9SXin Li VTYPE p;
207*412f47f9SXin Li p = svmla_x (pg, p23, x2, p45);
208*412f47f9SXin Li p = svmla_x (pg, p01, x2, p);
209*412f47f9SXin Li return p;
210*412f47f9SXin Li }
VWRAP(pw_horner_6)211*412f47f9SXin Li static inline VTYPE VWRAP (pw_horner_6) (svbool_t pg, VTYPE x, VTYPE x2,
212*412f47f9SXin Li const STYPE *poly)
213*412f47f9SXin Li {
214*412f47f9SXin Li VTYPE p26 = VWRAP (pw_horner_4) (pg, x, x2, poly + 2);
215*412f47f9SXin Li VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
216*412f47f9SXin Li return svmla_x (pg, p01, x2, p26);
217*412f47f9SXin Li }
VWRAP(pw_horner_7)218*412f47f9SXin Li static inline VTYPE VWRAP (pw_horner_7) (svbool_t pg, VTYPE x, VTYPE x2,
219*412f47f9SXin Li const STYPE *poly)
220*412f47f9SXin Li {
221*412f47f9SXin Li VTYPE p27 = VWRAP (pw_horner_5) (pg, x, x2, poly + 2);
222*412f47f9SXin Li VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
223*412f47f9SXin Li return svmla_x (pg, p01, x2, p27);
224*412f47f9SXin Li }
VWRAP(pw_horner_8)225*412f47f9SXin Li static inline VTYPE VWRAP (pw_horner_8) (svbool_t pg, VTYPE x, VTYPE x2,
226*412f47f9SXin Li const STYPE *poly)
227*412f47f9SXin Li {
228*412f47f9SXin Li VTYPE p28 = VWRAP (pw_horner_6) (pg, x, x2, poly + 2);
229*412f47f9SXin Li VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
230*412f47f9SXin Li return svmla_x (pg, p01, x2, p28);
231*412f47f9SXin Li }
VWRAP(pw_horner_9)232*412f47f9SXin Li static inline VTYPE VWRAP (pw_horner_9) (svbool_t pg, VTYPE x, VTYPE x2,
233*412f47f9SXin Li const STYPE *poly)
234*412f47f9SXin Li {
235*412f47f9SXin Li VTYPE p29 = VWRAP (pw_horner_7) (pg, x, x2, poly + 2);
236*412f47f9SXin Li VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
237*412f47f9SXin Li return svmla_x (pg, p01, x2, p29);
238*412f47f9SXin Li }
VWRAP(pw_horner_10)239*412f47f9SXin Li static inline VTYPE VWRAP (pw_horner_10) (svbool_t pg, VTYPE x, VTYPE x2,
240*412f47f9SXin Li const STYPE *poly)
241*412f47f9SXin Li {
242*412f47f9SXin Li VTYPE p2_10 = VWRAP (pw_horner_8) (pg, x, x2, poly + 2);
243*412f47f9SXin Li VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
244*412f47f9SXin Li return svmla_x (pg, p01, x2, p2_10);
245*412f47f9SXin Li }
VWRAP(pw_horner_11)246*412f47f9SXin Li static inline VTYPE VWRAP (pw_horner_11) (svbool_t pg, VTYPE x, VTYPE x2,
247*412f47f9SXin Li const STYPE *poly)
248*412f47f9SXin Li {
249*412f47f9SXin Li VTYPE p2_11 = VWRAP (pw_horner_9) (pg, x, x2, poly + 2);
250*412f47f9SXin Li VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
251*412f47f9SXin Li return svmla_x (pg, p01, x2, p2_11);
252*412f47f9SXin Li }
VWRAP(pw_horner_12)253*412f47f9SXin Li static inline VTYPE VWRAP (pw_horner_12) (svbool_t pg, VTYPE x, VTYPE x2,
254*412f47f9SXin Li const STYPE *poly)
255*412f47f9SXin Li {
256*412f47f9SXin Li VTYPE p2_12 = VWRAP (pw_horner_10) (pg, x, x2, poly + 2);
257*412f47f9SXin Li VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
258*412f47f9SXin Li return svmla_x (pg, p01, x2, p2_12);
259*412f47f9SXin Li }
VWRAP(pw_horner_13)260*412f47f9SXin Li static inline VTYPE VWRAP (pw_horner_13) (svbool_t pg, VTYPE x, VTYPE x2,
261*412f47f9SXin Li const STYPE *poly)
262*412f47f9SXin Li {
263*412f47f9SXin Li VTYPE p2_13 = VWRAP (pw_horner_11) (pg, x, x2, poly + 2);
264*412f47f9SXin Li VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
265*412f47f9SXin Li return svmla_x (pg, p01, x2, p2_13);
266*412f47f9SXin Li }
VWRAP(pw_horner_14)267*412f47f9SXin Li static inline VTYPE VWRAP (pw_horner_14) (svbool_t pg, VTYPE x, VTYPE x2,
268*412f47f9SXin Li const STYPE *poly)
269*412f47f9SXin Li {
270*412f47f9SXin Li VTYPE p2_14 = VWRAP (pw_horner_12) (pg, x, x2, poly + 2);
271*412f47f9SXin Li VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
272*412f47f9SXin Li return svmla_x (pg, p01, x2, p2_14);
273*412f47f9SXin Li }
VWRAP(pw_horner_15)274*412f47f9SXin Li static inline VTYPE VWRAP (pw_horner_15) (svbool_t pg, VTYPE x, VTYPE x2,
275*412f47f9SXin Li const STYPE *poly)
276*412f47f9SXin Li {
277*412f47f9SXin Li VTYPE p2_15 = VWRAP (pw_horner_13) (pg, x, x2, poly + 2);
278*412f47f9SXin Li VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
279*412f47f9SXin Li return svmla_x (pg, p01, x2, p2_15);
280*412f47f9SXin Li }
VWRAP(pw_horner_16)281*412f47f9SXin Li static inline VTYPE VWRAP (pw_horner_16) (svbool_t pg, VTYPE x, VTYPE x2,
282*412f47f9SXin Li const STYPE *poly)
283*412f47f9SXin Li {
284*412f47f9SXin Li VTYPE p2_16 = VWRAP (pw_horner_14) (pg, x, x2, poly + 2);
285*412f47f9SXin Li VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
286*412f47f9SXin Li return svmla_x (pg, p01, x2, p2_16);
287*412f47f9SXin Li }
VWRAP(pw_horner_17)288*412f47f9SXin Li static inline VTYPE VWRAP (pw_horner_17) (svbool_t pg, VTYPE x, VTYPE x2,
289*412f47f9SXin Li const STYPE *poly)
290*412f47f9SXin Li {
291*412f47f9SXin Li VTYPE p2_17 = VWRAP (pw_horner_15) (pg, x, x2, poly + 2);
292*412f47f9SXin Li VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
293*412f47f9SXin Li return svmla_x (pg, p01, x2, p2_17);
294*412f47f9SXin Li }
VWRAP(pw_horner_18)295*412f47f9SXin Li static inline VTYPE VWRAP (pw_horner_18) (svbool_t pg, VTYPE x, VTYPE x2,
296*412f47f9SXin Li const STYPE *poly)
297*412f47f9SXin Li {
298*412f47f9SXin Li VTYPE p2_18 = VWRAP (pw_horner_16) (pg, x, x2, poly + 2);
299*412f47f9SXin Li VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
300*412f47f9SXin Li return svmla_x (pg, p01, x2, p2_18);
301*412f47f9SXin Li }
302