/*

Copyright (c) 2009, 2010, 2011, 2012, 2013 STMicroelectronics
Written by Christophe Lyon

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

*/
25*f3782652STreehugger Robot
26*f3782652STreehugger Robot #ifndef _STM_ARM_NEON_REF_H_
27*f3782652STreehugger Robot #define _STM_ARM_NEON_REF_H_
28*f3782652STreehugger Robot
/* Standard headers.  C++ builds use the <c...> wrappers; C builds use the
   plain C headers.  When _MSC_VER is defined, "msinttypes.h" is included
   in place of <inttypes.h>, and NAN, INFINITY and HUGE_VALF are defined
   here from IEEE single-precision bit patterns. */
#if defined(__cplusplus)
#include <cstdio>
#include <cinttypes>
#include <cstring>
#else
#include <stdio.h>
#if defined(_MSC_VER)
#include "msinttypes.h"
#include <float.h> /* for isnan() ... */
/* 0x7fc00000 is a quiet NaN, 0x7f800000 is +infinity (IEEE binary32). */
static int32_t _ptrNan[]={0x7fc00000L};
#define NAN (*(float*)_ptrNan)
static int32_t _ptrInf[]={0x7f800000L};
#define INFINITY (*(float*)_ptrInf)
#define HUGE_VALF INFINITY
#else
#include <inttypes.h>
#endif
#include <string.h>
#endif
48*f3782652STreehugger Robot
/* Stringification helpers.  xSTR(X) turns X into a string literal exactly
   as written; STR(X) macro-expands X first and stringifies the result. */
#define xSTR(X) #X
#define STR(X) xSTR(X)
51*f3782652STreehugger Robot
/* Token-pasting helpers.  xNAME1(V,T) pastes V, '_' and T into V_T;
   xNAME(V,T) macro-expands its arguments first, then pastes. */
#define xNAME1(V,T) V ## _ ## T
#define xNAME(V,T) xNAME1(V,T)

/* Scalar variables: VAR(x,int,32) names x_int32, and VAR_DECL declares
   that name with type int32_t. */
#define VAR(V,T,W) xNAME(V,T##W)
#define VAR_DECL(V, T, W) T##W##_t VAR(V,T,W)

/* Vector names and types: VECT_NAME(int,8,8) is int8x8 and VECT_TYPE adds
   the _t suffix (int8x8_t).  The ARRAY variants append a further xL
   (int8x8x2, int8x8x2_t). */
#define VECT_NAME(T, W, N) T##W##x##N
#define VECT_ARRAY_NAME(T, W, N, L) T##W##x##N##x##L
#define VECT_TYPE(T, W, N) xNAME(VECT_NAME(T,W,N),t)
#define VECT_ARRAY_TYPE(T, W, N, L) xNAME(VECT_ARRAY_NAME(T,W,N,L),t)

/* Vector variables: VECT_VAR(v,int,8,8) names v_int8x8.  VECT_VAR_DECL
   declares that name with the *element* type (int8_t), so callers append
   an array bound themselves. */
#define VECT_VAR(V,T,W,N) xNAME(V,VECT_NAME(T,W,N))
#define VECT_VAR_DECL(V, T, W, N) T##W##_t VECT_VAR(V,T,W,N)
65*f3782652STreehugger Robot
/* This one is used for padding between input buffers.  It defines a char
   named like a vector variable, initialised to 42; the expansion already
   ends with a semicolon. */
#define PAD(V, T, W, N) char VECT_VAR(V,T,W,N)=42;

/* Array declarations.  ARRAY gives the array N elements; ARRAY4 always
   gives it 4 elements, while the variable name still carries N. */
#define ARRAY(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[N]
#define ARRAY4(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[4]

/* Arrays of vectors: VECT_ARRAY declares N*L elements of type T<W>_t
   under the name V_T<W>xNxL. */
#define VECT_ARRAY_VAR(V,T,W,N,L) xNAME(V,VECT_ARRAY_NAME(T,W,N,L))
#define VECT_ARRAY(V, T, W, N, L) T##W##_t VECT_ARRAY_VAR(V,T,W,N,L)[(N)*(L)]
76*f3782652STreehugger Robot
/* Running index printed with, and incremented by, every DUMP* macro.
   Being static in a header, each translation unit gets its own copy. */
static int result_idx = 0;

/* Print the N elements of result_<T><W>x<N> to ref_file with printf
   conversion FMT (a PRI* macro from <inttypes.h>), then emit the matching
   "expected" declaration to gcc_tests_file through DUMP4GCC.
   The expansion is a plain statement sequence and uses an integer
   variable `i` that must exist in the calling scope. */
#define DUMP(MSG,T,W,N,FMT) \
  fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++, \
          STR(VECT_VAR(result, T, W, N))); \
  for(i=0; i<N ; i++) \
    { \
      fprintf(ref_file, "%" FMT ", ", VECT_VAR(result, T, W, N)[i]); \
    } \
  fprintf(ref_file, " }\n"); \
  DUMP4GCC(MSG,T,W,N,FMT);
87*f3782652STreehugger Robot
/* Same as DUMP, for polynomial vectors: each element is cast to
   uint<W>_t before printing, to remove sign bits.  Uses the caller's
   integer variable `i`. */
#define DUMP_POLY(MSG,T,W,N,FMT) \
  fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++, \
          STR(VECT_VAR(result, T, W, N))); \
  for(i=0; i<N ; i++) \
    { \
      fprintf(ref_file, "%" FMT ", ", \
              (uint##W##_t)VECT_VAR(result, T, W, N)[i]); \
    } \
  fprintf(ref_file, " }\n"); \
  DUMP4GCC(MSG,T,W,N,FMT);
99*f3782652STreehugger Robot
/* Same as DUMP, for floating-point vectors: each element is stored into a
   union and read back as uint<W>_t, so FMT formats the bit pattern of the
   value rather than the value itself.  The "expected" declaration is
   emitted through DUMP4GCC_FP.  Uses the caller's integer variable `i`. */
#define DUMP_FP(MSG,T,W,N,FMT) \
  fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++, \
          STR(VECT_VAR(result, T, W, N))); \
  for(i=0; i<N ; i++) \
    { \
      union fp_operand { \
        uint##W##_t i; \
        float##W##_t f; \
      } tmp; \
      tmp.f = VECT_VAR(result, T, W, N)[i]; \
      fprintf(ref_file, "%" FMT ", ", tmp.i); \
    } \
  fprintf(ref_file, " }\n"); \
  DUMP4GCC_FP(MSG,T,W,N,FMT);
114*f3782652STreehugger Robot
/* Write result_<T><W>x<N> to gcc_tests_file as
     VECT_VAR_DECL(expected,<T>,<W>,<N>) [] = { 0x.., 0x.., ... };
   The first N-1 elements are followed by ", "; the last one (index N-1,
   where `i` stands once the loop ends) is printed without a separator.
   Elements narrower than 32 bits are first cast to uint<W>_t and widened
   to uint32_t, so a negative value is not sign-extended when printed.
   Every value is prefixed with "0x" whatever conversion FMT selects.
   MSG is unused.  Uses the caller's integer variable `i`. */
#define DUMP4GCC(MSG,T,W,N,FMT) \
  fprintf(gcc_tests_file, "VECT_VAR_DECL(expected,%s,%d,%d) [] = { ", \
          STR(T), W, N); \
  for(i=0; i<(N-1) ; i++) \
    { \
      if (W < 32) { \
        uint32_t tmp = (uint##W##_t) VECT_VAR(result, T, W, N)[i]; \
        fprintf(gcc_tests_file, "0x%" FMT ", ", tmp); \
      } else { \
        fprintf(gcc_tests_file, "0x%" FMT ", ", VECT_VAR(result, T, W, N)[i]); \
      } \
    } \
  if (W < 32) { \
    uint32_t tmp = (uint##W##_t) VECT_VAR(result, T, W, N)[i]; \
    fprintf(gcc_tests_file, "0x%" FMT, tmp); \
  } else { \
    fprintf(gcc_tests_file, "0x%" FMT, VECT_VAR(result, T, W, N)[i]); \
  } \
  fprintf(gcc_tests_file, " };\n");
134*f3782652STreehugger Robot
/* Floating-point counterpart of DUMP4GCC.  The declaration is written
   with the type name "hfloat", and each element is passed through a union
   so that its bit pattern (as uint<W>_t) is what gets printed after the
   "0x" prefix.  The whole expansion is a single braced block.
   MSG is unused.  Uses the caller's integer variable `i`. */
#define DUMP4GCC_FP(MSG,T,W,N,FMT) \
  { \
    union fp_operand { \
      uint##W##_t i; \
      float##W##_t f; \
    } tmp; \
    fprintf(gcc_tests_file, "VECT_VAR_DECL(expected,%s,%d,%d) [] = { ", \
            "hfloat", W, N); \
    for(i=0; i<(N-1) ; i++) \
      { \
        tmp.f = VECT_VAR(result, T, W, N)[i]; \
        fprintf(gcc_tests_file, "0x%" FMT ", ", tmp.i); \
      } \
    tmp.f = VECT_VAR(result, T, W, N)[i]; \
    fprintf(gcc_tests_file, "0x%" FMT, tmp.i); \
    fprintf(gcc_tests_file, " };\n"); \
  }
152*f3782652STreehugger Robot
/* Half-precision support, compiled only when the target advertises the
   IEEE fp16 format together with either bit 1 of __ARM_FP or bit 0 of
   __ARM_NEON_FP16_INTRINSICS. */
#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
#define float16_t __fp16

/* Print the N elements of result_<T><W>x<N> to ref_file, then emit the
   "expected" declaration through DUMP4GCC_FP16.
   NOTE(review): unlike DUMP_FP, each element is converted with a cast to
   uint<W>_t rather than read back through a union -- confirm that this
   is the intended way to obtain the printed value.
   Uses the caller's integer variable `i`. */
#define DUMP_FP16(MSG,T,W,N,FMT) \
  fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++, \
          STR(VECT_VAR(result, T, W, N))); \
  for(i=0; i<N ; i++) \
    { \
      uint##W##_t tmp; \
      tmp = (uint##W##_t)VECT_VAR(result, T, W, N)[i]; \
      fprintf(ref_file, "%" FMT ", ", tmp); \
    } \
  fprintf(ref_file, " }\n"); \
  DUMP4GCC_FP16(MSG,T,W,N,FMT);

/* Half-precision counterpart of DUMP4GCC_FP: same "hfloat" declaration
   layout, with elements converted by the same cast as in DUMP_FP16.
   MSG is unused.  Uses the caller's integer variable `i`. */
#define DUMP4GCC_FP16(MSG,T,W,N,FMT) \
  { \
    uint##W##_t tmp; \
    fprintf(gcc_tests_file, "VECT_VAR_DECL(expected,%s,%d,%d) [] = { ", \
            "hfloat", W, N); \
    for(i=0; i<(N-1) ; i++) \
      { \
        tmp = (uint##W##_t)VECT_VAR(result, T, W, N)[i]; \
        fprintf(gcc_tests_file, "0x%" FMT ", ", tmp); \
      } \
    tmp = (uint##W##_t)VECT_VAR(result, T, W, N)[i]; \
    fprintf(gcc_tests_file, "0x%" FMT, tmp); \
    fprintf(gcc_tests_file, " };\n"); \
  }
#endif
183*f3782652STreehugger Robot
/* Fill patterns, one per element width: the byte 0x33 repeated. */
#define CLEAN_PATTERN_8 0x33
#define CLEAN_PATTERN_16 0x3333
#define CLEAN_PATTERN_32 0x33333333
#define CLEAN_PATTERN_64 0x3333333333333333

/* Overwrite every byte of the array VAR_<T><W>x<N> with CLEAN_PATTERN_8.
   The argument must name a real array, because its size is taken with
   sizeof.  The expansion already ends with a semicolon. */
#define CLEAN(VAR,T,W,N) \
  memset(VECT_VAR(VAR, T, W, N), \
         CLEAN_PATTERN_8, \
         sizeof(VECT_VAR(VAR, T, W, N)));
193*f3782652STreehugger Robot
/* Store the vector VAR_<T1><W>x<N> into a local array with
   vst1<Q>_<T2><W> and print one line per element to stdout.
   The comparison against CLEAN_PATTERN_<W> is commented out, so as
   written the message is printed for every element, not only for those
   still holding the fill pattern.  The expansion is a single braced
   block with its own loop counter. */
#define CHECK_INIT(VAR,Q,T1,T2,W,N) \
  { \
    ARRAY(check_result, T1, W, N); \
    int i; \
    \
    vst1##Q##_##T2##W(VECT_VAR(check_result, T1, W, N), \
                      VECT_VAR(VAR, T1, W, N)); \
    for(i=0; i<N ; i++) \
      { \
        /*if (VECT_VAR(check_result, T1, W, N)[i] == CLEAN_PATTERN_##W)*/ { \
          fprintf(stdout, "%s:%d: %s[%d] unintialized! %#x\n", \
                  __FUNCTION__, __LINE__, \
                  STR(VECT_VAR(VAR, T1, W, N)), i, \
                  VECT_VAR(check_result, T1, W, N)[i]); \
        } \
      } \
  }
211*f3782652STreehugger Robot
/* Generic declarations: output streams, defined elsewhere.  ref_file and
   gcc_tests_file are the streams the DUMP* macros write to. */
extern FILE* log_file;
extern FILE* ref_file;
extern FILE* gcc_tests_file;
216*f3782652STreehugger Robot
217*f3782652STreehugger Robot /* Input buffers, one of each size */
218*f3782652STreehugger Robot extern ARRAY(buffer, int, 8, 8);
219*f3782652STreehugger Robot extern ARRAY(buffer, int, 16, 4);
220*f3782652STreehugger Robot extern ARRAY(buffer, int, 32, 2);
221*f3782652STreehugger Robot extern ARRAY(buffer, int, 64, 1);
222*f3782652STreehugger Robot extern ARRAY(buffer, uint, 8, 8);
223*f3782652STreehugger Robot extern ARRAY(buffer, uint, 16, 4);
224*f3782652STreehugger Robot extern ARRAY(buffer, uint, 32, 2);
225*f3782652STreehugger Robot extern ARRAY(buffer, uint, 64, 1);
226*f3782652STreehugger Robot extern ARRAY(buffer, poly, 8, 8);
227*f3782652STreehugger Robot extern ARRAY(buffer, poly, 16, 4);
228*f3782652STreehugger Robot extern ARRAY(buffer, float, 32, 2);
229*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
230*f3782652STreehugger Robot extern ARRAY(buffer, float, 16, 4);
231*f3782652STreehugger Robot #endif
232*f3782652STreehugger Robot extern ARRAY(buffer, int, 8, 16);
233*f3782652STreehugger Robot extern ARRAY(buffer, int, 16, 8);
234*f3782652STreehugger Robot extern ARRAY(buffer, int, 32, 4);
235*f3782652STreehugger Robot extern ARRAY(buffer, int, 64, 2);
236*f3782652STreehugger Robot extern ARRAY(buffer, uint, 8, 16);
237*f3782652STreehugger Robot extern ARRAY(buffer, uint, 16, 8);
238*f3782652STreehugger Robot extern ARRAY(buffer, uint, 32, 4);
239*f3782652STreehugger Robot extern ARRAY(buffer, uint, 64, 2);
240*f3782652STreehugger Robot extern ARRAY(buffer, poly, 8, 16);
241*f3782652STreehugger Robot extern ARRAY(buffer, poly, 16, 8);
242*f3782652STreehugger Robot extern ARRAY(buffer, float, 32, 4);
243*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
244*f3782652STreehugger Robot extern ARRAY(buffer, float, 16, 8);
245*f3782652STreehugger Robot #endif
246*f3782652STreehugger Robot
247*f3782652STreehugger Robot /* The tests for vld1_dup and vdup expect at least 4 entries in the
248*f3782652STreehugger Robot input buffer, so force 1- and 2-elements initializers to have 4
249*f3782652STreehugger Robot entries. */
250*f3782652STreehugger Robot extern ARRAY(buffer_dup, int, 8, 8);
251*f3782652STreehugger Robot extern ARRAY(buffer_dup, int, 16, 4);
252*f3782652STreehugger Robot extern ARRAY4(buffer_dup, int, 32, 2);
253*f3782652STreehugger Robot extern ARRAY4(buffer_dup, int, 64, 1);
254*f3782652STreehugger Robot extern ARRAY(buffer_dup, uint, 8, 8);
255*f3782652STreehugger Robot extern ARRAY(buffer_dup, uint, 16, 4);
256*f3782652STreehugger Robot extern ARRAY4(buffer_dup, uint, 32, 2);
257*f3782652STreehugger Robot extern ARRAY4(buffer_dup, uint, 64, 1);
258*f3782652STreehugger Robot extern ARRAY(buffer_dup, poly, 8, 8);
259*f3782652STreehugger Robot extern ARRAY(buffer_dup, poly, 16, 4);
260*f3782652STreehugger Robot extern ARRAY4(buffer_dup, float, 32, 2);
261*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
262*f3782652STreehugger Robot extern ARRAY4(buffer_dup, float, 16, 4);
263*f3782652STreehugger Robot #endif
264*f3782652STreehugger Robot extern ARRAY(buffer_dup, int, 8, 16);
265*f3782652STreehugger Robot extern ARRAY(buffer_dup, int, 16, 8);
266*f3782652STreehugger Robot extern ARRAY(buffer_dup, int, 32, 4);
267*f3782652STreehugger Robot extern ARRAY4(buffer_dup, int, 64, 2);
268*f3782652STreehugger Robot extern ARRAY(buffer_dup, uint, 8, 16);
269*f3782652STreehugger Robot extern ARRAY(buffer_dup, uint, 16, 8);
270*f3782652STreehugger Robot extern ARRAY(buffer_dup, uint, 32, 4);
271*f3782652STreehugger Robot extern ARRAY4(buffer_dup, uint, 64, 2);
272*f3782652STreehugger Robot extern ARRAY(buffer_dup, poly, 8, 16);
273*f3782652STreehugger Robot extern ARRAY(buffer_dup, poly, 16, 8);
274*f3782652STreehugger Robot extern ARRAY(buffer_dup, float, 32, 4);
275*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
276*f3782652STreehugger Robot extern ARRAY(buffer_dup, float, 16, 8);
277*f3782652STreehugger Robot #endif
278*f3782652STreehugger Robot
279*f3782652STreehugger Robot /* Input buffers for vld2, one of each size */
280*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, int, 8, 8, 2);
281*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, int, 16, 4, 2);
282*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, int, 32, 2, 2);
283*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, int, 64, 1, 2);
284*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, uint, 8, 8, 2);
285*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, uint, 16, 4, 2);
286*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, uint, 32, 2, 2);
287*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, uint, 64, 1, 2);
288*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, poly, 8, 8, 2);
289*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, poly, 16, 4, 2);
290*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, float, 32, 2, 2);
291*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
292*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, float, 16, 4, 2);
293*f3782652STreehugger Robot #endif
294*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, int, 8, 16, 2);
295*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, int, 16, 8, 2);
296*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, int, 32, 4, 2);
297*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, int, 64, 2, 2);
298*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, uint, 8, 16, 2);
299*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, uint, 16, 8, 2);
300*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, uint, 32, 4, 2);
301*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, uint, 64, 2, 2);
302*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, poly, 8, 16, 2);
303*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, poly, 16, 8, 2);
304*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, float, 32, 4, 2);
305*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
306*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, float, 16, 8, 2);
307*f3782652STreehugger Robot #endif
308*f3782652STreehugger Robot
309*f3782652STreehugger Robot /* Input buffers for vld3, one of each size */
310*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, int, 8, 8, 3);
311*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, int, 16, 4, 3);
312*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, int, 32, 2, 3);
313*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, int, 64, 1, 3);
314*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, uint, 8, 8, 3);
315*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, uint, 16, 4, 3);
316*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, uint, 32, 2, 3);
317*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, uint, 64, 1, 3);
318*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, poly, 8, 8, 3);
319*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, poly, 16, 4, 3);
320*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, float, 32, 2, 3);
321*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
322*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, float, 16, 4, 3);
323*f3782652STreehugger Robot #endif
324*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, int, 8, 16, 3);
325*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, int, 16, 8, 3);
326*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, int, 32, 4, 3);
327*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, int, 64, 2, 3);
328*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, uint, 8, 16, 3);
329*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, uint, 16, 8, 3);
330*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, uint, 32, 4, 3);
331*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, uint, 64, 2, 3);
332*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, poly, 8, 16, 3);
333*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, poly, 16, 8, 3);
334*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, float, 32, 4, 3);
335*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
336*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, float, 16, 8, 3);
337*f3782652STreehugger Robot #endif
338*f3782652STreehugger Robot
339*f3782652STreehugger Robot /* Input buffers for vld4, one of each size */
340*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, int, 8, 8, 4);
341*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, int, 16, 4, 4);
342*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, int, 32, 2, 4);
343*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, int, 64, 1, 4);
344*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, uint, 8, 8, 4);
345*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, uint, 16, 4, 4);
346*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, uint, 32, 2, 4);
347*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, uint, 64, 1, 4);
348*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, poly, 8, 8, 4);
349*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, poly, 16, 4, 4);
350*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, float, 32, 2, 4);
351*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
352*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, float, 16, 4, 4);
353*f3782652STreehugger Robot #endif
354*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, int, 8, 16, 4);
355*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, int, 16, 8, 4);
356*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, int, 32, 4, 4);
357*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, int, 64, 2, 4);
358*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, uint, 8, 16, 4);
359*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, uint, 16, 8, 4);
360*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, uint, 32, 4, 4);
361*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, uint, 64, 2, 4);
362*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, poly, 8, 16, 4);
363*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, poly, 16, 8, 4);
364*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, float, 32, 4, 4);
365*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
366*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, float, 16, 8, 4);
367*f3782652STreehugger Robot #endif
368*f3782652STreehugger Robot
369*f3782652STreehugger Robot /* Input buffers for vld2_lane */
370*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld2_lane, int, 8, 2)[2];
371*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld2_lane, int, 16, 2)[2];
372*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld2_lane, int, 32, 2)[2];
373*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld2_lane, int, 64, 2)[2];
374*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld2_lane, uint, 8, 2)[2];
375*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld2_lane, uint, 16, 2)[2];
376*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld2_lane, uint, 32, 2)[2];
377*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld2_lane, uint, 64, 2)[2];
378*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld2_lane, poly, 8, 2)[2];
379*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld2_lane, poly, 16, 2)[2];
380*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld2_lane, float, 32, 2)[2];
381*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
382*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld2_lane, float, 16, 2)[2];
383*f3782652STreehugger Robot #endif
384*f3782652STreehugger Robot
385*f3782652STreehugger Robot /* Input buffers for vld3_lane */
386*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld3_lane, int, 8, 3)[3];
387*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld3_lane, int, 16, 3)[3];
388*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld3_lane, int, 32, 3)[3];
389*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld3_lane, int, 64, 3)[3];
390*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld3_lane, uint, 8, 3)[3];
391*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld3_lane, uint, 16, 3)[3];
392*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld3_lane, uint, 32, 3)[3];
393*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld3_lane, uint, 64, 3)[3];
394*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld3_lane, poly, 8, 3)[3];
395*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld3_lane, poly, 16, 3)[3];
396*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld3_lane, float, 32, 3)[3];
397*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
398*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld3_lane, float, 16, 3)[3];
399*f3782652STreehugger Robot #endif
400*f3782652STreehugger Robot
401*f3782652STreehugger Robot /* Input buffers for vld4_lane */
402*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld4_lane, int, 8, 4)[4];
403*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld4_lane, int, 16, 4)[4];
404*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld4_lane, int, 32, 4)[4];
405*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld4_lane, int, 64, 4)[4];
406*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld4_lane, uint, 8, 4)[4];
407*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld4_lane, uint, 16, 4)[4];
408*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld4_lane, uint, 32, 4)[4];
409*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld4_lane, uint, 64, 4)[4];
410*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld4_lane, poly, 8, 4)[4];
411*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld4_lane, poly, 16, 4)[4];
412*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld4_lane, float, 32, 4)[4];
413*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
414*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld4_lane, float, 16, 4)[4];
415*f3782652STreehugger Robot #endif
416*f3782652STreehugger Robot
417*f3782652STreehugger Robot /* Output buffers, one of each size */
418*f3782652STreehugger Robot static ARRAY(result, int, 8, 8);
419*f3782652STreehugger Robot static ARRAY(result, int, 16, 4);
420*f3782652STreehugger Robot static ARRAY(result, int, 32, 2);
421*f3782652STreehugger Robot static ARRAY(result, int, 64, 1);
422*f3782652STreehugger Robot static ARRAY(result, uint, 8, 8);
423*f3782652STreehugger Robot static ARRAY(result, uint, 16, 4);
424*f3782652STreehugger Robot static ARRAY(result, uint, 32, 2);
425*f3782652STreehugger Robot static ARRAY(result, uint, 64, 1);
426*f3782652STreehugger Robot static ARRAY(result, poly, 8, 8);
427*f3782652STreehugger Robot static ARRAY(result, poly, 16, 4);
428*f3782652STreehugger Robot static ARRAY(result, float, 32, 2);
429*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
430*f3782652STreehugger Robot static ARRAY(result, float, 16, 4);
431*f3782652STreehugger Robot #endif
432*f3782652STreehugger Robot static ARRAY(result, int, 8, 16);
433*f3782652STreehugger Robot static ARRAY(result, int, 16, 8);
434*f3782652STreehugger Robot static ARRAY(result, int, 32, 4);
435*f3782652STreehugger Robot static ARRAY(result, int, 64, 2);
436*f3782652STreehugger Robot static ARRAY(result, uint, 8, 16);
437*f3782652STreehugger Robot static ARRAY(result, uint, 16, 8);
438*f3782652STreehugger Robot static ARRAY(result, uint, 32, 4);
439*f3782652STreehugger Robot static ARRAY(result, uint, 64, 2);
440*f3782652STreehugger Robot static ARRAY(result, poly, 8, 16);
441*f3782652STreehugger Robot static ARRAY(result, poly, 16, 8);
442*f3782652STreehugger Robot static ARRAY(result, float, 32, 4);
443*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
444*f3782652STreehugger Robot static ARRAY(result, float, 16, 8);
445*f3782652STreehugger Robot #endif
446*f3782652STreehugger Robot
447*f3782652STreehugger Robot /* Dump results (generic function) */
dump_results(char * test_name)448*f3782652STreehugger Robot static void dump_results (char *test_name)
449*f3782652STreehugger Robot {
450*f3782652STreehugger Robot int i;
451*f3782652STreehugger Robot
452*f3782652STreehugger Robot fprintf(ref_file, "\n%s output:\n", test_name);
453*f3782652STreehugger Robot fprintf(gcc_tests_file, "\n%s output:\n", test_name);
454*f3782652STreehugger Robot
455*f3782652STreehugger Robot DUMP(test_name, int, 8, 8, PRId8);
456*f3782652STreehugger Robot DUMP(test_name, int, 16, 4, PRId16);
457*f3782652STreehugger Robot DUMP(test_name, int, 32, 2, PRId32);
458*f3782652STreehugger Robot DUMP(test_name, int, 64, 1, PRId64);
459*f3782652STreehugger Robot DUMP(test_name, uint, 8, 8, PRIu8);
460*f3782652STreehugger Robot DUMP(test_name, uint, 16, 4, PRIu16);
461*f3782652STreehugger Robot DUMP(test_name, uint, 32, 2, PRIu32);
462*f3782652STreehugger Robot DUMP(test_name, uint, 64, 1, PRIu64);
463*f3782652STreehugger Robot DUMP_POLY(test_name, poly, 8, 8, PRIu8);
464*f3782652STreehugger Robot DUMP_POLY(test_name, poly, 16, 4, PRIu16);
465*f3782652STreehugger Robot DUMP_FP(test_name, float, 32, 2, PRIx32);
466*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
467*f3782652STreehugger Robot DUMP_FP16(test_name, float, 16, 4, PRIu16);
468*f3782652STreehugger Robot #endif
469*f3782652STreehugger Robot
470*f3782652STreehugger Robot DUMP(test_name, int, 8, 16, PRId8);
471*f3782652STreehugger Robot DUMP(test_name, int, 16, 8, PRId16);
472*f3782652STreehugger Robot DUMP(test_name, int, 32, 4, PRId32);
473*f3782652STreehugger Robot DUMP(test_name, int, 64, 2, PRId64);
474*f3782652STreehugger Robot DUMP(test_name, uint, 8, 16, PRIu8);
475*f3782652STreehugger Robot DUMP(test_name, uint, 16, 8, PRIu16);
476*f3782652STreehugger Robot DUMP(test_name, uint, 32, 4, PRIu32);
477*f3782652STreehugger Robot DUMP(test_name, uint, 64, 2, PRIu64);
478*f3782652STreehugger Robot DUMP_POLY(test_name, poly, 8, 16, PRIu8);
479*f3782652STreehugger Robot DUMP_POLY(test_name, poly, 16, 8, PRIu16);
480*f3782652STreehugger Robot DUMP_FP(test_name, float, 32, 4, PRIx32);
481*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
482*f3782652STreehugger Robot DUMP_FP16(test_name, float, 16, 8, PRIu16);
483*f3782652STreehugger Robot #endif
484*f3782652STreehugger Robot }
485*f3782652STreehugger Robot
486*f3782652STreehugger Robot /* Dump results in hex (generic function) */
dump_results_hex2(const char * test_name,const char * comment)487*f3782652STreehugger Robot static void dump_results_hex2 (const char *test_name, const char* comment)
488*f3782652STreehugger Robot {
489*f3782652STreehugger Robot int i;
490*f3782652STreehugger Robot
491*f3782652STreehugger Robot fprintf(ref_file, "\n%s%s output:\n", test_name, comment);
492*f3782652STreehugger Robot fprintf(gcc_tests_file, "\n%s%s output:\n", test_name, comment);
493*f3782652STreehugger Robot
494*f3782652STreehugger Robot DUMP(test_name, int, 8, 8, PRIx8);
495*f3782652STreehugger Robot DUMP(test_name, int, 16, 4, PRIx16);
496*f3782652STreehugger Robot DUMP(test_name, int, 32, 2, PRIx32);
497*f3782652STreehugger Robot DUMP(test_name, int, 64, 1, PRIx64);
498*f3782652STreehugger Robot DUMP(test_name, uint, 8, 8, PRIx8);
499*f3782652STreehugger Robot DUMP(test_name, uint, 16, 4, PRIx16);
500*f3782652STreehugger Robot DUMP(test_name, uint, 32, 2, PRIx32);
501*f3782652STreehugger Robot DUMP(test_name, uint, 64, 1, PRIx64);
502*f3782652STreehugger Robot DUMP_POLY(test_name, poly, 8, 8, PRIx8);
503*f3782652STreehugger Robot DUMP_POLY(test_name, poly, 16, 4, PRIx16);
504*f3782652STreehugger Robot DUMP_FP(test_name, float, 32, 2, PRIx32);
505*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
506*f3782652STreehugger Robot DUMP_FP16(test_name, float, 16, 4, PRIx16);
507*f3782652STreehugger Robot #endif
508*f3782652STreehugger Robot
509*f3782652STreehugger Robot DUMP(test_name, int, 8, 16, PRIx8);
510*f3782652STreehugger Robot DUMP(test_name, int, 16, 8, PRIx16);
511*f3782652STreehugger Robot DUMP(test_name, int, 32, 4, PRIx32);
512*f3782652STreehugger Robot DUMP(test_name, int, 64, 2, PRIx64);
513*f3782652STreehugger Robot DUMP(test_name, uint, 8, 16, PRIx8);
514*f3782652STreehugger Robot DUMP(test_name, uint, 16, 8, PRIx16);
515*f3782652STreehugger Robot DUMP(test_name, uint, 32, 4, PRIx32);
516*f3782652STreehugger Robot DUMP(test_name, uint, 64, 2, PRIx64);
517*f3782652STreehugger Robot DUMP_POLY(test_name, poly, 8, 16, PRIx8);
518*f3782652STreehugger Robot DUMP_POLY(test_name, poly, 16, 8, PRIx16);
519*f3782652STreehugger Robot DUMP_FP(test_name, float, 32, 4, PRIx32);
520*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
521*f3782652STreehugger Robot DUMP_FP16(test_name, float, 16, 8, PRIx16);
522*f3782652STreehugger Robot #endif
523*f3782652STreehugger Robot }
524*f3782652STreehugger Robot
/* Dump all result vectors in hexadecimal under the header
   "<TEST_NAME> output:", i.e. dump_results_hex2 with an empty
   comment.  */
static void dump_results_hex (const char *test_name)
{
  const char *no_comment = "";

  dump_results_hex2(test_name, no_comment);
}
529*f3782652STreehugger Robot
530*f3782652STreehugger Robot #ifndef STM_ARM_NEON_MODELS
531*f3782652STreehugger Robot
/* Define THIS_ENDIAN as the byte order of the target.

   This hack copes with compilers/libcs which may not provide
   endian.h, and with cross-compilers such as llvm which include the
   host's endian.h: when __arm__ is defined, the byte order is derived
   from the compiler's __ARMEL__ macro instead of from endian.h.  */
#ifndef __arm__
#include <endian.h>
#define THIS_ENDIAN __BYTE_ORDER
#else /* __arm__ */
/* endian.h is not included on this path, so the byte-order constants
   may be undefined.  An identifier that is not a macro evaluates to 0
   inside #if, which would make a later "THIS_ENDIAN ==
   __LITTLE_ENDIAN" test true even for a big-endian target.  Supply
   the conventional values when nothing else has defined them.  */
#ifndef __LITTLE_ENDIAN
#define __LITTLE_ENDIAN 1234
#endif
#ifndef __BIG_ENDIAN
#define __BIG_ENDIAN 4321
#endif
#ifdef __ARMEL__
#define THIS_ENDIAN __LITTLE_ENDIAN
#else /* __ARMEL__ */
#define THIS_ENDIAN __BIG_ENDIAN
#endif
#endif /* __arm__ */
545*f3782652STreehugger Robot
/* Overlay of the floating-point status word (member `word') with its
   individual flag bits (member `b').

   The field order is reversed between the two byte orders so that the
   named flags sit next to the same end of `word' in both cases.  The
   exact bit positions rely on how the compiler allocates bit-fields,
   which is implementation-defined -- presumably QC ends up on bit 27
   and N on bit 31 with the compilers this header targets; confirm
   when porting.

   Every field is declared `unsigned int': whether a plain `int'
   bit-field is signed is implementation-defined, so a set 1-bit flag
   could otherwise read back as -1 with one compiler and as 1 with
   another.  */
#if THIS_ENDIAN == __LITTLE_ENDIAN

typedef union {
  struct {
    unsigned int _xxx:27;   /* remaining bits, not named individually */
    unsigned int QC:1;      /* cumulative saturation flag */
    unsigned int V:1;
    unsigned int C:1;
    unsigned int Z:1;
    unsigned int N:1;
  } b;
  unsigned int word;
} _ARM_FPSCR;

#else /* __BIG_ENDIAN */

typedef union {
  struct {
    unsigned int N:1;
    unsigned int Z:1;
    unsigned int C:1;
    unsigned int V:1;
    unsigned int QC:1;      /* cumulative saturation flag */
    unsigned int _dnm:27;   /* remaining bits, not named individually */
  } b;
  unsigned int word;
} _ARM_FPSCR;

#endif /* __BIG_ENDIAN */
575*f3782652STreehugger Robot
576*f3782652STreehugger Robot #ifdef __ARMCC_VERSION
577*f3782652STreehugger Robot register _ARM_FPSCR _afpscr_for_qc __asm("fpscr");
578*f3782652STreehugger Robot # define Neon_Cumulative_Sat _afpscr_for_qc.b.QC
579*f3782652STreehugger Robot # define Set_Neon_Cumulative_Sat(x, depend) {Neon_Cumulative_Sat = (x);}
580*f3782652STreehugger Robot #else
581*f3782652STreehugger Robot /* GCC/ARM does not know this register */
582*f3782652STreehugger Robot # define Neon_Cumulative_Sat __read_neon_cumulative_sat()
583*f3782652STreehugger Robot /* We need a fake dependency to ensure correct ordering of asm
584*f3782652STreehugger Robot statements to preset the QC flag value, and Neon operators writing
585*f3782652STreehugger Robot to QC. */
586*f3782652STreehugger Robot #define Set_Neon_Cumulative_Sat(x, depend) \
587*f3782652STreehugger Robot __set_neon_cumulative_sat((x), (depend))
588*f3782652STreehugger Robot
589*f3782652STreehugger Robot # if defined(__aarch64__)
__read_neon_cumulative_sat(void)590*f3782652STreehugger Robot static volatile int __read_neon_cumulative_sat (void) {
591*f3782652STreehugger Robot _ARM_FPSCR _afpscr_for_qc;
592*f3782652STreehugger Robot asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc));
593*f3782652STreehugger Robot return _afpscr_for_qc.b.QC;
594*f3782652STreehugger Robot }
595*f3782652STreehugger Robot
596*f3782652STreehugger Robot #define __set_neon_cumulative_sat(x, depend) { \
597*f3782652STreehugger Robot _ARM_FPSCR _afpscr_for_qc; \
598*f3782652STreehugger Robot asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc)); \
599*f3782652STreehugger Robot _afpscr_for_qc.b.QC = x; \
600*f3782652STreehugger Robot asm volatile ("msr fpsr,%1" : "=X" (depend) : "r" (_afpscr_for_qc)); \
601*f3782652STreehugger Robot }
602*f3782652STreehugger Robot
603*f3782652STreehugger Robot # else
__read_neon_cumulative_sat(void)604*f3782652STreehugger Robot static volatile int __read_neon_cumulative_sat (void) {
605*f3782652STreehugger Robot _ARM_FPSCR _afpscr_for_qc;
606*f3782652STreehugger Robot asm volatile ("vmrs %0,fpscr" : "=r" (_afpscr_for_qc));
607*f3782652STreehugger Robot return _afpscr_for_qc.b.QC;
608*f3782652STreehugger Robot }
609*f3782652STreehugger Robot
610*f3782652STreehugger Robot #define __set_neon_cumulative_sat(x, depend) { \
611*f3782652STreehugger Robot _ARM_FPSCR _afpscr_for_qc; \
612*f3782652STreehugger Robot asm volatile ("vmrs %0,fpscr" : "=r" (_afpscr_for_qc)); \
613*f3782652STreehugger Robot _afpscr_for_qc.b.QC = x; \
614*f3782652STreehugger Robot asm volatile ("vmsr fpscr,%1" : "=X" (depend) : "r" (_afpscr_for_qc)); \
615*f3782652STreehugger Robot }
616*f3782652STreehugger Robot
617*f3782652STreehugger Robot # endif
618*f3782652STreehugger Robot #endif
619*f3782652STreehugger Robot
620*f3782652STreehugger Robot #endif /* STM_ARM_NEON_MODELS */
621*f3782652STreehugger Robot
dump_neon_cumulative_sat(const char * msg,const char * name,const char * t1,int w,int n)622*f3782652STreehugger Robot static void dump_neon_cumulative_sat(const char* msg, const char *name,
623*f3782652STreehugger Robot const char* t1, int w, int n)
624*f3782652STreehugger Robot {
625*f3782652STreehugger Robot fprintf(ref_file, "%s:%d:%s Neon cumulative saturation %d\n", msg, result_idx++,
626*f3782652STreehugger Robot name, Neon_Cumulative_Sat);
627*f3782652STreehugger Robot fprintf(gcc_tests_file,
628*f3782652STreehugger Robot "int VECT_VAR(expected_cumulative_sat,%s,%d,%d) = %d;\n",
629*f3782652STreehugger Robot t1, w, n, Neon_Cumulative_Sat);
630*f3782652STreehugger Robot }
631*f3782652STreehugger Robot
632*f3782652STreehugger Robot /* Clean output buffers before execution */
clean_results(void)633*f3782652STreehugger Robot static void clean_results (void)
634*f3782652STreehugger Robot {
635*f3782652STreehugger Robot result_idx = 0;
636*f3782652STreehugger Robot CLEAN(result, int, 8, 8);
637*f3782652STreehugger Robot CLEAN(result, int, 16, 4);
638*f3782652STreehugger Robot CLEAN(result, int, 32, 2);
639*f3782652STreehugger Robot CLEAN(result, int, 64, 1);
640*f3782652STreehugger Robot CLEAN(result, uint, 8, 8);
641*f3782652STreehugger Robot CLEAN(result, uint, 16, 4);
642*f3782652STreehugger Robot CLEAN(result, uint, 32, 2);
643*f3782652STreehugger Robot CLEAN(result, uint, 64, 1);
644*f3782652STreehugger Robot CLEAN(result, poly, 8, 8);
645*f3782652STreehugger Robot CLEAN(result, poly, 16, 4);
646*f3782652STreehugger Robot CLEAN(result, float, 32, 2);
647*f3782652STreehugger Robot
648*f3782652STreehugger Robot CLEAN(result, int, 8, 16);
649*f3782652STreehugger Robot CLEAN(result, int, 16, 8);
650*f3782652STreehugger Robot CLEAN(result, int, 32, 4);
651*f3782652STreehugger Robot CLEAN(result, int, 64, 2);
652*f3782652STreehugger Robot CLEAN(result, uint, 8, 16);
653*f3782652STreehugger Robot CLEAN(result, uint, 16, 8);
654*f3782652STreehugger Robot CLEAN(result, uint, 32, 4);
655*f3782652STreehugger Robot CLEAN(result, uint, 64, 2);
656*f3782652STreehugger Robot CLEAN(result, poly, 8, 16);
657*f3782652STreehugger Robot CLEAN(result, poly, 16, 8);
658*f3782652STreehugger Robot CLEAN(result, float, 32, 4);
659*f3782652STreehugger Robot }
660*f3782652STreehugger Robot
661*f3782652STreehugger Robot
/* Helpers to declare variables of various types.

   None of these macros ends with a semicolon: the user supplies the
   final one.  */

/* Declare one volatile variable whose type is VECT_TYPE(T1, W, N) and
   whose name is VECT_VAR(VAR, T1, W, N).  */
#define DECL_VARIABLE(VAR, T1, W, N)                    \
  volatile VECT_TYPE(T1, W, N) VECT_VAR(VAR, T1, W, N)

/* Signed integer variants, 64 bits wide (W x N == 64).  */
#define DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR)       \
  DECL_VARIABLE(VAR, int, 8, 8);                        \
  DECL_VARIABLE(VAR, int, 16, 4);                       \
  DECL_VARIABLE(VAR, int, 32, 2);                       \
  DECL_VARIABLE(VAR, int, 64, 1)

/* Unsigned integer variants, 64 bits wide.  */
#define DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR)     \
  DECL_VARIABLE(VAR, uint, 8, 8);                       \
  DECL_VARIABLE(VAR, uint, 16, 4);                      \
  DECL_VARIABLE(VAR, uint, 32, 2);                      \
  DECL_VARIABLE(VAR, uint, 64, 1)

/* Signed integer variants, 128 bits wide (W x N == 128).  */
#define DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR)      \
  DECL_VARIABLE(VAR, int, 8, 16);                       \
  DECL_VARIABLE(VAR, int, 16, 8);                       \
  DECL_VARIABLE(VAR, int, 32, 4);                       \
  DECL_VARIABLE(VAR, int, 64, 2)

/* Unsigned integer variants, 128 bits wide.  */
#define DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR)    \
  DECL_VARIABLE(VAR, uint, 8, 16);                      \
  DECL_VARIABLE(VAR, uint, 16, 8);                      \
  DECL_VARIABLE(VAR, uint, 32, 4);                      \
  DECL_VARIABLE(VAR, uint, 64, 2)

/* Every 64-bit variant: signed, unsigned, poly8/poly16 and float32.  */
#define DECL_VARIABLE_64BITS_VARIANTS(VAR)              \
  DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR);            \
  DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR);          \
  DECL_VARIABLE(VAR, poly, 8, 8);                       \
  DECL_VARIABLE(VAR, poly, 16, 4);                      \
  DECL_VARIABLE(VAR, float, 32, 2)

/* Every 128-bit variant: signed, unsigned, poly8/poly16 and float32.  */
#define DECL_VARIABLE_128BITS_VARIANTS(VAR)             \
  DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR);           \
  DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR);         \
  DECL_VARIABLE(VAR, poly, 8, 16);                      \
  DECL_VARIABLE(VAR, poly, 16, 8);                      \
  DECL_VARIABLE(VAR, float, 32, 4)

/* All 64-bit variants followed by all 128-bit variants.  */
#define DECL_VARIABLE_ALL_VARIANTS(VAR)                 \
  DECL_VARIABLE_64BITS_VARIANTS(VAR);                   \
  DECL_VARIABLE_128BITS_VARIANTS(VAR)

/* Signed integer variants of both widths.  */
#define DECL_VARIABLE_SIGNED_VARIANTS(VAR)              \
  DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR);            \
  DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR)

/* Unsigned integer variants of both widths.  */
#define DECL_VARIABLE_UNSIGNED_VARIANTS(VAR)            \
  DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR);          \
  DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR)
715*f3782652STreehugger Robot
/* Helpers to initialize vectors.

   In each macro the intrinsic name is built by token pasting from Q,
   T2 and W; for instance Q = q, T2 = s, W = 8 selects vdupq_n_s8,
   vsetq_lane_s8 or vld1q_s8 respectively.  The destination is always
   VECT_VAR(VAR, T1, W, N).  */

/* Assign the result of vdup<Q>_n_<T2><W>(V) to the variable.  */
#define VDUP(VAR, Q, T1, T2, W, N, V)                   \
  VECT_VAR(VAR, T1, W, N) = vdup##Q##_n_##T2##W(V)

/* Replace the variable with the result of vset<Q>_lane_<T2><W>, called
   with V, the variable's current value, and L.  */
#define TEST_VSET_LANE(VAR, Q, T1, T2, W, N, L, V)              \
  VECT_VAR(VAR, T1, W, N) = vset##Q##_lane_##T2##W(V,           \
                                                   VECT_VAR(VAR, T1, W, N), \
                                                   L)

/* We need to load initial values first, so rely on VLD1: the variable
   receives vld1<Q>_<T2><W> applied to VECT_VAR(BUF, T1, W, N).  */
#define VLOAD(VAR, BUF, Q, T1, T2, W, N)                                \
  VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N))
728*f3782652STreehugger Robot
/* Helpers for macros with 1 constant and 5 variable arguments.

   Each helper invokes MACRO(VAR, Q, T1, T2, W, N) once per listed
   variant, with the invocations separated by semicolons.  VAR is
   passed through unchanged; Q is empty for the 64-bit variants and `q'
   for the 128-bit ones.  The final semicolon is left to the user.  */

/* Signed integer variants, 64 bits wide.  */
#define TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)       \
  MACRO(VAR, , int, s, 8, 8);                                   \
  MACRO(VAR, , int, s, 16, 4);                                  \
  MACRO(VAR, , int, s, 32, 2);                                  \
  MACRO(VAR, , int, s, 64, 1)

/* Unsigned integer variants, 64 bits wide.  */
#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)     \
  MACRO(VAR, , uint, u, 8, 8);                                  \
  MACRO(VAR, , uint, u, 16, 4);                                 \
  MACRO(VAR, , uint, u, 32, 2);                                 \
  MACRO(VAR, , uint, u, 64, 1)

/* Signed integer variants, 128 bits wide.  */
#define TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)      \
  MACRO(VAR, q, int, s, 8, 16);                                 \
  MACRO(VAR, q, int, s, 16, 8);                                 \
  MACRO(VAR, q, int, s, 32, 4);                                 \
  MACRO(VAR, q, int, s, 64, 2)

/* Unsigned integer variants, 128 bits wide.  */
#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO,VAR)     \
  MACRO(VAR, q, uint, u, 8, 16);                                \
  MACRO(VAR, q, uint, u, 16, 8);                                \
  MACRO(VAR, q, uint, u, 32, 4);                                \
  MACRO(VAR, q, uint, u, 64, 2)

/* Signed then unsigned 64-bit variants (no poly or float here).  */
#define TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR)      \
  TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR);    \
  TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)

/* Signed then unsigned 128-bit variants (no poly or float here).  */
#define TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR)     \
  TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR);   \
  TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)

/* All 64-bit variants followed by all 128-bit variants.  */
#define TEST_MACRO_ALL_VARIANTS_1_5(MACRO, VAR) \
  TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR);   \
  TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR)

/* Signed integer variants of both widths.  */
#define TEST_MACRO_SIGNED_VARIANTS_1_5(MACRO, VAR)      \
  TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR);    \
  TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)
769*f3782652STreehugger Robot
/* Helpers for macros with 2 constant and 5 variable arguments.

   Each helper invokes MACRO(VAR1, VAR2, Q, T1, T2, W, N) once per
   listed variant, with the invocations separated by semicolons.  VAR1
   and VAR2 are passed through unchanged; Q is empty for the 64-bit
   variants and `q' for the 128-bit ones.  The final semicolon is left
   to the user.  */

/* Signed integer variants, 64 bits wide.  */
#define TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)        \
  MACRO(VAR1, VAR2, , int, s, 8, 8);                                    \
  MACRO(VAR1, VAR2, , int, s, 16, 4);                                   \
  MACRO(VAR1, VAR2, , int, s, 32, 2);                                   \
  MACRO(VAR1, VAR2, , int, s, 64, 1)

/* Unsigned integer variants, 64 bits wide.  */
#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)      \
  MACRO(VAR1, VAR2, , uint, u, 8, 8);                                   \
  MACRO(VAR1, VAR2, , uint, u, 16, 4);                                  \
  MACRO(VAR1, VAR2, , uint, u, 32, 2);                                  \
  MACRO(VAR1, VAR2, , uint, u, 64, 1)

/* Signed integer variants, 128 bits wide.  */
#define TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)       \
  MACRO(VAR1, VAR2, q, int, s, 8, 16);                                  \
  MACRO(VAR1, VAR2, q, int, s, 16, 8);                                  \
  MACRO(VAR1, VAR2, q, int, s, 32, 4);                                  \
  MACRO(VAR1, VAR2, q, int, s, 64, 2)

/* Unsigned integer variants, 128 bits wide.  */
#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)     \
  MACRO(VAR1, VAR2, q, uint, u, 8, 16);                                 \
  MACRO(VAR1, VAR2, q, uint, u, 16, 8);                                 \
  MACRO(VAR1, VAR2, q, uint, u, 32, 4);                                 \
  MACRO(VAR1, VAR2, q, uint, u, 64, 2)

/* Signed, unsigned, then poly8/poly16 variants, 64 bits wide.  */
#define TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)       \
  TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);     \
  TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);   \
  MACRO(VAR1, VAR2, , poly, p, 8, 8);                           \
  MACRO(VAR1, VAR2, , poly, p, 16, 4)

/* Signed, unsigned, then poly8/poly16 variants, 128 bits wide.  */
#define TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)      \
  TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);    \
  TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);  \
  MACRO(VAR1, VAR2, q, poly, p, 8, 16);                         \
  MACRO(VAR1, VAR2, q, poly, p, 16, 8)

/* All 64-bit variants followed by all 128-bit variants.  */
#define TEST_MACRO_ALL_VARIANTS_2_5(MACRO, VAR1, VAR2)  \
  TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2);    \
  TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)

/* Signed integer variants of both widths.  */
#define TEST_MACRO_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)       \
  TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);     \
  TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)
814*f3782652STreehugger Robot
815*f3782652STreehugger Robot #endif /* _STM_ARM_NEON_REF_H_ */
816