/* xref: /aosp_15_r20/external/arm-neon-tests/stm-arm-neon-ref.h (revision f37826520a923688f9e110915f3811e385d8b6d1) */
/*

Copyright (c) 2009, 2010, 2011, 2012, 2013 STMicroelectronics
Written by Christophe Lyon

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

*/

26*f3782652STreehugger Robot #ifndef _STM_ARM_NEON_REF_H_
27*f3782652STreehugger Robot #define _STM_ARM_NEON_REF_H_
28*f3782652STreehugger Robot 
29*f3782652STreehugger Robot #if defined(__cplusplus)
30*f3782652STreehugger Robot #include <cstdio>
31*f3782652STreehugger Robot #include <cinttypes>
32*f3782652STreehugger Robot #include <cstring>
33*f3782652STreehugger Robot #else
34*f3782652STreehugger Robot #include <stdio.h>
35*f3782652STreehugger Robot #if defined(_MSC_VER)
36*f3782652STreehugger Robot #include "msinttypes.h"
37*f3782652STreehugger Robot #include <float.h> /* for isnan() ... */
38*f3782652STreehugger Robot static int32_t _ptrNan[]={0x7fc00000L};
39*f3782652STreehugger Robot #define NAN (*(float*)_ptrNan)
40*f3782652STreehugger Robot static int32_t _ptrInf[]={0x7f800000L};
41*f3782652STreehugger Robot #define INFINITY (*(float*)_ptrInf)
42*f3782652STreehugger Robot #define HUGE_VALF INFINITY
43*f3782652STreehugger Robot #else
44*f3782652STreehugger Robot #include <inttypes.h>
45*f3782652STreehugger Robot #endif
46*f3782652STreehugger Robot #include <string.h>
47*f3782652STreehugger Robot #endif
48*f3782652STreehugger Robot 
/* Stringification.  xSTR(X) quotes its argument as written; STR(X) lets the
   argument be macro-expanded first and then quotes the result.  */
#define xSTR(X) #X
#define STR(X) xSTR(X)

/* Identifier pasting.  xNAME(V,T) expands both arguments and joins them
   with an underscore, giving V_T.  */
#define xNAME1(V,T) V ## _ ##  T
#define xNAME(V,T) xNAME1(V,T)

/* Scalar variables: VAR(v,int,32) names v_int32, and VAR_DECL declares it
   with type int32_t.  */
#define VAR(V,T,W) xNAME(V,T##W)
#define VAR_DECL(V, T, W) T##W##_t VAR(V,T,W)

/* Vector names and types: VECT_NAME(int,8,8) is int8x8, VECT_TYPE appends
   _t (int8x8_t); the ARRAY variants add the xL suffix (int8x8x2_t).  */
#define VECT_NAME(T, W, N) T##W##x##N
#define VECT_ARRAY_NAME(T, W, N, L) T##W##x##N##x##L
#define VECT_TYPE(T, W, N) xNAME(VECT_NAME(T,W,N),t)
#define VECT_ARRAY_TYPE(T, W, N, L) xNAME(VECT_ARRAY_NAME(T,W,N,L),t)

/* Vector variables: VECT_VAR(result,int,8,8) names result_int8x8.
   VECT_VAR_DECL prefixes that name with the element type (int8_t), so the
   caller can append an array suffix or an initializer.  */
#define VECT_VAR(V,T,W,N) xNAME(V,VECT_NAME(T,W,N))
#define VECT_VAR_DECL(V, T, W, N) T##W##_t VECT_VAR(V,T,W,N)

/* This one is used for padding between input buffers.  It declares a char
   initialised to 42; note that the expansion already ends with ';'.  */
#define PAD(V, T, W, N) char VECT_VAR(V,T,W,N)=42;

/* Array declarations: ARRAY gives N elements, ARRAY4 always gives 4
   elements whatever N is.  */
#define ARRAY(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[N]
#define ARRAY4(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[4]

/* Arrays of vectors: VECT_ARRAY(v,int,8,8,2) declares
   int8_t v_int8x8x2[8*2].  */
#define VECT_ARRAY_VAR(V,T,W,N,L) xNAME(V,VECT_ARRAY_NAME(T,W,N,L))
#define VECT_ARRAY(V, T, W, N, L) T##W##_t VECT_ARRAY_VAR(V,T,W,N,L)[N*L]

/* Sequence number printed by, and incremented in, every DUMP* header line
   written to ref_file.  It is static, so each translation unit including
   this header has its own counter.  */
static int result_idx = 0;

/* DUMP(MSG,T,W,N,FMT): write the N elements of result_<T><W>x<N> to
   ref_file as "MSG:<idx>:<name> [] = { a, b, ...,  }" using conversion
   FMT (a <inttypes.h> PRI* macro), then emit the matching gcc_tests_file
   line through DUMP4GCC.
   The caller must have `i` (the loop index), ref_file and gcc_tests_file
   in scope.  The body is wrapped in a block so the macro behaves as one
   statement, e.g. under an unbraced `if`.  */
#define DUMP(MSG,T,W,N,FMT)                                             \
  {                                                                     \
    fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++,            \
            STR(VECT_VAR(result, T, W, N)));                            \
    for(i=0; i<N ; i++)                                                 \
      {                                                                 \
        fprintf(ref_file, "%" FMT ", ", VECT_VAR(result, T, W, N)[i]);  \
      }                                                                 \
    fprintf(ref_file, " }\n");                                          \
    DUMP4GCC(MSG,T,W,N,FMT);                                            \
  }

/* DUMP_POLY(MSG,T,W,N,FMT): same output layout as DUMP, but each element is
   cast to uint<W>_t before printing so that no sign bits are printed.
   The caller must have `i`, result_idx, ref_file and gcc_tests_file in
   scope.  The body is wrapped in a block so the macro behaves as one
   statement.  */
#define DUMP_POLY(MSG,T,W,N,FMT)                                        \
  {                                                                     \
    fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++,            \
            STR(VECT_VAR(result, T, W, N)));                            \
    for(i=0; i<N ; i++)                                                 \
      {                                                                 \
        fprintf(ref_file, "%" FMT ", ",                                 \
                (uint##W##_t)VECT_VAR(result, T, W, N)[i]);             \
      }                                                                 \
    fprintf(ref_file, " }\n");                                          \
    DUMP4GCC(MSG,T,W,N,FMT);                                            \
  }

/* DUMP_FP(MSG,T,W,N,FMT): same output layout as DUMP, but each
   floating-point element is written through a union and printed as the
   uint<W>_t that shares its storage, i.e. as its bit pattern.  The
   gcc_tests_file line is then produced by DUMP4GCC_FP.
   The caller must have `i`, result_idx, ref_file and gcc_tests_file in
   scope.  The body is wrapped in a block so the macro behaves as one
   statement.  */
#define DUMP_FP(MSG,T,W,N,FMT)                                          \
  {                                                                     \
    fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++,            \
            STR(VECT_VAR(result, T, W, N)));                            \
    for(i=0; i<N ; i++)                                                 \
      {                                                                 \
        union fp_operand {                                              \
          uint##W##_t i;                                                \
          float##W##_t f;                                               \
        } tmp;                                                          \
        tmp.f = VECT_VAR(result, T, W, N)[i];                           \
        fprintf(ref_file, "%" FMT ", ", tmp.i);                         \
      }                                                                 \
    fprintf(ref_file, " }\n");                                          \
    DUMP4GCC_FP(MSG,T,W,N,FMT);                                         \
  }

/* DUMP4GCC(MSG,T,W,N,FMT): write result_<T><W>x<N> to gcc_tests_file as
   "VECT_VAR_DECL(expected,<T>,<W>,<N>) [] = { 0x.., ..., 0x.. };".
   Every value is prefixed with a literal "0x" whatever conversion FMT
   selects.  For W < 32 the element is first cast to uint<W>_t and widened
   to uint32_t; otherwise it is passed to fprintf unchanged.  The loop
   stops at N-1 so the last element can be printed without a trailing
   comma.  MSG is unused.
   The caller must have `i` and gcc_tests_file in scope.  The body is
   wrapped in a block so the macro behaves as one statement.  */
#define DUMP4GCC(MSG,T,W,N,FMT)                                         \
  {                                                                     \
    fprintf(gcc_tests_file, "VECT_VAR_DECL(expected,%s,%d,%d) [] = { ", \
            STR(T), W, N);                                              \
    for(i=0; i<(N-1) ; i++)                                             \
      {                                                                 \
        if (W < 32) {                                                   \
          uint32_t tmp = (uint##W##_t) VECT_VAR(result, T, W, N)[i];    \
          fprintf(gcc_tests_file, "0x%" FMT ", ", tmp);                 \
        } else {                                                        \
          fprintf(gcc_tests_file, "0x%" FMT ", ", VECT_VAR(result, T, W, N)[i]); \
        }                                                               \
      }                                                                 \
    if (W < 32) {                                                       \
      uint32_t tmp = (uint##W##_t) VECT_VAR(result, T, W, N)[i];        \
      fprintf(gcc_tests_file, "0x%" FMT, tmp);                          \
    } else {                                                            \
      fprintf(gcc_tests_file, "0x%" FMT, VECT_VAR(result, T, W, N)[i]); \
    }                                                                   \
    fprintf(gcc_tests_file, " };\n");                                   \
  }

/* DUMP4GCC_FP(MSG,T,W,N,FMT): floating-point counterpart of DUMP4GCC.
   Writes "VECT_VAR_DECL(expected,hfloat,<W>,<N>) [] = { 0x.., ... };" to
   gcc_tests_file, printing each element as the uint<W>_t that shares its
   storage in a union (its bit pattern).  The type name written is always
   "hfloat"; MSG is unused.  The loop stops at N-1 so the last element can
   be printed without a trailing comma.
   The caller must have `i` and gcc_tests_file in scope.  */
#define DUMP4GCC_FP(MSG,T,W,N,FMT)                                      \
  {                                                                     \
    union fp_operand {                                                  \
      uint##W##_t i;                                                    \
      float##W##_t f;                                                   \
    } tmp;                                                              \
    fprintf(gcc_tests_file, "VECT_VAR_DECL(expected,%s,%d,%d) [] = { ", \
            "hfloat", W, N);                                            \
    for(i=0; i<(N-1) ; i++)                                             \
      {                                                                 \
        tmp.f = VECT_VAR(result, T, W, N)[i];                           \
        fprintf(gcc_tests_file, "0x%" FMT ", ", tmp.i);                 \
      }                                                                 \
    tmp.f = VECT_VAR(result, T, W, N)[i];                               \
    fprintf(gcc_tests_file, "0x%" FMT, tmp.i);                          \
    fprintf(gcc_tests_file, " };\n");                                   \
  }

/* Half-precision support, compiled only when the target advertises the
   IEEE __fp16 format together with either bit 1 of __ARM_FP or bit 0 of
   __ARM_NEON_FP16_INTRINSICS.  */
#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
#define float16_t __fp16

/* DUMP_FP16(MSG,T,W,N,FMT): same output layout as DUMP for the float16
   result buffers; each element is cast to uint<W>_t before printing, and
   the gcc_tests_file line is produced by DUMP4GCC_FP16.
   NOTE(review): unlike DUMP_FP, which goes through a union, the cast here
   is a value conversion of the element rather than a reinterpretation of
   its bits -- confirm this is what the reference files expect.
   The caller must have `i`, result_idx, ref_file and gcc_tests_file in
   scope.  The body is wrapped in a block so the macro behaves as one
   statement.  */
#define DUMP_FP16(MSG,T,W,N,FMT)                                        \
  {                                                                     \
    fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++,            \
            STR(VECT_VAR(result, T, W, N)));                            \
    for(i=0; i<N ; i++)                                                 \
      {                                                                 \
        uint##W##_t tmp;                                                \
        tmp = (uint##W##_t)VECT_VAR(result, T, W, N)[i];                \
        fprintf(ref_file, "%" FMT ", ", tmp);                           \
      }                                                                 \
    fprintf(ref_file, " }\n");                                          \
    DUMP4GCC_FP16(MSG,T,W,N,FMT);                                       \
  }

/* DUMP4GCC_FP16(MSG,T,W,N,FMT): write the float16 result buffer to
   gcc_tests_file as "VECT_VAR_DECL(expected,hfloat,<W>,<N>) [] = { ... };",
   using the same cast to uint<W>_t as DUMP_FP16.  MSG is unused.  */
#define DUMP4GCC_FP16(MSG,T,W,N,FMT)                                    \
  {                                                                     \
    uint##W##_t tmp;                                                    \
    fprintf(gcc_tests_file, "VECT_VAR_DECL(expected,%s,%d,%d) [] = { ", \
            "hfloat", W, N);                                            \
    for(i=0; i<(N-1) ; i++)                                             \
      {                                                                 \
        tmp = (uint##W##_t)VECT_VAR(result, T, W, N)[i];                \
        fprintf(gcc_tests_file, "0x%" FMT ", ", tmp);                   \
      }                                                                 \
    tmp = (uint##W##_t)VECT_VAR(result, T, W, N)[i];                    \
    fprintf(gcc_tests_file, "0x%" FMT, tmp);                            \
    fprintf(gcc_tests_file, " };\n");                                   \
  }
#endif

/* Fill patterns for cleared buffers, one per element width; each is the
   byte 0x33 repeated over the whole element.  */
#define CLEAN_PATTERN_8  0x33
#define CLEAN_PATTERN_16 0x3333
#define CLEAN_PATTERN_32 0x33333333
#define CLEAN_PATTERN_64 0x3333333333333333

/* CLEAN(VAR,T,W,N): memset every byte of the array VAR_<T><W>x<N> to
   CLEAN_PATTERN_8.  The size comes from sizeof, so the argument must be a
   real array, not a pointer.  Note that the expansion already ends
   with ';'.  */
#define CLEAN(VAR,T,W,N)                                                \
  memset(VECT_VAR(VAR, T, W, N),                                        \
         CLEAN_PATTERN_8,                                               \
         sizeof(VECT_VAR(VAR, T, W, N)));

/* CHECK_INIT(VAR,Q,T1,T2,W,N): store the vector VAR_<T1><W>x<N> into a
   temporary array with vst1<Q>_<T2><W> and print each stored element to
   stdout.
   The comparison against CLEAN_PATTERN_<W> is commented out, so the
   "unintialized!" line is printed for every element unconditionally.
   NOTE(review): "%#x" expects an unsigned int; 64-bit and floating-point
   element types do not match it -- confirm before using with those.  */
#define CHECK_INIT(VAR,Q,T1,T2,W,N)                                     \
  {                                                                     \
    ARRAY(check_result, T1, W, N);                                      \
    int i;                                                              \
                                                                        \
    vst1##Q##_##T2##W(VECT_VAR(check_result, T1, W, N),                 \
                      VECT_VAR(VAR, T1, W, N));                         \
    for(i=0; i<N ; i++)                                                 \
      {                                                                 \
        /*if (VECT_VAR(check_result, T1, W, N)[i] == CLEAN_PATTERN_##W)*/ { \
          fprintf(stdout, "%s:%d: %s[%d] unintialized! %#x\n",          \
                  __FUNCTION__, __LINE__,                               \
                  STR(VECT_VAR(VAR, T1, W, N)), i,                      \
                  VECT_VAR(check_result, T1, W, N)[i]);                 \
        }                                                               \
      }                                                                 \
  }

/* Generic declarations: output streams defined elsewhere.  The DUMP*
   macros in this header write to ref_file and gcc_tests_file; log_file is
   not used in the visible part of this header.  */
extern FILE* log_file;
extern FILE* ref_file;
extern FILE* gcc_tests_file;

217*f3782652STreehugger Robot /* Input buffers, one of each size */
218*f3782652STreehugger Robot extern ARRAY(buffer, int, 8, 8);
219*f3782652STreehugger Robot extern ARRAY(buffer, int, 16, 4);
220*f3782652STreehugger Robot extern ARRAY(buffer, int, 32, 2);
221*f3782652STreehugger Robot extern ARRAY(buffer, int, 64, 1);
222*f3782652STreehugger Robot extern ARRAY(buffer, uint, 8, 8);
223*f3782652STreehugger Robot extern ARRAY(buffer, uint, 16, 4);
224*f3782652STreehugger Robot extern ARRAY(buffer, uint, 32, 2);
225*f3782652STreehugger Robot extern ARRAY(buffer, uint, 64, 1);
226*f3782652STreehugger Robot extern ARRAY(buffer, poly, 8, 8);
227*f3782652STreehugger Robot extern ARRAY(buffer, poly, 16, 4);
228*f3782652STreehugger Robot extern ARRAY(buffer, float, 32, 2);
229*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
230*f3782652STreehugger Robot extern ARRAY(buffer, float, 16, 4);
231*f3782652STreehugger Robot #endif
232*f3782652STreehugger Robot extern ARRAY(buffer, int, 8, 16);
233*f3782652STreehugger Robot extern ARRAY(buffer, int, 16, 8);
234*f3782652STreehugger Robot extern ARRAY(buffer, int, 32, 4);
235*f3782652STreehugger Robot extern ARRAY(buffer, int, 64, 2);
236*f3782652STreehugger Robot extern ARRAY(buffer, uint, 8, 16);
237*f3782652STreehugger Robot extern ARRAY(buffer, uint, 16, 8);
238*f3782652STreehugger Robot extern ARRAY(buffer, uint, 32, 4);
239*f3782652STreehugger Robot extern ARRAY(buffer, uint, 64, 2);
240*f3782652STreehugger Robot extern ARRAY(buffer, poly, 8, 16);
241*f3782652STreehugger Robot extern ARRAY(buffer, poly, 16, 8);
242*f3782652STreehugger Robot extern ARRAY(buffer, float, 32, 4);
243*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
244*f3782652STreehugger Robot extern ARRAY(buffer, float, 16, 8);
245*f3782652STreehugger Robot #endif
246*f3782652STreehugger Robot 
247*f3782652STreehugger Robot /* The tests for vld1_dup and vdup expect at least 4 entries in the
248*f3782652STreehugger Robot    input buffer, so force 1- and 2-elements initializers to have 4
249*f3782652STreehugger Robot    entries.  */
250*f3782652STreehugger Robot extern ARRAY(buffer_dup, int, 8, 8);
251*f3782652STreehugger Robot extern ARRAY(buffer_dup, int, 16, 4);
252*f3782652STreehugger Robot extern ARRAY4(buffer_dup, int, 32, 2);
253*f3782652STreehugger Robot extern ARRAY4(buffer_dup, int, 64, 1);
254*f3782652STreehugger Robot extern ARRAY(buffer_dup, uint, 8, 8);
255*f3782652STreehugger Robot extern ARRAY(buffer_dup, uint, 16, 4);
256*f3782652STreehugger Robot extern ARRAY4(buffer_dup, uint, 32, 2);
257*f3782652STreehugger Robot extern ARRAY4(buffer_dup, uint, 64, 1);
258*f3782652STreehugger Robot extern ARRAY(buffer_dup, poly, 8, 8);
259*f3782652STreehugger Robot extern ARRAY(buffer_dup, poly, 16, 4);
260*f3782652STreehugger Robot extern ARRAY4(buffer_dup, float, 32, 2);
261*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
262*f3782652STreehugger Robot extern ARRAY4(buffer_dup, float, 16, 4);
263*f3782652STreehugger Robot #endif
264*f3782652STreehugger Robot extern ARRAY(buffer_dup, int, 8, 16);
265*f3782652STreehugger Robot extern ARRAY(buffer_dup, int, 16, 8);
266*f3782652STreehugger Robot extern ARRAY(buffer_dup, int, 32, 4);
267*f3782652STreehugger Robot extern ARRAY4(buffer_dup, int, 64, 2);
268*f3782652STreehugger Robot extern ARRAY(buffer_dup, uint, 8, 16);
269*f3782652STreehugger Robot extern ARRAY(buffer_dup, uint, 16, 8);
270*f3782652STreehugger Robot extern ARRAY(buffer_dup, uint, 32, 4);
271*f3782652STreehugger Robot extern ARRAY4(buffer_dup, uint, 64, 2);
272*f3782652STreehugger Robot extern ARRAY(buffer_dup, poly, 8, 16);
273*f3782652STreehugger Robot extern ARRAY(buffer_dup, poly, 16, 8);
274*f3782652STreehugger Robot extern ARRAY(buffer_dup, float, 32, 4);
275*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
276*f3782652STreehugger Robot extern ARRAY(buffer_dup, float, 16, 8);
277*f3782652STreehugger Robot #endif
278*f3782652STreehugger Robot 
279*f3782652STreehugger Robot /* Input buffers for vld2, one of each size */
280*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, int, 8, 8, 2);
281*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, int, 16, 4, 2);
282*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, int, 32, 2, 2);
283*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, int, 64, 1, 2);
284*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, uint, 8, 8, 2);
285*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, uint, 16, 4, 2);
286*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, uint, 32, 2, 2);
287*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, uint, 64, 1, 2);
288*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, poly, 8, 8, 2);
289*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, poly, 16, 4, 2);
290*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, float, 32, 2, 2);
291*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
292*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, float, 16, 4, 2);
293*f3782652STreehugger Robot #endif
294*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, int, 8, 16, 2);
295*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, int, 16, 8, 2);
296*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, int, 32, 4, 2);
297*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, int, 64, 2, 2);
298*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, uint, 8, 16, 2);
299*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, uint, 16, 8, 2);
300*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, uint, 32, 4, 2);
301*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, uint, 64, 2, 2);
302*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, poly, 8, 16, 2);
303*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, poly, 16, 8, 2);
304*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, float, 32, 4, 2);
305*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
306*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld2, float, 16, 8, 2);
307*f3782652STreehugger Robot #endif
308*f3782652STreehugger Robot 
309*f3782652STreehugger Robot /* Input buffers for vld3, one of each size */
310*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, int, 8, 8, 3);
311*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, int, 16, 4, 3);
312*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, int, 32, 2, 3);
313*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, int, 64, 1, 3);
314*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, uint, 8, 8, 3);
315*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, uint, 16, 4, 3);
316*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, uint, 32, 2, 3);
317*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, uint, 64, 1, 3);
318*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, poly, 8, 8, 3);
319*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, poly, 16, 4, 3);
320*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, float, 32, 2, 3);
321*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
322*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, float, 16, 4, 3);
323*f3782652STreehugger Robot #endif
324*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, int, 8, 16, 3);
325*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, int, 16, 8, 3);
326*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, int, 32, 4, 3);
327*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, int, 64, 2, 3);
328*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, uint, 8, 16, 3);
329*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, uint, 16, 8, 3);
330*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, uint, 32, 4, 3);
331*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, uint, 64, 2, 3);
332*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, poly, 8, 16, 3);
333*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, poly, 16, 8, 3);
334*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, float, 32, 4, 3);
335*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
336*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld3, float, 16, 8, 3);
337*f3782652STreehugger Robot #endif
338*f3782652STreehugger Robot 
339*f3782652STreehugger Robot /* Input buffers for vld4, one of each size */
340*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, int, 8, 8, 4);
341*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, int, 16, 4, 4);
342*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, int, 32, 2, 4);
343*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, int, 64, 1, 4);
344*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, uint, 8, 8, 4);
345*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, uint, 16, 4, 4);
346*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, uint, 32, 2, 4);
347*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, uint, 64, 1, 4);
348*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, poly, 8, 8, 4);
349*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, poly, 16, 4, 4);
350*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, float, 32, 2, 4);
351*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
352*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, float, 16, 4, 4);
353*f3782652STreehugger Robot #endif
354*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, int, 8, 16, 4);
355*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, int, 16, 8, 4);
356*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, int, 32, 4, 4);
357*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, int, 64, 2, 4);
358*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, uint, 8, 16, 4);
359*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, uint, 16, 8, 4);
360*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, uint, 32, 4, 4);
361*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, uint, 64, 2, 4);
362*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, poly, 8, 16, 4);
363*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, poly, 16, 8, 4);
364*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, float, 32, 4, 4);
365*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
366*f3782652STreehugger Robot extern VECT_ARRAY(buffer_vld4, float, 16, 8, 4);
367*f3782652STreehugger Robot #endif
368*f3782652STreehugger Robot 
369*f3782652STreehugger Robot /* Input buffers for vld2_lane */
370*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld2_lane, int, 8, 2)[2];
371*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld2_lane, int, 16, 2)[2];
372*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld2_lane, int, 32, 2)[2];
373*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld2_lane, int, 64, 2)[2];
374*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld2_lane, uint, 8, 2)[2];
375*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld2_lane, uint, 16, 2)[2];
376*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld2_lane, uint, 32, 2)[2];
377*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld2_lane, uint, 64, 2)[2];
378*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld2_lane, poly, 8, 2)[2];
379*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld2_lane, poly, 16, 2)[2];
380*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld2_lane, float, 32, 2)[2];
381*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
382*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld2_lane, float, 16, 2)[2];
383*f3782652STreehugger Robot #endif
384*f3782652STreehugger Robot 
385*f3782652STreehugger Robot /* Input buffers for vld3_lane */
386*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld3_lane, int, 8, 3)[3];
387*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld3_lane, int, 16, 3)[3];
388*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld3_lane, int, 32, 3)[3];
389*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld3_lane, int, 64, 3)[3];
390*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld3_lane, uint, 8, 3)[3];
391*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld3_lane, uint, 16, 3)[3];
392*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld3_lane, uint, 32, 3)[3];
393*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld3_lane, uint, 64, 3)[3];
394*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld3_lane, poly, 8, 3)[3];
395*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld3_lane, poly, 16, 3)[3];
396*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld3_lane, float, 32, 3)[3];
397*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
398*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld3_lane, float, 16, 3)[3];
399*f3782652STreehugger Robot #endif
400*f3782652STreehugger Robot 
401*f3782652STreehugger Robot /* Input buffers for vld4_lane */
402*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld4_lane, int, 8, 4)[4];
403*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld4_lane, int, 16, 4)[4];
404*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld4_lane, int, 32, 4)[4];
405*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld4_lane, int, 64, 4)[4];
406*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld4_lane, uint, 8, 4)[4];
407*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld4_lane, uint, 16, 4)[4];
408*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld4_lane, uint, 32, 4)[4];
409*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld4_lane, uint, 64, 4)[4];
410*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld4_lane, poly, 8, 4)[4];
411*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld4_lane, poly, 16, 4)[4];
412*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld4_lane, float, 32, 4)[4];
413*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
414*f3782652STreehugger Robot extern VECT_VAR_DECL(buffer_vld4_lane, float, 16, 4)[4];
415*f3782652STreehugger Robot #endif
416*f3782652STreehugger Robot 
417*f3782652STreehugger Robot /* Output buffers, one of each size */
418*f3782652STreehugger Robot static ARRAY(result, int, 8, 8);
419*f3782652STreehugger Robot static ARRAY(result, int, 16, 4);
420*f3782652STreehugger Robot static ARRAY(result, int, 32, 2);
421*f3782652STreehugger Robot static ARRAY(result, int, 64, 1);
422*f3782652STreehugger Robot static ARRAY(result, uint, 8, 8);
423*f3782652STreehugger Robot static ARRAY(result, uint, 16, 4);
424*f3782652STreehugger Robot static ARRAY(result, uint, 32, 2);
425*f3782652STreehugger Robot static ARRAY(result, uint, 64, 1);
426*f3782652STreehugger Robot static ARRAY(result, poly, 8, 8);
427*f3782652STreehugger Robot static ARRAY(result, poly, 16, 4);
428*f3782652STreehugger Robot static ARRAY(result, float, 32, 2);
429*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
430*f3782652STreehugger Robot static ARRAY(result, float, 16, 4);
431*f3782652STreehugger Robot #endif
432*f3782652STreehugger Robot static ARRAY(result, int, 8, 16);
433*f3782652STreehugger Robot static ARRAY(result, int, 16, 8);
434*f3782652STreehugger Robot static ARRAY(result, int, 32, 4);
435*f3782652STreehugger Robot static ARRAY(result, int, 64, 2);
436*f3782652STreehugger Robot static ARRAY(result, uint, 8, 16);
437*f3782652STreehugger Robot static ARRAY(result, uint, 16, 8);
438*f3782652STreehugger Robot static ARRAY(result, uint, 32, 4);
439*f3782652STreehugger Robot static ARRAY(result, uint, 64, 2);
440*f3782652STreehugger Robot static ARRAY(result, poly, 8, 16);
441*f3782652STreehugger Robot static ARRAY(result, poly, 16, 8);
442*f3782652STreehugger Robot static ARRAY(result, float, 32, 4);
443*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
444*f3782652STreehugger Robot static ARRAY(result, float, 16, 8);
445*f3782652STreehugger Robot #endif
446*f3782652STreehugger Robot 
447*f3782652STreehugger Robot /* Dump results (generic function) */
dump_results(char * test_name)448*f3782652STreehugger Robot static void dump_results (char *test_name)
449*f3782652STreehugger Robot {
450*f3782652STreehugger Robot   int i;
451*f3782652STreehugger Robot 
452*f3782652STreehugger Robot   fprintf(ref_file, "\n%s output:\n", test_name);
453*f3782652STreehugger Robot   fprintf(gcc_tests_file, "\n%s output:\n", test_name);
454*f3782652STreehugger Robot 
455*f3782652STreehugger Robot   DUMP(test_name, int, 8, 8, PRId8);
456*f3782652STreehugger Robot   DUMP(test_name, int, 16, 4, PRId16);
457*f3782652STreehugger Robot   DUMP(test_name, int, 32, 2, PRId32);
458*f3782652STreehugger Robot   DUMP(test_name, int, 64, 1, PRId64);
459*f3782652STreehugger Robot   DUMP(test_name, uint, 8, 8, PRIu8);
460*f3782652STreehugger Robot   DUMP(test_name, uint, 16, 4, PRIu16);
461*f3782652STreehugger Robot   DUMP(test_name, uint, 32, 2, PRIu32);
462*f3782652STreehugger Robot   DUMP(test_name, uint, 64, 1, PRIu64);
463*f3782652STreehugger Robot   DUMP_POLY(test_name, poly, 8, 8, PRIu8);
464*f3782652STreehugger Robot   DUMP_POLY(test_name, poly, 16, 4, PRIu16);
465*f3782652STreehugger Robot   DUMP_FP(test_name, float, 32, 2, PRIx32);
466*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
467*f3782652STreehugger Robot   DUMP_FP16(test_name, float, 16, 4, PRIu16);
468*f3782652STreehugger Robot #endif
469*f3782652STreehugger Robot 
470*f3782652STreehugger Robot   DUMP(test_name, int, 8, 16, PRId8);
471*f3782652STreehugger Robot   DUMP(test_name, int, 16, 8, PRId16);
472*f3782652STreehugger Robot   DUMP(test_name, int, 32, 4, PRId32);
473*f3782652STreehugger Robot   DUMP(test_name, int, 64, 2, PRId64);
474*f3782652STreehugger Robot   DUMP(test_name, uint, 8, 16, PRIu8);
475*f3782652STreehugger Robot   DUMP(test_name, uint, 16, 8, PRIu16);
476*f3782652STreehugger Robot   DUMP(test_name, uint, 32, 4, PRIu32);
477*f3782652STreehugger Robot   DUMP(test_name, uint, 64, 2, PRIu64);
478*f3782652STreehugger Robot   DUMP_POLY(test_name, poly, 8, 16, PRIu8);
479*f3782652STreehugger Robot   DUMP_POLY(test_name, poly, 16, 8, PRIu16);
480*f3782652STreehugger Robot   DUMP_FP(test_name, float, 32, 4, PRIx32);
481*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
482*f3782652STreehugger Robot   DUMP_FP16(test_name, float, 16, 8, PRIu16);
483*f3782652STreehugger Robot #endif
484*f3782652STreehugger Robot }
485*f3782652STreehugger Robot 
486*f3782652STreehugger Robot /* Dump results in hex (generic function) */
dump_results_hex2(const char * test_name,const char * comment)487*f3782652STreehugger Robot static void dump_results_hex2 (const char *test_name, const char* comment)
488*f3782652STreehugger Robot {
489*f3782652STreehugger Robot   int i;
490*f3782652STreehugger Robot 
491*f3782652STreehugger Robot   fprintf(ref_file, "\n%s%s output:\n", test_name, comment);
492*f3782652STreehugger Robot   fprintf(gcc_tests_file, "\n%s%s output:\n", test_name, comment);
493*f3782652STreehugger Robot 
494*f3782652STreehugger Robot   DUMP(test_name, int, 8, 8, PRIx8);
495*f3782652STreehugger Robot   DUMP(test_name, int, 16, 4, PRIx16);
496*f3782652STreehugger Robot   DUMP(test_name, int, 32, 2, PRIx32);
497*f3782652STreehugger Robot   DUMP(test_name, int, 64, 1, PRIx64);
498*f3782652STreehugger Robot   DUMP(test_name, uint, 8, 8, PRIx8);
499*f3782652STreehugger Robot   DUMP(test_name, uint, 16, 4, PRIx16);
500*f3782652STreehugger Robot   DUMP(test_name, uint, 32, 2, PRIx32);
501*f3782652STreehugger Robot   DUMP(test_name, uint, 64, 1, PRIx64);
502*f3782652STreehugger Robot   DUMP_POLY(test_name, poly, 8, 8, PRIx8);
503*f3782652STreehugger Robot   DUMP_POLY(test_name, poly, 16, 4, PRIx16);
504*f3782652STreehugger Robot   DUMP_FP(test_name, float, 32, 2, PRIx32);
505*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
506*f3782652STreehugger Robot   DUMP_FP16(test_name, float, 16, 4, PRIx16);
507*f3782652STreehugger Robot #endif
508*f3782652STreehugger Robot 
509*f3782652STreehugger Robot   DUMP(test_name, int, 8, 16, PRIx8);
510*f3782652STreehugger Robot   DUMP(test_name, int, 16, 8, PRIx16);
511*f3782652STreehugger Robot   DUMP(test_name, int, 32, 4, PRIx32);
512*f3782652STreehugger Robot   DUMP(test_name, int, 64, 2, PRIx64);
513*f3782652STreehugger Robot   DUMP(test_name, uint, 8, 16, PRIx8);
514*f3782652STreehugger Robot   DUMP(test_name, uint, 16, 8, PRIx16);
515*f3782652STreehugger Robot   DUMP(test_name, uint, 32, 4, PRIx32);
516*f3782652STreehugger Robot   DUMP(test_name, uint, 64, 2, PRIx64);
517*f3782652STreehugger Robot   DUMP_POLY(test_name, poly, 8, 16, PRIx8);
518*f3782652STreehugger Robot   DUMP_POLY(test_name, poly, 16, 8, PRIx16);
519*f3782652STreehugger Robot   DUMP_FP(test_name, float, 32, 4, PRIx32);
520*f3782652STreehugger Robot #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
521*f3782652STreehugger Robot   DUMP_FP16(test_name, float, 16, 8, PRIx16);
522*f3782652STreehugger Robot #endif
523*f3782652STreehugger Robot }
524*f3782652STreehugger Robot 
/* Dump results in hex with no extra text after the test name: forwards
   to dump_results_hex2 with an empty comment string.  */
static void dump_results_hex (const char *test_name)
{
  dump_results_hex2 (test_name, "");
}
529*f3782652STreehugger Robot 
530*f3782652STreehugger Robot #ifndef STM_ARM_NEON_MODELS
531*f3782652STreehugger Robot 
/* Select the target byte order as THIS_ENDIAN.

   This hack is to cope with various compilers/libc which may not
   provide endian.h, or cross-compilers such as llvm which include the
   host's endian.h.  */
#ifndef __arm__
#include <endian.h>
#define THIS_ENDIAN __BYTE_ORDER
#else /* __arm__ */
#ifdef __ARMEL__
#define THIS_ENDIAN __LITTLE_ENDIAN
#else /* __ARMEL__ */
/* endian.h is not included on this path, so __BIG_ENDIAN and
   __LITTLE_ENDIAN may both be undefined.  Undefined identifiers
   evaluate to 0 inside #if, which would make a big-endian THIS_ENDIAN
   compare equal to __LITTLE_ENDIAN.  Fall back to the conventional
   value 4321, which differs from both 0 and the conventional
   little-endian value 1234.  */
#ifdef __BIG_ENDIAN
#define THIS_ENDIAN __BIG_ENDIAN
#else
#define THIS_ENDIAN 4321
#endif
#endif
#endif /* __arm__ */
545*f3782652STreehugger Robot 
#if THIS_ENDIAN == __LITTLE_ENDIAN

/* Overlay of a 32-bit floating-point status word: `word' is the raw
   value, `b' names the five flag bits that sit above 27 bits of
   padding.  The field order is mirrored between the two byte orders so
   that the flags land on the same bits of `word' (bit-field allocation
   order is implementation-defined; this relies on the usual ABI
   behaviour).

   Every field is explicitly unsigned: the signedness of a plain `int'
   bit-field is implementation-defined, and a signed 1-bit field reads
   back as -1 rather than 1 when set.  */
typedef union {
  struct {
    unsigned int _xxx:27;	/* padding below the flags */
    unsigned int QC:1;
    unsigned int V:1;
    unsigned int C:1;
    unsigned int Z:1;
    unsigned int N:1;
  } b;
  unsigned int word;
} _ARM_FPSCR;

#else /* __BIG_ENDIAN */

/* Same overlay with the fields listed in the opposite order; see the
   little-endian variant for the rationale behind the unsigned
   fields.  */
typedef union {
  struct {
    unsigned int N:1;
    unsigned int Z:1;
    unsigned int C:1;
    unsigned int V:1;
    unsigned int QC:1;
    unsigned int _dnm:27;	/* padding below the flags */
  } b;
  unsigned int word;
} _ARM_FPSCR;

#endif /* __BIG_ENDIAN */
575*f3782652STreehugger Robot 
576*f3782652STreehugger Robot #ifdef __ARMCC_VERSION
577*f3782652STreehugger Robot register _ARM_FPSCR _afpscr_for_qc __asm("fpscr");
578*f3782652STreehugger Robot # define Neon_Cumulative_Sat _afpscr_for_qc.b.QC
579*f3782652STreehugger Robot # define Set_Neon_Cumulative_Sat(x, depend)  {Neon_Cumulative_Sat = (x);}
580*f3782652STreehugger Robot #else
581*f3782652STreehugger Robot /* GCC/ARM does not know this register */
582*f3782652STreehugger Robot # define Neon_Cumulative_Sat  __read_neon_cumulative_sat()
583*f3782652STreehugger Robot /* We need a fake dependency to ensure correct ordering of asm
584*f3782652STreehugger Robot    statements to preset the QC flag value, and Neon operators writing
585*f3782652STreehugger Robot    to QC. */
586*f3782652STreehugger Robot #define Set_Neon_Cumulative_Sat(x, depend)	\
587*f3782652STreehugger Robot   __set_neon_cumulative_sat((x), (depend))
588*f3782652STreehugger Robot 
589*f3782652STreehugger Robot # if defined(__aarch64__)
__read_neon_cumulative_sat(void)590*f3782652STreehugger Robot static volatile int __read_neon_cumulative_sat (void) {
591*f3782652STreehugger Robot     _ARM_FPSCR _afpscr_for_qc;
592*f3782652STreehugger Robot     asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc));
593*f3782652STreehugger Robot     return _afpscr_for_qc.b.QC;
594*f3782652STreehugger Robot }
595*f3782652STreehugger Robot 
596*f3782652STreehugger Robot #define __set_neon_cumulative_sat(x, depend) {				\
597*f3782652STreehugger Robot     _ARM_FPSCR _afpscr_for_qc;						\
598*f3782652STreehugger Robot     asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc));		\
599*f3782652STreehugger Robot     _afpscr_for_qc.b.QC = x;						\
600*f3782652STreehugger Robot     asm volatile ("msr fpsr,%1" : "=X" (depend) : "r" (_afpscr_for_qc)); \
601*f3782652STreehugger Robot   }
602*f3782652STreehugger Robot 
603*f3782652STreehugger Robot # else
__read_neon_cumulative_sat(void)604*f3782652STreehugger Robot static volatile int __read_neon_cumulative_sat (void) {
605*f3782652STreehugger Robot     _ARM_FPSCR _afpscr_for_qc;
606*f3782652STreehugger Robot     asm volatile ("vmrs %0,fpscr" : "=r" (_afpscr_for_qc));
607*f3782652STreehugger Robot     return _afpscr_for_qc.b.QC;
608*f3782652STreehugger Robot }
609*f3782652STreehugger Robot 
610*f3782652STreehugger Robot #define __set_neon_cumulative_sat(x, depend) {				\
611*f3782652STreehugger Robot     _ARM_FPSCR _afpscr_for_qc;						\
612*f3782652STreehugger Robot     asm volatile ("vmrs %0,fpscr" : "=r" (_afpscr_for_qc));		\
613*f3782652STreehugger Robot     _afpscr_for_qc.b.QC = x;						\
614*f3782652STreehugger Robot     asm volatile ("vmsr fpscr,%1" : "=X" (depend) : "r" (_afpscr_for_qc)); \
615*f3782652STreehugger Robot   }
616*f3782652STreehugger Robot 
617*f3782652STreehugger Robot # endif
618*f3782652STreehugger Robot #endif
619*f3782652STreehugger Robot 
620*f3782652STreehugger Robot #endif /* STM_ARM_NEON_MODELS */
621*f3782652STreehugger Robot 
dump_neon_cumulative_sat(const char * msg,const char * name,const char * t1,int w,int n)622*f3782652STreehugger Robot static void dump_neon_cumulative_sat(const char* msg, const char *name,
623*f3782652STreehugger Robot 				     const char* t1, int w, int n)
624*f3782652STreehugger Robot {
625*f3782652STreehugger Robot   fprintf(ref_file, "%s:%d:%s Neon cumulative saturation %d\n", msg, result_idx++,
626*f3782652STreehugger Robot 	  name, Neon_Cumulative_Sat);
627*f3782652STreehugger Robot   fprintf(gcc_tests_file,
628*f3782652STreehugger Robot 	  "int VECT_VAR(expected_cumulative_sat,%s,%d,%d) = %d;\n",
629*f3782652STreehugger Robot 	  t1, w, n, Neon_Cumulative_Sat);
630*f3782652STreehugger Robot }
631*f3782652STreehugger Robot 
632*f3782652STreehugger Robot /* Clean output buffers before execution */
clean_results(void)633*f3782652STreehugger Robot static void clean_results (void)
634*f3782652STreehugger Robot {
635*f3782652STreehugger Robot   result_idx = 0;
636*f3782652STreehugger Robot   CLEAN(result, int, 8, 8);
637*f3782652STreehugger Robot   CLEAN(result, int, 16, 4);
638*f3782652STreehugger Robot   CLEAN(result, int, 32, 2);
639*f3782652STreehugger Robot   CLEAN(result, int, 64, 1);
640*f3782652STreehugger Robot   CLEAN(result, uint, 8, 8);
641*f3782652STreehugger Robot   CLEAN(result, uint, 16, 4);
642*f3782652STreehugger Robot   CLEAN(result, uint, 32, 2);
643*f3782652STreehugger Robot   CLEAN(result, uint, 64, 1);
644*f3782652STreehugger Robot   CLEAN(result, poly, 8, 8);
645*f3782652STreehugger Robot   CLEAN(result, poly, 16, 4);
646*f3782652STreehugger Robot   CLEAN(result, float, 32, 2);
647*f3782652STreehugger Robot 
648*f3782652STreehugger Robot   CLEAN(result, int, 8, 16);
649*f3782652STreehugger Robot   CLEAN(result, int, 16, 8);
650*f3782652STreehugger Robot   CLEAN(result, int, 32, 4);
651*f3782652STreehugger Robot   CLEAN(result, int, 64, 2);
652*f3782652STreehugger Robot   CLEAN(result, uint, 8, 16);
653*f3782652STreehugger Robot   CLEAN(result, uint, 16, 8);
654*f3782652STreehugger Robot   CLEAN(result, uint, 32, 4);
655*f3782652STreehugger Robot   CLEAN(result, uint, 64, 2);
656*f3782652STreehugger Robot   CLEAN(result, poly, 8, 16);
657*f3782652STreehugger Robot   CLEAN(result, poly, 16, 8);
658*f3782652STreehugger Robot   CLEAN(result, float, 32, 4);
659*f3782652STreehugger Robot }
660*f3782652STreehugger Robot 
661*f3782652STreehugger Robot 
/* Helpers to declare variables of various types.

   DECL_VARIABLE declares one volatile variable whose type is
   VECT_TYPE(T1, W, N) and whose name is VECT_VAR(VAR, T1, W, N).  The
   remaining macros expand to several such declarations; none of them
   ends with a semicolon, the user supplies the final one.  */
#define DECL_VARIABLE(VAR, T1, W, N) \
  volatile VECT_TYPE(T1, W, N) VECT_VAR(VAR, T1, W, N)

/* Signed integer variants, 64-bit vectors.  */
#define DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR) \
  DECL_VARIABLE(VAR, int, 8, 8); \
  DECL_VARIABLE(VAR, int, 16, 4); \
  DECL_VARIABLE(VAR, int, 32, 2); \
  DECL_VARIABLE(VAR, int, 64, 1)

/* Unsigned integer variants, 64-bit vectors.  */
#define DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR) \
  DECL_VARIABLE(VAR, uint, 8, 8); \
  DECL_VARIABLE(VAR, uint, 16, 4); \
  DECL_VARIABLE(VAR, uint, 32, 2); \
  DECL_VARIABLE(VAR, uint, 64, 1)

/* Signed integer variants, 128-bit vectors.  */
#define DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR) \
  DECL_VARIABLE(VAR, int, 8, 16); \
  DECL_VARIABLE(VAR, int, 16, 8); \
  DECL_VARIABLE(VAR, int, 32, 4); \
  DECL_VARIABLE(VAR, int, 64, 2)

/* Unsigned integer variants, 128-bit vectors.  */
#define DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR) \
  DECL_VARIABLE(VAR, uint, 8, 16); \
  DECL_VARIABLE(VAR, uint, 16, 8); \
  DECL_VARIABLE(VAR, uint, 32, 4); \
  DECL_VARIABLE(VAR, uint, 64, 2)

/* Every 64-bit variant: signed, unsigned, poly8, poly16 and float32.  */
#define DECL_VARIABLE_64BITS_VARIANTS(VAR) \
  DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR); \
  DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \
  DECL_VARIABLE(VAR, poly, 8, 8); \
  DECL_VARIABLE(VAR, poly, 16, 4); \
  DECL_VARIABLE(VAR, float, 32, 2)

/* Every 128-bit variant: signed, unsigned, poly8, poly16 and float32.  */
#define DECL_VARIABLE_128BITS_VARIANTS(VAR) \
  DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR); \
  DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR); \
  DECL_VARIABLE(VAR, poly, 8, 16); \
  DECL_VARIABLE(VAR, poly, 16, 8); \
  DECL_VARIABLE(VAR, float, 32, 4)

/* Both widths, every variant.  */
#define DECL_VARIABLE_ALL_VARIANTS(VAR) \
  DECL_VARIABLE_64BITS_VARIANTS(VAR); \
  DECL_VARIABLE_128BITS_VARIANTS(VAR)

/* Both widths, signed integer variants only.  */
#define DECL_VARIABLE_SIGNED_VARIANTS(VAR) \
  DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR); \
  DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR)

/* Both widths, unsigned integer variants only.  */
#define DECL_VARIABLE_UNSIGNED_VARIANTS(VAR) \
  DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \
  DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR)
715*f3782652STreehugger Robot 
/* Helpers to initialize vectors */

/* Assign to VECT_VAR(VAR, T1, W, N) the value returned by
   vdup<Q>_n_<T2><W>(V).  */
#define VDUP(VAR, Q, T1, T2, W, N, V) \
  VECT_VAR(VAR, T1, W, N) = vdup##Q##_n_##T2##W(V)

/* Update VECT_VAR(VAR, T1, W, N) in place with the value returned by
   vset<Q>_lane_<T2><W>(V, <that same variable>, L).  */
#define TEST_VSET_LANE(VAR, Q, T1, T2, W, N, L, V) \
  VECT_VAR(VAR, T1, W, N) = \
    vset##Q##_lane_##T2##W(V, VECT_VAR(VAR, T1, W, N), L)

/* We need to load initial values first, so rely on VLD1: assign to
   VECT_VAR(VAR, T1, W, N) the value returned by vld1<Q>_<T2><W>
   applied to VECT_VAR(BUF, T1, W, N).  */
#define VLOAD(VAR, BUF, Q, T1, T2, W, N) \
  VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N))
728*f3782652STreehugger Robot 
/* Helpers for macros with 1 constant and 5 variable arguments.

   Each helper invokes MACRO(VAR, Q, T1, T2, W, N) once per listed
   variant, where Q is empty for 64-bit vectors and `q' for 128-bit
   ones.  The expansions do not end with a semicolon.  */

/* Signed integers, 64-bit vectors.  */
#define TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR) \
  MACRO(VAR, , int, s, 8, 8); \
  MACRO(VAR, , int, s, 16, 4); \
  MACRO(VAR, , int, s, 32, 2); \
  MACRO(VAR, , int, s, 64, 1)

/* Unsigned integers, 64-bit vectors.  */
#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR) \
  MACRO(VAR, , uint, u, 8, 8); \
  MACRO(VAR, , uint, u, 16, 4); \
  MACRO(VAR, , uint, u, 32, 2); \
  MACRO(VAR, , uint, u, 64, 1)

/* Signed integers, 128-bit vectors.  */
#define TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR) \
  MACRO(VAR, q, int, s, 8, 16); \
  MACRO(VAR, q, int, s, 16, 8); \
  MACRO(VAR, q, int, s, 32, 4); \
  MACRO(VAR, q, int, s, 64, 2)

/* Unsigned integers, 128-bit vectors.  */
#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR) \
  MACRO(VAR, q, uint, u, 8, 16); \
  MACRO(VAR, q, uint, u, 16, 8); \
  MACRO(VAR, q, uint, u, 32, 4); \
  MACRO(VAR, q, uint, u, 64, 2)

/* Signed then unsigned integers, 64-bit vectors.  */
#define TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR) \
  TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR); \
  TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)

/* Signed then unsigned integers, 128-bit vectors.  */
#define TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR) \
  TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR); \
  TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)

/* All integer variants, 64-bit vectors first.  */
#define TEST_MACRO_ALL_VARIANTS_1_5(MACRO, VAR) \
  TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR); \
  TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR)

/* Signed integer variants of both widths.  */
#define TEST_MACRO_SIGNED_VARIANTS_1_5(MACRO, VAR) \
  TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR); \
  TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)
769*f3782652STreehugger Robot 
/* Helpers for macros with 2 constant and 5 variable arguments.

   Each helper invokes MACRO(VAR1, VAR2, Q, T1, T2, W, N) once per
   listed variant, where Q is empty for 64-bit vectors and `q' for
   128-bit ones.  Unlike the 1_5 family, the per-width "VARIANTS"
   helpers here also cover poly8 and poly16.  The expansions do not end
   with a semicolon.  */

/* Signed integers, 64-bit vectors.  */
#define TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \
  MACRO(VAR1, VAR2, , int, s, 8, 8); \
  MACRO(VAR1, VAR2, , int, s, 16, 4); \
  MACRO(VAR1, VAR2, , int, s, 32, 2); \
  MACRO(VAR1, VAR2, , int, s, 64, 1)

/* Unsigned integers, 64-bit vectors.  */
#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \
  MACRO(VAR1, VAR2, , uint, u, 8, 8); \
  MACRO(VAR1, VAR2, , uint, u, 16, 4); \
  MACRO(VAR1, VAR2, , uint, u, 32, 2); \
  MACRO(VAR1, VAR2, , uint, u, 64, 1)

/* Signed integers, 128-bit vectors.  */
#define TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \
  MACRO(VAR1, VAR2, q, int, s, 8, 16); \
  MACRO(VAR1, VAR2, q, int, s, 16, 8); \
  MACRO(VAR1, VAR2, q, int, s, 32, 4); \
  MACRO(VAR1, VAR2, q, int, s, 64, 2)

/* Unsigned integers, 128-bit vectors.  */
#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \
  MACRO(VAR1, VAR2, q, uint, u, 8, 16); \
  MACRO(VAR1, VAR2, q, uint, u, 16, 8); \
  MACRO(VAR1, VAR2, q, uint, u, 32, 4); \
  MACRO(VAR1, VAR2, q, uint, u, 64, 2)

/* Signed, unsigned, then poly8/poly16, 64-bit vectors.  */
#define TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2) \
  TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
  TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
  MACRO(VAR1, VAR2, , poly, p, 8, 8); \
  MACRO(VAR1, VAR2, , poly, p, 16, 4)

/* Signed, unsigned, then poly8/poly16, 128-bit vectors.  */
#define TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2) \
  TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
  TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
  MACRO(VAR1, VAR2, q, poly, p, 8, 16); \
  MACRO(VAR1, VAR2, q, poly, p, 16, 8)

/* Every variant above, 64-bit vectors first.  */
#define TEST_MACRO_ALL_VARIANTS_2_5(MACRO, VAR1, VAR2) \
  TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2); \
  TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)

/* Signed integer variants of both widths.  */
#define TEST_MACRO_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \
  TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
  TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)
814*f3782652STreehugger Robot 
815*f3782652STreehugger Robot #endif /* _STM_ARM_NEON_REF_H_ */
816