xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/conversions/conversions_data_info.h (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2023 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #ifndef CONVERSIONS_DATA_INFO_H
17 #define CONVERSIONS_DATA_INFO_H
18 
19 #if defined(__APPLE__)
20 #include <OpenCL/opencl.h>
21 #else
22 #include <CL/opencl.h>
23 #endif
24 
25 #if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
26 #include "fplib.h"
27 extern bool qcom_sat;
28 extern roundingMode qcom_rm;
29 #endif
30 
31 #include "harness/mt19937.h"
32 #include "harness/rounding_mode.h"
33 
34 #include <vector>
35 
36 #if defined(__linux__)
37 #include <sys/param.h>
38 #include <libgen.h>
39 #endif
40 
// Size in bytes of one element of each Type, indexed by Type. DataInfoSpec::init
// uses it to turn an element offset into a byte offset inside gIn.
extern size_t gTypeSizes[kTypeCount];
// Base address of the shared input buffer; each job fills its own slice of it
// in DataInfoSpec::init.
extern void *gIn;
43 
44 
// Whether a conversion is the plain (unsaturated) or the saturating variant.
enum SaturationMode
{
    kUnsaturated = 0,
    kSaturated = 1,

    // Number of saturation modes; must stay last.
    kSaturationModeCount = 2
};
52 
53 struct DataInitInfo
54 {
55     cl_ulong start;
56     cl_uint size;
57     Type outType;
58     Type inType;
59     SaturationMode sat;
60     RoundingMode round;
61     cl_uint threads;
62 
63     static std::vector<uint32_t> specialValuesUInt;
64     static std::vector<float> specialValuesFloat;
65     static std::vector<double> specialValuesDouble;
66 };
67 
68 struct DataInitBase : public DataInitInfo
69 {
70     virtual ~DataInitBase() = default;
71 
DataInitBaseDataInitBase72     explicit DataInitBase(const DataInitInfo &agg): DataInitInfo(agg) {}
conv_arrayDataInitBase73     virtual void conv_array(void *out, void *in, size_t n) {}
conv_array_satDataInitBase74     virtual void conv_array_sat(void *out, void *in, size_t n) {}
initDataInitBase75     virtual void init(const cl_uint &, const cl_uint &) {}
76 };
77 
78 template <typename InType, typename OutType>
79 struct DataInfoSpec : public DataInitBase
80 {
81     explicit DataInfoSpec(const DataInitInfo &agg);
82 
83     // helpers
84     float round_to_int(float f);
85     long long round_to_int_and_clamp(double d);
86 
87     OutType absolute(const OutType &x);
88 
89     // actual conversion of reference values
90     void conv(OutType *out, InType *in);
91     void conv_sat(OutType *out, InType *in);
92 
93     // min/max ranges for output type of data
94     std::pair<OutType, OutType> ranges;
95 
96     // matrix of clamping ranges for each rounding type
97     std::vector<std::pair<InType, InType>> clamp_ranges;
98 
99     std::vector<MTdataHolder> mdv;
100 
conv_arrayDataInfoSpec101     void conv_array(void *out, void *in, size_t n) override
102     {
103         for (size_t i = 0; i < n; i++)
104             conv(&((OutType *)out)[i], &((InType *)in)[i]);
105     }
106 
conv_array_satDataInfoSpec107     void conv_array_sat(void *out, void *in, size_t n) override
108     {
109         for (size_t i = 0; i < n; i++)
110             conv_sat(&((OutType *)out)[i], &((InType *)in)[i]);
111     }
112 
113     void init(const cl_uint &, const cl_uint &) override;
114     InType clamp(const InType &);
fclampDataInfoSpec115     inline float fclamp(float lo, float v, float hi)
116     {
117         v = v < lo ? lo : v;
118         return v < hi ? v : hi;
119     }
120 
dclampDataInfoSpec121     inline double dclamp(double lo, double v, double hi)
122     {
123         v = v < lo ? lo : v;
124         return v < hi ? v : hi;
125     }
126 };
127 
128 template <typename InType, typename OutType>
DataInfoSpec(const DataInitInfo & agg)129 DataInfoSpec<InType, OutType>::DataInfoSpec(const DataInitInfo &agg)
130     : DataInitBase(agg), mdv(0)
131 {
132     if (std::is_same<cl_float, OutType>::value)
133         ranges = std::make_pair(CL_FLT_MIN, CL_FLT_MAX);
134     else if (std::is_same<cl_double, OutType>::value)
135         ranges = std::make_pair(CL_DBL_MIN, CL_DBL_MAX);
136     else if (std::is_same<cl_uchar, OutType>::value)
137         ranges = std::make_pair(0, CL_UCHAR_MAX);
138     else if (std::is_same<cl_char, OutType>::value)
139         ranges = std::make_pair(CL_CHAR_MIN, CL_CHAR_MAX);
140     else if (std::is_same<cl_ushort, OutType>::value)
141         ranges = std::make_pair(0, CL_USHRT_MAX);
142     else if (std::is_same<cl_short, OutType>::value)
143         ranges = std::make_pair(CL_SHRT_MIN, CL_SHRT_MAX);
144     else if (std::is_same<cl_uint, OutType>::value)
145         ranges = std::make_pair(0, CL_UINT_MAX);
146     else if (std::is_same<cl_int, OutType>::value)
147         ranges = std::make_pair(CL_INT_MIN, CL_INT_MAX);
148     else if (std::is_same<cl_ulong, OutType>::value)
149         ranges = std::make_pair(0, CL_ULONG_MAX);
150     else if (std::is_same<cl_long, OutType>::value)
151         ranges = std::make_pair(CL_LONG_MIN, CL_LONG_MAX);
152 
153     // clang-format off
154     // for readability sake keep this section unformatted
155     if (std::is_floating_point<InType>::value)
156     { // from float/double
157         InType outMin = static_cast<InType>(ranges.first);
158         InType outMax = static_cast<InType>(ranges.second);
159 
160         InType eps = std::is_same<InType, cl_float>::value ? (InType) FLT_EPSILON : (InType) DBL_EPSILON;
161         if (std::is_integral<OutType>::value)
162         { // to char/uchar/short/ushort/int/uint/long/ulong
163             if (sizeof(OutType)<=sizeof(cl_short))
164             { // to char/uchar/short/ushort
165                 clamp_ranges=
166                 {{outMin-0.5f, outMax + 0.5f - outMax * 0.5f * eps},
167                   {outMin-0.5f, outMax + 0.5f - outMax * 0.5f * eps},
168                   {outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, outMax-1.f},
169                   {outMin-0.0f, outMax - outMax * 0.5f * eps },
170                   {outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, outMax - outMax * 0.5f * eps}};
171             }
172             else if (std::is_same<InType, cl_float>::value)
173             { // from float
174                 if (std::is_same<OutType, cl_uint>::value)
175                 { // to uint
176                     clamp_ranges=
177                     { {outMin-0.5f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)},
178                       {outMin-0.5f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)},
179                       {outMin-1.0f+0.5f*eps, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)},
180                       {outMin-0.0f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7) },
181                       {outMin-1.0f+0.5f*eps, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)}};
182                 }
183                 else if (std::is_same<OutType, cl_int>::value)
184                 { // to int
185                     clamp_ranges=
186                     { {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6)},
187                       {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6)},
188                       {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6)},
189                       {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) },
190                       {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6)}};
191                 }
192                 else if (std::is_same<OutType, cl_ulong>::value)
193                 { // to ulong
194                     clamp_ranges=
195                     {{outMin-0.5f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39)},
196                       {outMin-0.5f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39)},
197                       {outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39)},
198                       {outMin-0.0f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) },
199                       {outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39)}};
200                 }
201                 else if (std::is_same<OutType, cl_long>::value)
202                 { // to long
203                     clamp_ranges=
204                     { {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)},
205                       {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)},
206                       {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)},
207                       {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)},
208                       {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)}};
209                 }
210             }
211             else
212             { // from double
213                 if (std::is_same<OutType, cl_uint>::value)
214                 { // to uint
215                     clamp_ranges=
216                     { {outMin-0.5f, outMax + 0.5 - MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31) * eps},
217                       {outMin-0.5f, outMax + 0.5 - MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31) * eps},
218                       {outMin-1.0f+0.5f*eps, outMax},
219                       {outMin-0.0f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp31, 0x1fffffffffffffLL, -21) },
220                       {outMin-1.0f+0.5f*eps, MAKE_HEX_DOUBLE(0x1.fffffffffffffp31, 0x1fffffffffffffLL, -21)}};
221                 }
222                 else if (std::is_same<OutType, cl_int>::value)
223                 { // to int
224                     clamp_ranges=
225                     { {outMin-0.5f, outMax + 0.5 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * eps},
226                       {outMin-0.5f, outMax + 0.5 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * eps},
227                       {outMin-1.0f+outMax*eps, outMax},
228                       {outMin-0.0f, outMax + 1.0 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * eps },
229                       {outMin-1.0f+outMax*eps, outMax + 1.0 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * eps}};
230                 }
231                 else if (std::is_same<OutType, cl_ulong>::value)
232                 { // to ulong
233                     clamp_ranges=
234                     {{outMin-0.5f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11)},
235                       {outMin-0.5f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11)},
236                       {outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11)},
237                       {outMin-0.0f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) },
238                       {outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11)}};
239                 }
240                 else if (std::is_same<OutType, cl_long>::value)
241                 { // to long
242                     clamp_ranges=
243                     { {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)},
244                       {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)},
245                       {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)},
246                       {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)},
247                       {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)}};
248                 }
249             }
250         }
251     }
252     // clang-format on
253 }
254 
255 template <typename InType, typename OutType>
round_to_int(float f)256 float DataInfoSpec<InType, OutType>::round_to_int(float f)
257 {
258     static const float magic[2] = { MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23),
259                                     -MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23) };
260 
261     // Round fractional values to integer in round towards nearest mode
262     if (fabsf(f) < MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23))
263     {
264         volatile float x = f;
265         float magicVal = magic[f < 0];
266 
267 #if defined(__SSE__)
268         // Defeat x87 based arithmetic, which cant do FTZ, and will round this
269         // incorrectly
270         __m128 v = _mm_set_ss(x);
271         __m128 m = _mm_set_ss(magicVal);
272         v = _mm_add_ss(v, m);
273         v = _mm_sub_ss(v, m);
274         _mm_store_ss((float *)&x, v);
275 #else
276         x += magicVal;
277         x -= magicVal;
278 #endif
279         f = x;
280     }
281     return f;
282 }
283 
284 template <typename InType, typename OutType>
round_to_int_and_clamp(double f)285 long long DataInfoSpec<InType, OutType>::round_to_int_and_clamp(double f)
286 {
287     static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52),
288                                      MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) };
289 
290     if (f >= -(double)LLONG_MIN) return LLONG_MAX;
291 
292     if (f <= (double)LLONG_MIN) return LLONG_MIN;
293 
294     // Round fractional values to integer in round towards nearest mode
295     if (fabs(f) < MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52))
296     {
297         volatile double x = f;
298         double magicVal = magic[f < 0];
299 #if defined(__SSE2__) || defined(_MSC_VER)
300         // Defeat x87 based arithmetic, which cant do FTZ, and will round this
301         // incorrectly
302         __m128d v = _mm_set_sd(x);
303         __m128d m = _mm_set_sd(magicVal);
304         v = _mm_add_sd(v, m);
305         v = _mm_sub_sd(v, m);
306         _mm_store_sd((double *)&x, v);
307 #else
308         x += magicVal;
309         x -= magicVal;
310 #endif
311         f = x;
312     }
313     return (long long)f;
314 }
315 
316 template <typename InType, typename OutType>
absolute(const OutType & x)317 OutType DataInfoSpec<InType, OutType>::absolute(const OutType &x)
318 {
319     union {
320         cl_uint u;
321         OutType f;
322     } u;
323     u.f = x;
324     if (std::is_same<OutType, float>::value)
325         u.u &= 0x7fffffff;
326     else if (std::is_same<OutType, double>::value)
327         u.u &= 0x7fffffffffffffffULL;
328     else
329         log_error("Unexpected argument type of DataInfoSpec::absolute");
330 
331     return u.f;
332 }
333 
// Reference for the unsaturated conversion of a single element: reads *in and
// writes the converted value to *out.
//
// Every branch is compiled for every <InType, OutType> instantiation; the
// std::is_same / std::is_floating_point tests pick the one that runs. Several
// branches exist only to work around compiler or platform conversion quirks
// (MSVC on x86/x64, GCC on ARM), so statement order inside them matters.
template <typename InType, typename OutType>
void DataInfoSpec<InType, OutType>::conv(OutType *out, InType *in)
{
    if (std::is_same<cl_float, InType>::value)
    {
        // ---- from float ----
        cl_float inVal = *in;

        if (std::is_floating_point<OutType>::value)
        {
            // float -> float/double: plain cast
            *out = (OutType)inVal;
        }
        else if (std::is_same<cl_ulong, OutType>::value)
        {
#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
            // VS2005 (at least) on x86 uses fistp to store the float as a
            // 64-bit int. However, fistp stores it as a signed int, and some of
            // the test values won't fit into a signed int. (These test values
            // are >= 2^63.) The result on VS2005 is that these end up silently
            // (at least by default settings) clamped.
            // Work around it by converting x - 2^63 and adding 2^63 back as an
            // integer.
            cl_float x = round_to_int(inVal);
            if (x >= 9223372036854775808.0f)
            {
                x -= 9223372036854775808.0f;
                ((cl_ulong *)out)[0] = x;
                ((cl_ulong *)out)[0] += 9223372036854775808ULL;
            }
            else
            {
                ((cl_ulong *)out)[0] = x;
            }
#else
            *out = round_to_int(inVal);
#endif
        }
        else if (std::is_same<cl_long, OutType>::value)
        {
            // float -> long: round and saturate to the long long range
            *out = round_to_int_and_clamp(inVal);
        }
        else
            // float -> char/uchar/short/ushort/int/uint
            *out = round_to_int(inVal);
    }
    else if (std::is_same<cl_double, InType>::value)
    {
        // ---- from double ----
        if (std::is_same<cl_float, OutType>::value)
            *out = (OutType)*in;
        else
            // rint() rounds according to the current rounding mode
            *out = rint(*in);
    }
    else if (std::is_same<cl_ulong, InType>::value
             || std::is_same<cl_long, InType>::value)
    {
        // ---- from long/ulong ----
        if (std::is_same<cl_double, OutType>::value)
        {
#if defined(_MSC_VER)
            cl_ulong l = ((cl_ulong *)in)[0];
            double result;

            if (std::is_same<cl_ulong, InType>::value)
            {
                // Values with the top bit set do not fit a signed conversion:
                // halve them (OR-ing the dropped bit back in so it still takes
                // part in rounding), convert as signed, then double the result.
                cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1))
                                              : (cl_long)l;
#if defined(_M_X64)
                _mm_store_sd(&result, _mm_cvtsi64_sd(_mm_setzero_pd(), sl));
#else
                result = sl;
#endif
                ((double *)out)[0] =
                    (l == 0 ? 0.0 : (((cl_long)l < 0) ? result * 2.0 : result));
            }
            else
            {
                _mm_store_sd(&result, _mm_cvtsi64_sd(_mm_setzero_pd(), l));
                ((double *)out)[0] =
                    (l == 0 ? 0.0 : result); // Per IEEE-754-2008 5.4.1, 0's
                                             // always convert to +0.0
            }
#else
            // Use volatile to prevent optimization by Clang compiler
            volatile InType vi = *in;
            *out = (vi == 0 ? 0.0 : static_cast<OutType>(vi));
#endif
        }
        else if (std::is_same<cl_float, OutType>::value)
        {
            cl_float outVal = 0.f;

#if defined(_MSC_VER) && defined(_M_X64)
            cl_ulong l = ((cl_ulong *)in)[0];
            float result;
            if (std::is_same<cl_ulong, InType>::value)
            {
                // Same halve / convert-as-signed / double trick as the double
                // path above.
                cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1))
                                              : (cl_long)l;
                _mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), sl));
                outVal = (l == 0 ? 0.0f
                                 : (((cl_long)l < 0) ? result * 2.0f : result));
            }
            else
            {
                _mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), l));
                outVal = (l == 0 ? 0.0f : result); // Per IEEE-754-2008 5.4.1,
                                                   // 0's always convert to +0.0
            }
#else
            InType l = ((InType *)in)[0];
#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
            /* ARM VFP doesn't have a hardware instruction for converting from
             * 64-bit integer to float types, hence GCC ARM uses the
             * floating-point emulation code regardless of the -mfloat-abi
             * setting. But the emulation code in libgcc.a has only one rounding
             * mode (round to nearest even in this case) and ignores the user
             * rounding mode setting in hardware. As a result setting rounding
             * modes in hardware won't give correct rounding results for type
             * conversion from 64-bit integer to float using GCC for ARM, so
             * for testing different rounding modes we need to use an
             * alternative reference function. ARM64 does have an instruction,
             * however we cannot guarantee the compiler will use it. On all ARM
             * architectures use emulation to calculate the reference. */
            if (std::is_same<cl_ulong, InType>::value)
                outVal = qcom_u64_2_f32(l, qcom_sat, qcom_rm);
            else
                outVal = (l == 0 ? 0.0f : qcom_s64_2_f32(l, qcom_sat, qcom_rm));
#else
            outVal = (l == 0 ? 0.0f : (float)l); // Per IEEE-754-2008 5.4.1, 0's
                                                 // always convert to +0.0
#endif
#endif

            *out = outVal;
        }
        else
        {
            // long/ulong -> integer: plain cast
            *out = (OutType)*in;
        }
    }
    else
    {
        // ---- from char/uchar/short/ushort/int/uint ----
        if (std::is_same<cl_float, OutType>::value)
        {
            // Use volatile to prevent optimization by Clang compiler
            volatile InType vi = *in;
            // Per IEEE-754-2008 5.4.1, 0 always converts to +0.0
            *out = (vi == 0 ? 0.0f : vi);
        }
        else if (std::is_same<cl_double, OutType>::value)
        {
            // Per IEEE-754-2008 5.4.1, 0 always converts to +0.0
            *out = (*in == 0 ? 0.0 : *in);
        }
        else
        {
            *out = (OutType)*in;
        }
    }
}
489 
// Clamps x_ into [lo_, hi_]. This is a macro, so x_ can be expanded (and
// evaluated) up to three times: pass side-effect-free, cheap expressions.
#define CLAMP(lo_, x_, hi_)                                                    \
    (((x_) < (lo_)) ? (lo_) : (((x_) > (hi_)) ? (hi_) : (x_)))
492 
493 template <typename InType, typename OutType>
conv_sat(OutType * out,InType * in)494 void DataInfoSpec<InType, OutType>::conv_sat(OutType *out, InType *in)
495 {
496     if (std::is_floating_point<InType>::value)
497     {
498         if (std::is_floating_point<OutType>::value)
499         { // in float/double, out float/double
500             *out = (OutType)(*in);
501         }
502         else if ((std::is_same<InType, cl_float>::value)
503                  && std::is_same<cl_ulong, OutType>::value)
504         {
505             cl_float x = round_to_int(*in);
506 
507 #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
508             // VS2005 (at least) on x86 uses fistp to store the float as a
509             // 64-bit int. However, fistp stores it as a signed int, and some of
510             // the test values won't fit into a signed int. (These test values
511             // are >= 2^63.) The result on VS2005 is that these end up silently
512             // (at least by default settings) clamped to the max lowest ulong.
513             if (x >= 18446744073709551616.0f)
514             { // 2^64
515                 *out = 0xFFFFFFFFFFFFFFFFULL;
516             }
517             else if (x < 0)
518             {
519                 *out = 0;
520             }
521             else if (x >= 9223372036854775808.0f)
522             { // 2^63
523                 x -= 9223372036854775808.0f;
524                 *out = x;
525                 *out += 9223372036854775808ULL;
526             }
527             else
528             {
529                 *out = x;
530             }
531 #else
532             *out = x >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64)
533                 ? 0xFFFFFFFFFFFFFFFFULL
534                 : x < 0 ? 0 : (OutType)x;
535 #endif
536         }
537         else if ((std::is_same<InType, cl_float>::value)
538                  && std::is_same<cl_long, OutType>::value)
539         {
540             cl_float f = round_to_int(*in);
541             *out = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63)
542                 ? 0x7FFFFFFFFFFFFFFFULL
543                 : f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63)
544                     ? 0x8000000000000000LL
545                     : (OutType)f;
546         }
547         else if (std::is_same<InType, cl_double>::value
548                  && std::is_same<cl_ulong, OutType>::value)
549         {
550             InType f = rint(*in);
551             *out = f >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64)
552                 ? 0xFFFFFFFFFFFFFFFFULL
553                 : f < 0 ? 0 : (OutType)f;
554         }
555         else if (std::is_same<InType, cl_double>::value
556                  && std::is_same<cl_long, OutType>::value)
557         {
558             InType f = rint(*in);
559             *out = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63)
560                 ? 0x7FFFFFFFFFFFFFFFULL
561                 : f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63)
562                     ? 0x8000000000000000LL
563                     : (OutType)f;
564         }
565         else
566         { // in float/double, out char/uchar/short/ushort/int/uint
567             *out =
568                 CLAMP(ranges.first, round_to_int_and_clamp(*in), ranges.second);
569         }
570     }
571     else if (std::is_integral<InType>::value
572              && std::is_integral<OutType>::value)
573     {
574         {
575             if ((std::is_signed<InType>::value
576                  && std::is_signed<OutType>::value)
577                 || (!std::is_signed<InType>::value
578                     && !std::is_signed<OutType>::value))
579             {
580                 if (sizeof(InType) <= sizeof(OutType))
581                 {
582                     *out = (OutType)*in;
583                 }
584                 else
585                 {
586                     *out = CLAMP(ranges.first, *in, ranges.second);
587                 }
588             }
589             else
590             { // mixed signed/unsigned types
591                 if (sizeof(InType) < sizeof(OutType))
592                 {
593                     *out = (!std::is_signed<InType>::value)
594                         ? (OutType)*in
595                         : CLAMP(0, *in, ranges.second); // *in < 0 ? 0 : *in
596                 }
597                 else
598                 { // bigger/equal mixed signed/unsigned types - always clamp
599                     *out = CLAMP(0, *in, ranges.second);
600                 }
601             }
602         }
603     }
604     else
605     { // InType integral, OutType floating
606         *out = std::is_signed<InType>::value ? (OutType)*in
607                                              : absolute((OutType)*in);
608     }
609 }
610 
611 template <typename InType, typename OutType>
init(const cl_uint & job_id,const cl_uint & thread_id)612 void DataInfoSpec<InType, OutType>::init(const cl_uint &job_id,
613                                          const cl_uint &thread_id)
614 {
615     uint64_t ulStart = start;
616     void *pIn = (char *)gIn + job_id * size * gTypeSizes[inType];
617 
618     if (std::is_integral<InType>::value)
619     {
620         InType *o = (InType *)pIn;
621         if (sizeof(InType) <= sizeof(cl_short))
622         { // char/uchar/ushort/short
623             for (int i = 0; i < size; i++) o[i] = ulStart++;
624         }
625         else if (sizeof(InType) <= sizeof(cl_int))
626         { // int/uint
627             int i = 0;
628             if (gIsEmbedded)
629                 for (i = 0; i < size; i++)
630                     o[i] = (InType)genrand_int32(mdv[thread_id]);
631             else
632                 for (i = 0; i < size; i++) o[i] = (InType)i + ulStart;
633 
634             if (0 == ulStart)
635             {
636                 size_t tableSize = specialValuesUInt.size()
637                     * sizeof(decltype(specialValuesUInt)::value_type);
638                 if (sizeof(InType) * size < tableSize)
639                     tableSize = sizeof(InType) * size;
640                 memcpy((char *)(o + i) - tableSize, &specialValuesUInt.front(),
641                        tableSize);
642             }
643         }
644         else
645         { // long/ulong
646             cl_ulong *o = (cl_ulong *)pIn;
647             cl_ulong i, j, k;
648 
649             i = 0;
650             if (ulStart == 0)
651             {
652                 // Try various powers of two
653                 for (j = 0; j < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++)
654                     o[j] = (cl_ulong)1 << j;
655                 i = j;
656 
657                 // try the complement of those
658                 for (j = 0; i < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++)
659                     o[i++] = ~((cl_ulong)1 << j);
660 
661                 // Try various negative powers of two
662                 for (j = 0; i < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++)
663                     o[i++] = (cl_ulong)0xFFFFFFFFFFFFFFFEULL << j;
664 
665                 // try various powers of two plus 1, shifted by various amounts
666                 for (j = 0; i < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++)
667                     for (k = 0;
668                          i < (cl_ulong)size && k < 8 * sizeof(cl_ulong) - j;
669                          k++)
670                         o[i++] = (((cl_ulong)1 << j) + 1) << k;
671 
672                 // try various powers of two minus 1
673                 for (j = 0; i < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++)
674                     for (k = 0;
675                          i < (cl_ulong)size && k < 8 * sizeof(cl_ulong) - j;
676                          k++)
677                         o[i++] = (((cl_ulong)1 << j) - 1) << k;
678 
679                 // Other patterns
680                 cl_ulong pattern[] = {
681                     0x3333333333333333ULL, 0x5555555555555555ULL,
682                     0x9999999999999999ULL, 0x6666666666666666ULL,
683                     0xccccccccccccccccULL, 0xaaaaaaaaaaaaaaaaULL
684                 };
685                 cl_ulong mask[] = { 0xffffffffffffffffULL,
686                                     0xff00ff00ff00ff00ULL,
687                                     0xffff0000ffff0000ULL,
688                                     0xffffffff00000000ULL };
689                 for (j = 0; i < (cl_ulong)size
690                      && j < sizeof(pattern) / sizeof(pattern[0]);
691                      j++)
692                     for (k = 0; i + 2 <= (cl_ulong)size
693                          && k < sizeof(mask) / sizeof(mask[0]);
694                          k++)
695                     {
696                         o[i++] = pattern[j] & mask[k];
697                         o[i++] = pattern[j] & ~mask[k];
698                     }
699             }
700 
701             auto &md = mdv[thread_id];
702             for (; i < (cl_ulong)size; i++)
703                 o[i] = (cl_ulong)genrand_int32(md)
704                     | ((cl_ulong)genrand_int32(md) << 32);
705         }
706     } // integrals
707     else if (std::is_same<InType, cl_float>::value)
708     {
709         cl_uint *o = (cl_uint *)pIn;
710         int i;
711 
712         if (gIsEmbedded)
713             for (i = 0; i < size; i++)
714                 o[i] = (cl_uint)genrand_int32(mdv[thread_id]);
715         else
716             for (i = 0; i < size; i++) o[i] = (cl_uint)i + ulStart;
717 
718         if (0 == ulStart)
719         {
720             size_t tableSize = specialValuesFloat.size()
721                 * sizeof(decltype(specialValuesFloat)::value_type);
722             if (sizeof(InType) * size < tableSize)
723                 tableSize = sizeof(InType) * size;
724             memcpy((char *)(o + i) - tableSize, &specialValuesFloat.front(),
725                    tableSize);
726         }
727 
728         if (kUnsaturated == sat)
729         {
730             InType *f = (InType *)pIn;
731             for (i = 0; i < size; i++) f[i] = clamp(f[i]);
732         }
733     }
734     else if (std::is_same<InType, cl_double>::value)
735     {
736         InType *o = (InType *)pIn;
737         int i = 0;
738 
739         union {
740             uint64_t u;
741             InType d;
742         } u;
743 
744         for (i = 0; i < size; i++)
745         {
746             uint64_t z = i + ulStart;
747 
748             uint32_t bits = ((uint32_t)z ^ (uint32_t)(z >> 32));
749             // split 0x89abcdef to 0x89abc00000000def
750             u.u = bits & 0xfffU;
751             u.u |= (uint64_t)(bits & ~0xfffU) << 32;
752             // sign extend the leading bit of def segment as sign bit so that
753             // the middle region consists of either all 1s or 0s
754             u.u -= (bits & 0x800U) << 1;
755             o[i] = u.d;
756         }
757 
758         if (0 == ulStart)
759         {
760             size_t tableSize = specialValuesDouble.size()
761                 * sizeof(decltype(specialValuesDouble)::value_type);
762             if (sizeof(InType) * size < tableSize)
763                 tableSize = sizeof(InType) * size;
764             memcpy((char *)(o + i) - tableSize, &specialValuesDouble.front(),
765                    tableSize);
766         }
767 
768         if (0 == sat)
769             for (i = 0; i < size; i++) o[i] = clamp(o[i]);
770     }
771 }
772 
773 template <typename InType, typename OutType>
clamp(const InType & in)774 InType DataInfoSpec<InType, OutType>::clamp(const InType &in)
775 {
776     if (std::is_integral<OutType>::value)
777     {
778         if (std::is_same<InType, cl_float>::value)
779         {
780             return fclamp(clamp_ranges[round].first, in,
781                           clamp_ranges[round].second);
782         }
783         else if (std::is_same<InType, cl_double>::value)
784         {
785             return dclamp(clamp_ranges[round].first, in,
786                           clamp_ranges[round].second);
787         }
788     }
789     return in;
790 }
791 
792 #endif /* CONVERSIONS_DATA_INFO_H */
793