1 //
2 // Copyright (c) 2023 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #ifndef CONVERSIONS_DATA_INFO_H
17 #define CONVERSIONS_DATA_INFO_H
18
19 #if defined(__APPLE__)
20 #include <OpenCL/opencl.h>
21 #else
22 #include <CL/opencl.h>
23 #endif
24
25 #if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
26 #include "fplib.h"
27 extern bool qcom_sat;
28 extern roundingMode qcom_rm;
29 #endif
30
31 #include "harness/mt19937.h"
32 #include "harness/rounding_mode.h"
33
34 #include <vector>
35
36 #if defined(__linux__)
37 #include <sys/param.h>
38 #include <libgen.h>
39 #endif
40
41 extern size_t gTypeSizes[kTypeCount];
42 extern void *gIn;
43
44
// Saturation selector stored in DataInitInfo::sat.
// kSaturationModeCount is a sentinel holding the number of real modes; it is
// not itself a mode.
enum SaturationMode
{
    kUnsaturated = 0,
    kSaturated,

    kSaturationModeCount
};
52
53 struct DataInitInfo
54 {
55 cl_ulong start;
56 cl_uint size;
57 Type outType;
58 Type inType;
59 SaturationMode sat;
60 RoundingMode round;
61 cl_uint threads;
62
63 static std::vector<uint32_t> specialValuesUInt;
64 static std::vector<float> specialValuesFloat;
65 static std::vector<double> specialValuesDouble;
66 };
67
68 struct DataInitBase : public DataInitInfo
69 {
70 virtual ~DataInitBase() = default;
71
DataInitBaseDataInitBase72 explicit DataInitBase(const DataInitInfo &agg): DataInitInfo(agg) {}
conv_arrayDataInitBase73 virtual void conv_array(void *out, void *in, size_t n) {}
conv_array_satDataInitBase74 virtual void conv_array_sat(void *out, void *in, size_t n) {}
initDataInitBase75 virtual void init(const cl_uint &, const cl_uint &) {}
76 };
77
78 template <typename InType, typename OutType>
79 struct DataInfoSpec : public DataInitBase
80 {
81 explicit DataInfoSpec(const DataInitInfo &agg);
82
83 // helpers
84 float round_to_int(float f);
85 long long round_to_int_and_clamp(double d);
86
87 OutType absolute(const OutType &x);
88
89 // actual conversion of reference values
90 void conv(OutType *out, InType *in);
91 void conv_sat(OutType *out, InType *in);
92
93 // min/max ranges for output type of data
94 std::pair<OutType, OutType> ranges;
95
96 // matrix of clamping ranges for each rounding type
97 std::vector<std::pair<InType, InType>> clamp_ranges;
98
99 std::vector<MTdataHolder> mdv;
100
conv_arrayDataInfoSpec101 void conv_array(void *out, void *in, size_t n) override
102 {
103 for (size_t i = 0; i < n; i++)
104 conv(&((OutType *)out)[i], &((InType *)in)[i]);
105 }
106
conv_array_satDataInfoSpec107 void conv_array_sat(void *out, void *in, size_t n) override
108 {
109 for (size_t i = 0; i < n; i++)
110 conv_sat(&((OutType *)out)[i], &((InType *)in)[i]);
111 }
112
113 void init(const cl_uint &, const cl_uint &) override;
114 InType clamp(const InType &);
fclampDataInfoSpec115 inline float fclamp(float lo, float v, float hi)
116 {
117 v = v < lo ? lo : v;
118 return v < hi ? v : hi;
119 }
120
dclampDataInfoSpec121 inline double dclamp(double lo, double v, double hi)
122 {
123 v = v < lo ? lo : v;
124 return v < hi ? v : hi;
125 }
126 };
127
128 template <typename InType, typename OutType>
DataInfoSpec(const DataInitInfo & agg)129 DataInfoSpec<InType, OutType>::DataInfoSpec(const DataInitInfo &agg)
130 : DataInitBase(agg), mdv(0)
131 {
132 if (std::is_same<cl_float, OutType>::value)
133 ranges = std::make_pair(CL_FLT_MIN, CL_FLT_MAX);
134 else if (std::is_same<cl_double, OutType>::value)
135 ranges = std::make_pair(CL_DBL_MIN, CL_DBL_MAX);
136 else if (std::is_same<cl_uchar, OutType>::value)
137 ranges = std::make_pair(0, CL_UCHAR_MAX);
138 else if (std::is_same<cl_char, OutType>::value)
139 ranges = std::make_pair(CL_CHAR_MIN, CL_CHAR_MAX);
140 else if (std::is_same<cl_ushort, OutType>::value)
141 ranges = std::make_pair(0, CL_USHRT_MAX);
142 else if (std::is_same<cl_short, OutType>::value)
143 ranges = std::make_pair(CL_SHRT_MIN, CL_SHRT_MAX);
144 else if (std::is_same<cl_uint, OutType>::value)
145 ranges = std::make_pair(0, CL_UINT_MAX);
146 else if (std::is_same<cl_int, OutType>::value)
147 ranges = std::make_pair(CL_INT_MIN, CL_INT_MAX);
148 else if (std::is_same<cl_ulong, OutType>::value)
149 ranges = std::make_pair(0, CL_ULONG_MAX);
150 else if (std::is_same<cl_long, OutType>::value)
151 ranges = std::make_pair(CL_LONG_MIN, CL_LONG_MAX);
152
153 // clang-format off
154 // for readability sake keep this section unformatted
155 if (std::is_floating_point<InType>::value)
156 { // from float/double
157 InType outMin = static_cast<InType>(ranges.first);
158 InType outMax = static_cast<InType>(ranges.second);
159
160 InType eps = std::is_same<InType, cl_float>::value ? (InType) FLT_EPSILON : (InType) DBL_EPSILON;
161 if (std::is_integral<OutType>::value)
162 { // to char/uchar/short/ushort/int/uint/long/ulong
163 if (sizeof(OutType)<=sizeof(cl_short))
164 { // to char/uchar/short/ushort
165 clamp_ranges=
166 {{outMin-0.5f, outMax + 0.5f - outMax * 0.5f * eps},
167 {outMin-0.5f, outMax + 0.5f - outMax * 0.5f * eps},
168 {outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, outMax-1.f},
169 {outMin-0.0f, outMax - outMax * 0.5f * eps },
170 {outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, outMax - outMax * 0.5f * eps}};
171 }
172 else if (std::is_same<InType, cl_float>::value)
173 { // from float
174 if (std::is_same<OutType, cl_uint>::value)
175 { // to uint
176 clamp_ranges=
177 { {outMin-0.5f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)},
178 {outMin-0.5f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)},
179 {outMin-1.0f+0.5f*eps, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)},
180 {outMin-0.0f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7) },
181 {outMin-1.0f+0.5f*eps, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)}};
182 }
183 else if (std::is_same<OutType, cl_int>::value)
184 { // to int
185 clamp_ranges=
186 { {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6)},
187 {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6)},
188 {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6)},
189 {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) },
190 {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6)}};
191 }
192 else if (std::is_same<OutType, cl_ulong>::value)
193 { // to ulong
194 clamp_ranges=
195 {{outMin-0.5f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39)},
196 {outMin-0.5f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39)},
197 {outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39)},
198 {outMin-0.0f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) },
199 {outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39)}};
200 }
201 else if (std::is_same<OutType, cl_long>::value)
202 { // to long
203 clamp_ranges=
204 { {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)},
205 {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)},
206 {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)},
207 {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)},
208 {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)}};
209 }
210 }
211 else
212 { // from double
213 if (std::is_same<OutType, cl_uint>::value)
214 { // to uint
215 clamp_ranges=
216 { {outMin-0.5f, outMax + 0.5 - MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31) * eps},
217 {outMin-0.5f, outMax + 0.5 - MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31) * eps},
218 {outMin-1.0f+0.5f*eps, outMax},
219 {outMin-0.0f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp31, 0x1fffffffffffffLL, -21) },
220 {outMin-1.0f+0.5f*eps, MAKE_HEX_DOUBLE(0x1.fffffffffffffp31, 0x1fffffffffffffLL, -21)}};
221 }
222 else if (std::is_same<OutType, cl_int>::value)
223 { // to int
224 clamp_ranges=
225 { {outMin-0.5f, outMax + 0.5 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * eps},
226 {outMin-0.5f, outMax + 0.5 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * eps},
227 {outMin-1.0f+outMax*eps, outMax},
228 {outMin-0.0f, outMax + 1.0 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * eps },
229 {outMin-1.0f+outMax*eps, outMax + 1.0 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * eps}};
230 }
231 else if (std::is_same<OutType, cl_ulong>::value)
232 { // to ulong
233 clamp_ranges=
234 {{outMin-0.5f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11)},
235 {outMin-0.5f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11)},
236 {outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11)},
237 {outMin-0.0f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) },
238 {outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11)}};
239 }
240 else if (std::is_same<OutType, cl_long>::value)
241 { // to long
242 clamp_ranges=
243 { {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)},
244 {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)},
245 {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)},
246 {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)},
247 {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)}};
248 }
249 }
250 }
251 }
252 // clang-format on
253 }
254
255 template <typename InType, typename OutType>
round_to_int(float f)256 float DataInfoSpec<InType, OutType>::round_to_int(float f)
257 {
258 static const float magic[2] = { MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23),
259 -MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23) };
260
261 // Round fractional values to integer in round towards nearest mode
262 if (fabsf(f) < MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23))
263 {
264 volatile float x = f;
265 float magicVal = magic[f < 0];
266
267 #if defined(__SSE__)
268 // Defeat x87 based arithmetic, which cant do FTZ, and will round this
269 // incorrectly
270 __m128 v = _mm_set_ss(x);
271 __m128 m = _mm_set_ss(magicVal);
272 v = _mm_add_ss(v, m);
273 v = _mm_sub_ss(v, m);
274 _mm_store_ss((float *)&x, v);
275 #else
276 x += magicVal;
277 x -= magicVal;
278 #endif
279 f = x;
280 }
281 return f;
282 }
283
284 template <typename InType, typename OutType>
round_to_int_and_clamp(double f)285 long long DataInfoSpec<InType, OutType>::round_to_int_and_clamp(double f)
286 {
287 static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52),
288 MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) };
289
290 if (f >= -(double)LLONG_MIN) return LLONG_MAX;
291
292 if (f <= (double)LLONG_MIN) return LLONG_MIN;
293
294 // Round fractional values to integer in round towards nearest mode
295 if (fabs(f) < MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52))
296 {
297 volatile double x = f;
298 double magicVal = magic[f < 0];
299 #if defined(__SSE2__) || defined(_MSC_VER)
300 // Defeat x87 based arithmetic, which cant do FTZ, and will round this
301 // incorrectly
302 __m128d v = _mm_set_sd(x);
303 __m128d m = _mm_set_sd(magicVal);
304 v = _mm_add_sd(v, m);
305 v = _mm_sub_sd(v, m);
306 _mm_store_sd((double *)&x, v);
307 #else
308 x += magicVal;
309 x -= magicVal;
310 #endif
311 f = x;
312 }
313 return (long long)f;
314 }
315
316 template <typename InType, typename OutType>
absolute(const OutType & x)317 OutType DataInfoSpec<InType, OutType>::absolute(const OutType &x)
318 {
319 union {
320 cl_uint u;
321 OutType f;
322 } u;
323 u.f = x;
324 if (std::is_same<OutType, float>::value)
325 u.u &= 0x7fffffff;
326 else if (std::is_same<OutType, double>::value)
327 u.u &= 0x7fffffffffffffffULL;
328 else
329 log_error("Unexpected argument type of DataInfoSpec::absolute");
330
331 return u.f;
332 }
333
334 template <typename InType, typename OutType>
conv(OutType * out,InType * in)335 void DataInfoSpec<InType, OutType>::conv(OutType *out, InType *in)
336 {
337 if (std::is_same<cl_float, InType>::value)
338 {
339 cl_float inVal = *in;
340
341 if (std::is_floating_point<OutType>::value)
342 {
343 *out = (OutType)inVal;
344 }
345 else if (std::is_same<cl_ulong, OutType>::value)
346 {
347 #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
348 // VS2005 (at least) on x86 uses fistp to store the float as a
349 // 64-bit int. However, fistp stores it as a signed int, and some of
350 // the test values won't fit into a signed int. (These test values
351 // are >= 2^63.) The result on VS2005 is that these end up silently
352 // (at least by default settings) clamped to the max lowest ulong.
353 cl_float x = round_to_int(inVal);
354 if (x >= 9223372036854775808.0f)
355 {
356 x -= 9223372036854775808.0f;
357 ((cl_ulong *)out)[0] = x;
358 ((cl_ulong *)out)[0] += 9223372036854775808ULL;
359 }
360 else
361 {
362 ((cl_ulong *)out)[0] = x;
363 }
364 #else
365 *out = round_to_int(inVal);
366 #endif
367 }
368 else if (std::is_same<cl_long, OutType>::value)
369 {
370 *out = round_to_int_and_clamp(inVal);
371 }
372 else
373 *out = round_to_int(inVal);
374 }
375 else if (std::is_same<cl_double, InType>::value)
376 {
377 if (std::is_same<cl_float, OutType>::value)
378 *out = (OutType)*in;
379 else
380 *out = rint(*in);
381 }
382 else if (std::is_same<cl_ulong, InType>::value
383 || std::is_same<cl_long, InType>::value)
384 {
385 if (std::is_same<cl_double, OutType>::value)
386 {
387 #if defined(_MSC_VER)
388 cl_ulong l = ((cl_ulong *)in)[0];
389 double result;
390
391 if (std::is_same<cl_ulong, InType>::value)
392 {
393 cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1))
394 : (cl_long)l;
395 #if defined(_M_X64)
396 _mm_store_sd(&result, _mm_cvtsi64_sd(_mm_setzero_pd(), sl));
397 #else
398 result = sl;
399 #endif
400 ((double *)out)[0] =
401 (l == 0 ? 0.0 : (((cl_long)l < 0) ? result * 2.0 : result));
402 }
403 else
404 {
405 _mm_store_sd(&result, _mm_cvtsi64_sd(_mm_setzero_pd(), l));
406 ((double *)out)[0] =
407 (l == 0 ? 0.0 : result); // Per IEEE-754-2008 5.4.1, 0's
408 // always convert to +0.0
409 }
410 #else
411 // Use volatile to prevent optimization by Clang compiler
412 volatile InType vi = *in;
413 *out = (vi == 0 ? 0.0 : static_cast<OutType>(vi));
414 #endif
415 }
416 else if (std::is_same<cl_float, OutType>::value)
417 {
418 cl_float outVal = 0.f;
419
420 #if defined(_MSC_VER) && defined(_M_X64)
421 cl_ulong l = ((cl_ulong *)in)[0];
422 float result;
423 if (std::is_same<cl_ulong, InType>::value)
424 {
425 cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1))
426 : (cl_long)l;
427 _mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), sl));
428 outVal = (l == 0 ? 0.0f
429 : (((cl_long)l < 0) ? result * 2.0f : result));
430 }
431 else
432 {
433 _mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), l));
434 outVal = (l == 0 ? 0.0f : result); // Per IEEE-754-2008 5.4.1,
435 // 0's always convert to +0.0
436 }
437 #else
438 InType l = ((InType *)in)[0];
439 #if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
440 /* ARM VFP doesn't have hardware instruction for converting from
441 * 64-bit integer to float types, hence GCC ARM uses the
442 * floating-point emulation code despite which -mfloat-abi setting
443 * it is. But the emulation code in libgcc.a has only one rounding
444 * mode (round to nearest even in this case) and ignores the user
445 * rounding mode setting in hardware. As a result setting rounding
446 * modes in hardware won't give correct rounding results for type
447 * covert from 64-bit integer to float using GCC for ARM compiler so
448 * for testing different rounding modes, we need to use alternative
449 * reference function. ARM64 does have an instruction, however we
450 * cannot guarantee the compiler will use it. On all ARM
451 * architechures use emulation to calculate reference.*/
452 if (std::is_same<cl_ulong, InType>::value)
453 outVal = qcom_u64_2_f32(l, qcom_sat, qcom_rm);
454 else
455 outVal = (l == 0 ? 0.0f : qcom_s64_2_f32(l, qcom_sat, qcom_rm));
456 #else
457 outVal = (l == 0 ? 0.0f : (float)l); // Per IEEE-754-2008 5.4.1, 0's
458 // always convert to +0.0
459 #endif
460 #endif
461
462 *out = outVal;
463 }
464 else
465 {
466 *out = (OutType)*in;
467 }
468 }
469 else
470 {
471 if (std::is_same<cl_float, OutType>::value)
472 {
473 // Use volatile to prevent optimization by Clang compiler
474 volatile InType vi = *in;
475 // Per IEEE-754-2008 5.4.1, 0 always converts to +0.0
476 *out = (vi == 0 ? 0.0f : vi);
477 }
478 else if (std::is_same<cl_double, OutType>::value)
479 {
480 // Per IEEE-754-2008 5.4.1, 0 always converts to +0.0
481 *out = (*in == 0 ? 0.0 : *in);
482 }
483 else
484 {
485 *out = (OutType)*in;
486 }
487 }
488 }
489
// Limits _x to the closed interval [_lo, _hi]; the lower bound is tested
// first. Function-like macro: each argument may be evaluated more than once,
// so pass only side-effect-free expressions.
#define CLAMP(_lo, _x, _hi)                                                    \
    (((_x) < (_lo)) ? (_lo) : (((_x) > (_hi)) ? (_hi) : (_x)))
492
493 template <typename InType, typename OutType>
conv_sat(OutType * out,InType * in)494 void DataInfoSpec<InType, OutType>::conv_sat(OutType *out, InType *in)
495 {
496 if (std::is_floating_point<InType>::value)
497 {
498 if (std::is_floating_point<OutType>::value)
499 { // in float/double, out float/double
500 *out = (OutType)(*in);
501 }
502 else if ((std::is_same<InType, cl_float>::value)
503 && std::is_same<cl_ulong, OutType>::value)
504 {
505 cl_float x = round_to_int(*in);
506
507 #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
508 // VS2005 (at least) on x86 uses fistp to store the float as a
509 // 64-bit int. However, fistp stores it as a signed int, and some of
510 // the test values won't fit into a signed int. (These test values
511 // are >= 2^63.) The result on VS2005 is that these end up silently
512 // (at least by default settings) clamped to the max lowest ulong.
513 if (x >= 18446744073709551616.0f)
514 { // 2^64
515 *out = 0xFFFFFFFFFFFFFFFFULL;
516 }
517 else if (x < 0)
518 {
519 *out = 0;
520 }
521 else if (x >= 9223372036854775808.0f)
522 { // 2^63
523 x -= 9223372036854775808.0f;
524 *out = x;
525 *out += 9223372036854775808ULL;
526 }
527 else
528 {
529 *out = x;
530 }
531 #else
532 *out = x >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64)
533 ? 0xFFFFFFFFFFFFFFFFULL
534 : x < 0 ? 0 : (OutType)x;
535 #endif
536 }
537 else if ((std::is_same<InType, cl_float>::value)
538 && std::is_same<cl_long, OutType>::value)
539 {
540 cl_float f = round_to_int(*in);
541 *out = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63)
542 ? 0x7FFFFFFFFFFFFFFFULL
543 : f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63)
544 ? 0x8000000000000000LL
545 : (OutType)f;
546 }
547 else if (std::is_same<InType, cl_double>::value
548 && std::is_same<cl_ulong, OutType>::value)
549 {
550 InType f = rint(*in);
551 *out = f >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64)
552 ? 0xFFFFFFFFFFFFFFFFULL
553 : f < 0 ? 0 : (OutType)f;
554 }
555 else if (std::is_same<InType, cl_double>::value
556 && std::is_same<cl_long, OutType>::value)
557 {
558 InType f = rint(*in);
559 *out = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63)
560 ? 0x7FFFFFFFFFFFFFFFULL
561 : f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63)
562 ? 0x8000000000000000LL
563 : (OutType)f;
564 }
565 else
566 { // in float/double, out char/uchar/short/ushort/int/uint
567 *out =
568 CLAMP(ranges.first, round_to_int_and_clamp(*in), ranges.second);
569 }
570 }
571 else if (std::is_integral<InType>::value
572 && std::is_integral<OutType>::value)
573 {
574 {
575 if ((std::is_signed<InType>::value
576 && std::is_signed<OutType>::value)
577 || (!std::is_signed<InType>::value
578 && !std::is_signed<OutType>::value))
579 {
580 if (sizeof(InType) <= sizeof(OutType))
581 {
582 *out = (OutType)*in;
583 }
584 else
585 {
586 *out = CLAMP(ranges.first, *in, ranges.second);
587 }
588 }
589 else
590 { // mixed signed/unsigned types
591 if (sizeof(InType) < sizeof(OutType))
592 {
593 *out = (!std::is_signed<InType>::value)
594 ? (OutType)*in
595 : CLAMP(0, *in, ranges.second); // *in < 0 ? 0 : *in
596 }
597 else
598 { // bigger/equal mixed signed/unsigned types - always clamp
599 *out = CLAMP(0, *in, ranges.second);
600 }
601 }
602 }
603 }
604 else
605 { // InType integral, OutType floating
606 *out = std::is_signed<InType>::value ? (OutType)*in
607 : absolute((OutType)*in);
608 }
609 }
610
611 template <typename InType, typename OutType>
init(const cl_uint & job_id,const cl_uint & thread_id)612 void DataInfoSpec<InType, OutType>::init(const cl_uint &job_id,
613 const cl_uint &thread_id)
614 {
615 uint64_t ulStart = start;
616 void *pIn = (char *)gIn + job_id * size * gTypeSizes[inType];
617
618 if (std::is_integral<InType>::value)
619 {
620 InType *o = (InType *)pIn;
621 if (sizeof(InType) <= sizeof(cl_short))
622 { // char/uchar/ushort/short
623 for (int i = 0; i < size; i++) o[i] = ulStart++;
624 }
625 else if (sizeof(InType) <= sizeof(cl_int))
626 { // int/uint
627 int i = 0;
628 if (gIsEmbedded)
629 for (i = 0; i < size; i++)
630 o[i] = (InType)genrand_int32(mdv[thread_id]);
631 else
632 for (i = 0; i < size; i++) o[i] = (InType)i + ulStart;
633
634 if (0 == ulStart)
635 {
636 size_t tableSize = specialValuesUInt.size()
637 * sizeof(decltype(specialValuesUInt)::value_type);
638 if (sizeof(InType) * size < tableSize)
639 tableSize = sizeof(InType) * size;
640 memcpy((char *)(o + i) - tableSize, &specialValuesUInt.front(),
641 tableSize);
642 }
643 }
644 else
645 { // long/ulong
646 cl_ulong *o = (cl_ulong *)pIn;
647 cl_ulong i, j, k;
648
649 i = 0;
650 if (ulStart == 0)
651 {
652 // Try various powers of two
653 for (j = 0; j < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++)
654 o[j] = (cl_ulong)1 << j;
655 i = j;
656
657 // try the complement of those
658 for (j = 0; i < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++)
659 o[i++] = ~((cl_ulong)1 << j);
660
661 // Try various negative powers of two
662 for (j = 0; i < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++)
663 o[i++] = (cl_ulong)0xFFFFFFFFFFFFFFFEULL << j;
664
665 // try various powers of two plus 1, shifted by various amounts
666 for (j = 0; i < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++)
667 for (k = 0;
668 i < (cl_ulong)size && k < 8 * sizeof(cl_ulong) - j;
669 k++)
670 o[i++] = (((cl_ulong)1 << j) + 1) << k;
671
672 // try various powers of two minus 1
673 for (j = 0; i < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++)
674 for (k = 0;
675 i < (cl_ulong)size && k < 8 * sizeof(cl_ulong) - j;
676 k++)
677 o[i++] = (((cl_ulong)1 << j) - 1) << k;
678
679 // Other patterns
680 cl_ulong pattern[] = {
681 0x3333333333333333ULL, 0x5555555555555555ULL,
682 0x9999999999999999ULL, 0x6666666666666666ULL,
683 0xccccccccccccccccULL, 0xaaaaaaaaaaaaaaaaULL
684 };
685 cl_ulong mask[] = { 0xffffffffffffffffULL,
686 0xff00ff00ff00ff00ULL,
687 0xffff0000ffff0000ULL,
688 0xffffffff00000000ULL };
689 for (j = 0; i < (cl_ulong)size
690 && j < sizeof(pattern) / sizeof(pattern[0]);
691 j++)
692 for (k = 0; i + 2 <= (cl_ulong)size
693 && k < sizeof(mask) / sizeof(mask[0]);
694 k++)
695 {
696 o[i++] = pattern[j] & mask[k];
697 o[i++] = pattern[j] & ~mask[k];
698 }
699 }
700
701 auto &md = mdv[thread_id];
702 for (; i < (cl_ulong)size; i++)
703 o[i] = (cl_ulong)genrand_int32(md)
704 | ((cl_ulong)genrand_int32(md) << 32);
705 }
706 } // integrals
707 else if (std::is_same<InType, cl_float>::value)
708 {
709 cl_uint *o = (cl_uint *)pIn;
710 int i;
711
712 if (gIsEmbedded)
713 for (i = 0; i < size; i++)
714 o[i] = (cl_uint)genrand_int32(mdv[thread_id]);
715 else
716 for (i = 0; i < size; i++) o[i] = (cl_uint)i + ulStart;
717
718 if (0 == ulStart)
719 {
720 size_t tableSize = specialValuesFloat.size()
721 * sizeof(decltype(specialValuesFloat)::value_type);
722 if (sizeof(InType) * size < tableSize)
723 tableSize = sizeof(InType) * size;
724 memcpy((char *)(o + i) - tableSize, &specialValuesFloat.front(),
725 tableSize);
726 }
727
728 if (kUnsaturated == sat)
729 {
730 InType *f = (InType *)pIn;
731 for (i = 0; i < size; i++) f[i] = clamp(f[i]);
732 }
733 }
734 else if (std::is_same<InType, cl_double>::value)
735 {
736 InType *o = (InType *)pIn;
737 int i = 0;
738
739 union {
740 uint64_t u;
741 InType d;
742 } u;
743
744 for (i = 0; i < size; i++)
745 {
746 uint64_t z = i + ulStart;
747
748 uint32_t bits = ((uint32_t)z ^ (uint32_t)(z >> 32));
749 // split 0x89abcdef to 0x89abc00000000def
750 u.u = bits & 0xfffU;
751 u.u |= (uint64_t)(bits & ~0xfffU) << 32;
752 // sign extend the leading bit of def segment as sign bit so that
753 // the middle region consists of either all 1s or 0s
754 u.u -= (bits & 0x800U) << 1;
755 o[i] = u.d;
756 }
757
758 if (0 == ulStart)
759 {
760 size_t tableSize = specialValuesDouble.size()
761 * sizeof(decltype(specialValuesDouble)::value_type);
762 if (sizeof(InType) * size < tableSize)
763 tableSize = sizeof(InType) * size;
764 memcpy((char *)(o + i) - tableSize, &specialValuesDouble.front(),
765 tableSize);
766 }
767
768 if (0 == sat)
769 for (i = 0; i < size; i++) o[i] = clamp(o[i]);
770 }
771 }
772
773 template <typename InType, typename OutType>
clamp(const InType & in)774 InType DataInfoSpec<InType, OutType>::clamp(const InType &in)
775 {
776 if (std::is_integral<OutType>::value)
777 {
778 if (std::is_same<InType, cl_float>::value)
779 {
780 return fclamp(clamp_ranges[round].first, in,
781 clamp_ranges[round].second);
782 }
783 else if (std::is_same<InType, cl_double>::value)
784 {
785 return dclamp(clamp_ranges[round].first, in,
786 clamp_ranges[round].second);
787 }
788 }
789 return in;
790 }
791
792 #endif /* CONVERSIONS_DATA_INFO_H */
793