xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/images/kernel_read_write/test_iterations.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017, 2021 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "test_common.h"
17 #include <float.h>
18 
19 #include <algorithm>
20 
21 #if defined( __APPLE__ )
22     #include <signal.h>
23     #include <sys/signal.h>
24     #include <setjmp.h>
25 #endif
26 
27 extern bool gTestImage2DFromBuffer;
28 
29 // Utility function to clamp down image sizes for certain tests to avoid
30 // using too much memory.
reduceImageSizeRange(size_t maxDimSize)31 static size_t reduceImageSizeRange(size_t maxDimSize) {
32   size_t DimSize = maxDimSize/32;
33   if (DimSize < (size_t) 16)
34     return 16;
35   else if (DimSize > (size_t) 256)
36     return 256;
37   else
38     return DimSize;
39 }
40 
41 const char *read2DKernelSourcePattern =
42     "%s\n"
43     "__kernel void sample_kernel( read_only %s input,%s __global float "
44     "*xOffsets, __global float *yOffsets, __global %s%s *results %s)\n"
45     "{\n"
46     "%s"
47     "   int tidX = get_global_id(0), tidY = get_global_id(1);\n"
48     "%s"
49     "%s"
50     "   results[offset] = read_image%s( input, imageSampler, coords %s);\n"
51     "}";
52 
53 const char *read_write2DKernelSourcePattern =
54     "%s\n"
55     "__kernel void sample_kernel( read_write %s input,%s __global float "
56     "*xOffsets, __global float *yOffsets, __global %s%s *results %s)\n"
57     "{\n"
58     "%s"
59     "   int tidX = get_global_id(0), tidY = get_global_id(1);\n"
60     "%s"
61     "%s"
62     "   results[offset] = read_image%s( input, coords %s);\n"
63     "}";
64 
65 const char *intCoordKernelSource =
66 "   int2 coords = (int2)( xOffsets[offset], yOffsets[offset]);\n";
67 
68 const char *floatKernelSource =
69 "   float2 coords = (float2)( (float)( xOffsets[offset] ), (float)( yOffsets[offset] ) );\n";
70 
71 static const char *samplerKernelArg = " sampler_t imageSampler,";
72 
73 static const char *lodOffsetSource =
74 "   unsigned int lod_int = (unsigned int) lod;\n"
75 "   int width_lod = (get_image_width(input) >> lod_int) ?(get_image_width(input) >> lod_int):1 ;\n"
76 "   int offset = tidY*width_lod + tidX;\n";
77 
78 static const char *offsetSource =
79 "   int offset = tidY*get_image_width(input) + tidX;\n";
80 
determine_validation_error(void * imagePtr,image_descriptor * imageInfo,image_sampler_data * imageSampler,T * resultPtr,T * expected,float error,float x,float y,float xAddressOffset,float yAddressOffset,size_t j,int & numTries,int & numClamped,bool printAsFloat,int lod=0)81 template <class T> int determine_validation_error( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler,
82                                                 T *resultPtr, T * expected, float error,
83                                 float x, float y, float xAddressOffset, float yAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod = 0 )
84 {
85     int actualX, actualY;
86     int found = debug_find_pixel_in_image( imagePtr, imageInfo, resultPtr, &actualX, &actualY, NULL, lod );
87     bool clampingErr = false, clamped = false, otherClampingBug = false;
88     int clampedX, clampedY, ignoreMe;
89 
90     clamped = get_integer_coords_offset( x, y, 0.f, xAddressOffset, yAddressOffset, 0.0f, imageInfo->width, imageInfo->height, 0, imageSampler, imageInfo, clampedX, clampedY, ignoreMe );
91 
92     if( found )
93     {
94         // Is it a clamping bug?
95         if( clamped && clampedX == actualX && clampedY == actualY )
96         {
97             if( (--numClamped) == 0 )
98             {
99                 log_error( "ERROR: TEST FAILED: Read is erroneously clamping coordinates for image size %ld x %ld!\n", imageInfo->width, imageInfo->height );
100                 if (imageInfo->format->image_channel_order == CL_DEPTH)
101                 {
102                     if( printAsFloat )
103                     {
104                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g),\n\tgot      (%g),\n\terror of %g\n",
105                                 (int)j, x, x, y, y, (float)expected[ 0 ], (float)resultPtr[ 0 ], error );
106                     }
107                     else
108                     {
109                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x),\n\tgot      (%x)\n",
110                                 (int)j, x, x, y, y, (int)expected[ 0 ], (int)resultPtr[ 0 ] );
111                     }
112                 }
113                 else
114                 {
115                     if( printAsFloat )
116                     {
117                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot      (%g,%g,%g,%g),\n\terror of %g\n",
118                                 (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
119                                 (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
120                     }
121                     else
122                     {
123                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot      (%x,%x,%x,%x)\n",
124                                 (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
125                                 (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
126                     }
127                 }
128                 return 1;
129             }
130             clampingErr = true;
131             otherClampingBug = true;
132         }
133     }
134     if( clamped && !otherClampingBug )
135     {
136         // If we are in clamp-to-edge mode and we're getting zeroes, it's possible we're getting border erroneously
137         if( resultPtr[ 0 ] == 0 && resultPtr[ 1 ] == 0 && resultPtr[ 2 ] == 0 && resultPtr[ 3 ] == 0 )
138         {
139             if( (--numClamped) == 0 )
140             {
141                 log_error( "ERROR: TEST FAILED: Clamping is erroneously returning border color for image size %ld x %ld!\n", imageInfo->width, imageInfo->height );
142                 if (imageInfo->format->image_channel_order == CL_DEPTH)
143                 {
144                     if( printAsFloat )
145                     {
146                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g),\n\tgot      (%g),\n\terror of %g\n",
147                                 (int)j, x, x, y, y, (float)expected[ 0 ], (float)resultPtr[ 0 ], error );
148                     }
149                     else
150                     {
151                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x),\n\tgot      (%x)\n",
152                                 (int)j, x, x, y, y, (int)expected[ 0 ], (int)resultPtr[ 0 ] );
153                     }
154                 }
155                 else
156                 {
157                     if( printAsFloat )
158                     {
159                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot      (%g,%g,%g,%g),\n\terror of %g\n",
160                                 (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
161                                 (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
162                     }
163                     else
164                     {
165                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot      (%x,%x,%x,%x)\n",
166                                 (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
167                                 (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
168                     }
169                 }
170                 return 1;
171             }
172             clampingErr = true;
173         }
174     }
175     if( !clampingErr )
176     {
177         if (imageInfo->format->image_channel_order == CL_DEPTH)
178         {
179             if( printAsFloat )
180             {
181               log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g),\n\tgot      (%g),\n\terror of %g\n",
182                         (int)j, x, x, y, y, (float)expected[ 0 ], (float)resultPtr[ 0 ], error );
183             }
184             else
185             {
186               log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x),\n\tgot      (%x)\n",
187                         (int)j, x, x, y, y, (int)expected[ 0 ], (int)resultPtr[ 0 ] );
188             }
189         }
190         else
191         {
192             if( printAsFloat )
193             {
194                 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot      (%g,%g,%g,%g), error of %g\n",
195                           (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
196                           (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
197             }
198             else
199             {
200                 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot      (%x,%x,%x,%x)\n",
201                           (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
202                                     (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
203             }
204         }
205         log_error( "img size %ld,%ld (pitch %ld)", imageInfo->width, imageInfo->height, imageInfo->rowPitch );
206         if( clamped )
207         {
208             log_error( " which would clamp to %d,%d\n", clampedX, clampedY );
209         }
210         if( printAsFloat && gExtraValidateInfo)
211         {
212             log_error( "Nearby values:\n" );
213             log_error( "\t%d\t%d\t%d\t%d\n", clampedX - 2, clampedX - 1, clampedX, clampedX + 1 );
214             for( int yOff = -2; yOff <= 1; yOff++ )
215             {
216                 float top[ 4 ], real[ 4 ], bot[ 4 ], bot2[ 4 ];
217                 read_image_pixel_float( imagePtr, imageInfo, clampedX - 2 , clampedY + yOff, 0, top );
218                 read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 ,clampedY + yOff, 0, real );
219                 read_image_pixel_float( imagePtr, imageInfo, clampedX, clampedY + yOff, 0, bot );
220                 read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, 0, bot2 );
221                 if (imageInfo->format->image_channel_order == CL_DEPTH)
222                 {
223                     log_error( "%d\t(%g)",clampedY + yOff, top[0] );
224                     log_error( " (%g)", real[0] );
225                     log_error( " (%g)",bot[0] );
226                     log_error( " (%g)\n",bot2[0] );
227                 }
228                 else
229                 {
230                     log_error( "%d\t(%g,%g,%g,%g)",clampedY + yOff, top[0], top[1], top[2], top[3] );
231                     log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] );
232                     log_error( " (%g,%g,%g,%g)",bot[0], bot[1], bot[2], bot[3] );
233                     log_error( " (%g,%g,%g,%g)\n",bot2[0], bot2[1], bot2[2], bot2[3] );
234                 }
235             }
236 
237             if( clampedY < 1 )
238             {
239                 log_error( "Nearby values:\n" );
240                 log_error( "\t%d\t%d\t%d\t%d\n", clampedX - 2, clampedX - 1, clampedX, clampedX + 1 );
241                 for( int yOff = (int)imageInfo->height - 2; yOff <= (int)imageInfo->height + 1; yOff++ )
242                 {
243                     float top[ 4 ], real[ 4 ], bot[ 4 ], bot2[ 4 ];
244                     read_image_pixel_float( imagePtr, imageInfo, clampedX - 2 , clampedY + yOff, 0, top );
245                     read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 ,clampedY + yOff, 0, real );
246                     read_image_pixel_float( imagePtr, imageInfo, clampedX, clampedY + yOff, 0, bot );
247                     read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, 0, bot2 );
248                     if (imageInfo->format->image_channel_order == CL_DEPTH)
249                     {
250                         log_error( "%d\t(%g)",clampedY + yOff, top[0] );
251                         log_error( " (%g)", real[0] );
252                         log_error( " (%g)",bot[0] );
253                         log_error( " (%g)\n",bot2[0] );
254                     }
255                     else
256                     {
257                         log_error( "%d\t(%g,%g,%g,%g)",clampedY + yOff, top[0], top[1], top[2], top[3] );
258                         log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] );
259                         log_error( " (%g,%g,%g,%g)",bot[0], bot[1], bot[2], bot[3] );
260                         log_error( " (%g,%g,%g,%g)\n",bot2[0], bot2[1], bot2[2], bot2[3] );
261                     }
262                 }
263             }
264         }
265 
266         if( imageSampler->filter_mode != CL_FILTER_LINEAR )
267         {
268             if( found )
269                 log_error( "\tValue really found in image at %d,%d (%s)\n", actualX, actualY, ( found > 1 ) ? "NOT unique!!" : "unique" );
270             else
271                 log_error( "\tValue not actually found in image\n" );
272         }
273         log_error( "\n" );
274 
275         numClamped = -1; // We force the clamped counter to never work
276         if( ( --numTries ) == 0 )
277         {
278             return 1;
279         }
280     }
281     return 0;
282 }
283 
InitFloatCoords(image_descriptor * imageInfo,image_sampler_data * imageSampler,float * xOffsets,float * yOffsets,float xfract,float yfract,int normalized_coords,MTdata d,size_t lod)284 static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float xfract, float yfract, int normalized_coords, MTdata d, size_t lod)
285 {
286     size_t i = 0;
287     size_t width_lod = imageInfo->width, height_lod = imageInfo->height;
288 
289     if( gTestMipmaps )
290     {
291         width_lod = (imageInfo->width >> lod)?(imageInfo->width >> lod):1;
292         height_lod = (imageInfo->height >> lod)?(imageInfo->height >> lod):1;
293     }
294     if( gDisableOffsets )
295     {
296         for( size_t y = 0; y < height_lod; y++ )
297         {
298             for( size_t x = 0; x < width_lod; x++, i++ )
299             {
300                 xOffsets[ i ] = (float) (xfract + (float) x);
301                 yOffsets[ i ] = (float) (yfract + (float) y);
302             }
303         }
304     }
305     else
306     {
307         for( size_t y = 0; y < height_lod; y++ )
308         {
309             for( size_t x = 0; x < width_lod; x++, i++ )
310             {
311                 xOffsets[ i ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d )));
312                 yOffsets[ i ] = (float) (yfract + (double) ((int) y + random_in_range( -10, 10, d )));
313             }
314         }
315     }
316 
317     if( imageSampler->addressing_mode == CL_ADDRESS_NONE )
318     {
319         i = 0;
320         for( size_t y = 0; y < height_lod; y++ )
321         {
322             for( size_t x = 0; x < width_lod; x++, i++ )
323             {
324                 xOffsets[ i ] = (float) CLAMP( (double) xOffsets[ i ], 0.0, (double) width_lod - 1.0);
325                 yOffsets[ i ] = (float) CLAMP( (double) yOffsets[ i ], 0.0, (double)height_lod - 1.0);
326             }
327         }
328     }
329 
330     if( normalized_coords )
331     {
332         i = 0;
333         for( size_t y = 0; y < height_lod; y++ )
334         {
335             for( size_t x = 0; x < width_lod; x++, i++ )
336             {
337                 xOffsets[ i ] = (float) ((float) xOffsets[ i ] / (float) width_lod);
338                 yOffsets[ i ] = (float) ((float) yOffsets[ i ] / (float) height_lod);
339             }
340         }
341     }
342 }
343 
validate_image_2D_depth_results(void * imageValues,void * resultValues,double formatAbsoluteError,float * xOffsetValues,float * yOffsetValues,ExplicitType outputType,int & numTries,int & numClamped,image_sampler_data * imageSampler,image_descriptor * imageInfo,size_t lod,char * imagePtr)344 int validate_image_2D_depth_results(void *imageValues, void *resultValues, double formatAbsoluteError, float *xOffsetValues, float *yOffsetValues,
345                                                         ExplicitType outputType, int &numTries, int &numClamped, image_sampler_data *imageSampler, image_descriptor *imageInfo, size_t lod, char *imagePtr)
346 {
347     // Validate results element by element
348     size_t width_lod = (imageInfo->width >> lod ) ?(imageInfo->width >> lod ) : 1;
349     size_t height_lod = (imageInfo->height >> lod ) ?(imageInfo->height >> lod ) : 1;
350     /*
351      * FLOAT output type
352      */
353     if( outputType == kFloat )
354     {
355         // Validate float results
356         float *resultPtr = (float *)(char *)resultValues;
357         float expected[4], error=0.0f;
358         float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
359         for( size_t y = 0, j = 0; y < height_lod; y++ )
360         {
361             for( size_t x = 0; x < width_lod; x++, j++ )
362             {
363                 // Step 1: go through and see if the results verify for the pixel
364                 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
365                 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
366                 int checkOnlyOnePixel = 0;
367                 int found_pixel = 0;
368                 float offset = NORM_OFFSET;
369                 if (!imageSampler->normalized_coords
370                     || imageSampler->filter_mode != CL_FILTER_NEAREST
371                     || NORM_OFFSET == 0
372 #if defined( __APPLE__ )
373                     // Apple requires its CPU implementation to do correctly
374                     // rounded address arithmetic in all modes
375                     || !(gDeviceType & CL_DEVICE_TYPE_GPU)
376 #endif
377                 )
378                     offset = 0.0f;          // Loop only once
379 
380                 for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
381                     for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) {
382 
383                         // Try sampling the pixel, without flushing denormals.
384                         int containsDenormals = 0;
385                         FloatPixel maxPixel;
386                         maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
387                                                                     xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
388                                                                     imageSampler, expected, 0, &containsDenormals );
389 
390                         float err1 = ABS_ERROR(resultPtr[0], expected[0]);
391                         // Clamp to the minimum absolute error for the format
392                         if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; }
393                         float maxErr1 =
394                             std::max(maxErr * maxPixel.p[0], FLT_MIN);
395 
396                         // Check if the result matches.
397                         if( ! (err1 <= maxErr1) )
398                         {
399                             //try flushing the denormals, if there is a failure.
400                             if( containsDenormals )
401                             {
402                                 // If implementation decide to flush subnormals to zero,
403                                 // max error needs to be adjusted
404                                 maxErr1 += 4 * FLT_MIN;
405 
406                                 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
407                                                                              xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
408                                                                              imageSampler, expected, 0, NULL );
409 
410                                 err1 = ABS_ERROR(resultPtr[0], expected[0]);
411                             }
412                         }
413 
414                         // If the final result DOES match, then we've found a valid result and we're done with this pixel.
415                         found_pixel = (err1 <= maxErr1);
416                     }//norm_offset_x
417                 }//norm_offset_y
418 
419 
420                 // Step 2: If we did not find a match, then print out debugging info.
421                 if (!found_pixel) {
422                     // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
423                     // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
424                     checkOnlyOnePixel = 0;
425                     int shouldReturn = 0;
426                     for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
427                         for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
428 
429                             // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
430                             // E.g., test one pixel.
431                             if (!imageSampler->normalized_coords
432                                 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
433                                 || NORM_OFFSET == 0)
434                             {
435                                 norm_offset_x = 0.0f;
436                                 norm_offset_y = 0.0f;
437                                 checkOnlyOnePixel = 1;
438                             }
439 
440                             int containsDenormals = 0;
441                             FloatPixel maxPixel;
442                             maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
443                                                                                     xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
444                                                                                     imageSampler, expected, 0, &containsDenormals );
445 
446                             float err1 = ABS_ERROR(resultPtr[0], expected[0]);
447                             float maxErr1 =
448                                 std::max(maxErr * maxPixel.p[0], FLT_MIN);
449 
450 
451                             if( ! (err1 <= maxErr1) )
452                             {
453                                 //try flushing the denormals, if there is a failure.
454                                 if( containsDenormals )
455                                 {
456                                     maxErr1 += 4 * FLT_MIN;
457 
458                                     maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
459                                                                                  xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
460                                                                                  imageSampler, expected, 0, NULL );
461 
462                                     err1 = ABS_ERROR(resultPtr[0], expected[0]);
463                                 }
464                             }
465                             if( ! (err1 <= maxErr1) )
466                             {
467                                 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
468 
469                                 float tempOut[4];
470                                 shouldReturn |= determine_validation_error<float>( imagePtr, imageInfo, imageSampler, resultPtr,
471                                                                                   expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod );
472 
473                                 log_error( "Step by step:\n" );
474                                 FloatPixel temp;
475                                 temp = sample_image_pixel_float_offset( imageValues, imageInfo,
476                                                                                xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
477                                                                                imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ );
478                                 log_error( "\tulps: %2.2f  (max allowed: %2.2f)\n\n",
479                                                     Ulp_Error( resultPtr[0], expected[0] ),
480                                                     Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
481 
482                             } else {
483                                 log_error("Test error: we should have detected this passing above.\n");
484                             }
485 
486                         }//norm_offset_x
487                     }//norm_offset_y
488                     if( shouldReturn )
489                         return 1;
490                 } // if (!found_pixel)
491 
492                 resultPtr += 1;
493             }
494         }
495     }
496     else
497     {
498         log_error("Test error: Not supported format.\n");
499         return 1;
500     }
501     return 0;
502 }
503 
validate_image_2D_results(void * imageValues,void * resultValues,double formatAbsoluteError,float * xOffsetValues,float * yOffsetValues,ExplicitType outputType,int & numTries,int & numClamped,image_sampler_data * imageSampler,image_descriptor * imageInfo,size_t lod,char * imagePtr)504 int validate_image_2D_results(void *imageValues, void *resultValues, double formatAbsoluteError, float *xOffsetValues, float *yOffsetValues,
505                                                         ExplicitType outputType, int &numTries, int &numClamped, image_sampler_data *imageSampler, image_descriptor *imageInfo, size_t lod, char *imagePtr)
506 {
507     // Validate results element by element
508     size_t width_lod = (imageInfo->width >> lod ) ?(imageInfo->width >> lod ) : 1;
509     size_t height_lod = (imageInfo->height >> lod ) ?(imageInfo->height >> lod ) : 1;
510     /*
511      * FLOAT output type
512      */
513     if( outputType == kFloat )
514     {
515         // Validate float results
516         float *resultPtr = (float *)(char *)resultValues;
517         float expected[4], error=0.0f;
518         float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
519         for( size_t y = 0, j = 0; y < height_lod; y++ )
520         {
521             for( size_t x = 0; x < width_lod; x++, j++ )
522             {
523                 // Step 1: go through and see if the results verify for the pixel
524                 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
525                 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
526                 int checkOnlyOnePixel = 0;
527                 int found_pixel = 0;
528                 float offset = NORM_OFFSET;
529                 if (!imageSampler->normalized_coords
530                     || imageSampler->filter_mode != CL_FILTER_NEAREST
531                     || NORM_OFFSET == 0
532 #if defined( __APPLE__ )
533                     // Apple requires its CPU implementation to do correctly
534                     // rounded address arithmetic in all modes
535                     || !(gDeviceType & CL_DEVICE_TYPE_GPU)
536 #endif
537                 )
538                     offset = 0.0f;          // Loop only once
539 
540                 for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
541                     for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) {
542 
543 
544                         // Try sampling the pixel, without flushing denormals.
545                         int containsDenormals = 0;
546                         FloatPixel maxPixel;
547                         if ( gTestMipmaps )
548                             maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
549                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
550                                                                         imageSampler, expected, 0, &containsDenormals, lod );
551                         else
552                             maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
553                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
554                                                                         imageSampler, expected, 0, &containsDenormals );
555 
556                         float err1 = ABS_ERROR(resultPtr[0], expected[0]);
557                         float err2 = ABS_ERROR(resultPtr[1], expected[1]);
558                         float err3 = ABS_ERROR(resultPtr[2], expected[2]);
559                         float err4 = ABS_ERROR(resultPtr[3], expected[3]);
560                         // Clamp to the minimum absolute error for the format
561                         if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; }
562                         if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; }
563                         if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; }
564                         if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; }
565                         float maxErr1 =
566                             std::max(maxErr * maxPixel.p[0], FLT_MIN);
567                         float maxErr2 =
568                             std::max(maxErr * maxPixel.p[1], FLT_MIN);
569                         float maxErr3 =
570                             std::max(maxErr * maxPixel.p[2], FLT_MIN);
571                         float maxErr4 =
572                             std::max(maxErr * maxPixel.p[3], FLT_MIN);
573 
574                         // Check if the result matches.
575                         if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    ||
576                            ! (err3 <= maxErr3) || ! (err4 <= maxErr4)    )
577                         {
578                             //try flushing the denormals, if there is a failure.
579                             if( containsDenormals )
580                             {
581                                // If implementation decide to flush subnormals to zero,
582                                // max error needs to be adjusted
583                                 maxErr1 += 4 * FLT_MIN;
584                                 maxErr2 += 4 * FLT_MIN;
585                                 maxErr3 += 4 * FLT_MIN;
586                                 maxErr4 += 4 * FLT_MIN;
587 
588                                 if(gTestMipmaps)
589                                     maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
590                                                                                  xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
591                                                                                  imageSampler, expected, 0, NULL,lod );
592                                 else
593                                     maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
594                                                                                  xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
595                                                                                  imageSampler, expected, 0, NULL );
596 
597                                 err1 = ABS_ERROR(resultPtr[0], expected[0]);
598                                 err2 = ABS_ERROR(resultPtr[1], expected[1]);
599                                 err3 = ABS_ERROR(resultPtr[2], expected[2]);
600                                 err4 = ABS_ERROR(resultPtr[3], expected[3]);
601                             }
602                         }
603 
604                         // If the final result DOES match, then we've found a valid result and we're done with this pixel.
605                         found_pixel = (err1 <= maxErr1) && (err2 <= maxErr2)  && (err3 <= maxErr3) && (err4 <= maxErr4);
606                     }//norm_offset_x
607                 }//norm_offset_y
608 
609 
610                 // Step 2: If we did not find a match, then print out debugging info.
611                 if (!found_pixel) {
612                     // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
613                     // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
614                     checkOnlyOnePixel = 0;
615                     int shouldReturn = 0;
616                     for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
617                         for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
618 
619                             // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
620                             // E.g., test one pixel.
621                             if (!imageSampler->normalized_coords
622                                 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
623                                 || NORM_OFFSET == 0)
624                             {
625                                 norm_offset_x = 0.0f;
626                                 norm_offset_y = 0.0f;
627                                 checkOnlyOnePixel = 1;
628                             }
629 
630                             int containsDenormals = 0;
631                             FloatPixel maxPixel;
632                             if(gTestMipmaps)
633                                 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
634                                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
635                                                                                         imageSampler, expected, 0, &containsDenormals, lod );
636                             else
637                                 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
638                                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
639                                                                                         imageSampler, expected, 0, &containsDenormals );
640 
641                             float err1 = ABS_ERROR(resultPtr[0], expected[0]);
642                             float err2 = ABS_ERROR(resultPtr[1], expected[1]);
643                             float err3 = ABS_ERROR(resultPtr[2], expected[2]);
644                             float err4 = ABS_ERROR(resultPtr[3], expected[3]);
645                             float maxErr1 =
646                                 std::max(maxErr * maxPixel.p[0], FLT_MIN);
647                             float maxErr2 =
648                                 std::max(maxErr * maxPixel.p[1], FLT_MIN);
649                             float maxErr3 =
650                                 std::max(maxErr * maxPixel.p[2], FLT_MIN);
651                             float maxErr4 =
652                                 std::max(maxErr * maxPixel.p[3], FLT_MIN);
653 
654 
655                             if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    ||
656                                ! (err3 <= maxErr3) || ! (err4 <= maxErr4)    )
657                             {
658                                 //try flushing the denormals, if there is a failure.
659                                 if( containsDenormals )
660                                 {
661                                     maxErr1 += 4 * FLT_MIN;
662                                     maxErr2 += 4 * FLT_MIN;
663                                     maxErr3 += 4 * FLT_MIN;
664                                     maxErr4 += 4 * FLT_MIN;
665 
666                                     if(gTestMipmaps)
667                                         maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
668                                                                                      xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
669                                                                                      imageSampler, expected, 0, NULL, lod );
670                                     else
671                                         maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
672                                                                                      xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
673                                                                                      imageSampler, expected, 0, NULL );
674 
675                                     err1 = ABS_ERROR(resultPtr[0], expected[0]);
676                                     err2 = ABS_ERROR(resultPtr[1], expected[1]);
677                                     err3 = ABS_ERROR(resultPtr[2], expected[2]);
678                                     err4 = ABS_ERROR(resultPtr[3], expected[3]);
679                                 }
680                             }
681                             if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    ||
682                                ! (err3 <= maxErr3) || ! (err4 <= maxErr4)    )
683                             {
684                                 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
685 
686                                 float tempOut[4];
687                                 shouldReturn |= determine_validation_error<float>( imagePtr, imageInfo, imageSampler, resultPtr,
688                                                                                   expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod );
689 
690                                 log_error( "Step by step:\n" );
691                                 FloatPixel temp;
692                                 if( gTestMipmaps )
693                                      temp = sample_image_pixel_float_offset( imagePtr, imageInfo,
694                                                                                     xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
695                                                                                     imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/, lod );
696                                  else
697                                      temp = sample_image_pixel_float_offset( imageValues, imageInfo,
698                                                                                     xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
699                                                                                     imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ );
700                                 log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f  (max allowed: %2.2f)\n\n",
701                                                     Ulp_Error( resultPtr[0], expected[0] ),
702                                                     Ulp_Error( resultPtr[1], expected[1] ),
703                                                     Ulp_Error( resultPtr[2], expected[2] ),
704                                                     Ulp_Error( resultPtr[3], expected[3] ),
705                                                     Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
706 
707                             } else {
708                                 log_error("Test error: we should have detected this passing above.\n");
709                             }
710 
711                         }//norm_offset_x
712                     }//norm_offset_y
713                     if( shouldReturn )
714                         return 1;
715                 } // if (!found_pixel)
716 
717                 resultPtr += 4;
718             }
719         }
720     }
721     /*
722      * UINT output type
723      */
724     else if( outputType == kUInt )
725     {
726         // Validate unsigned integer results
727         unsigned int *resultPtr = (unsigned int *)(char *)resultValues;
728         unsigned int expected[4];
729         float error;
730         for( size_t y = 0, j = 0; y < height_lod ; y++ )
731         {
732             for( size_t x = 0; x < width_lod ; x++, j++ )
733             {
734                 // Step 1: go through and see if the results verify for the pixel
735                 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
736                 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
737                 int checkOnlyOnePixel = 0;
738                 int found_pixel = 0;
739                 for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
740                     for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
741 
742                         // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
743                         // E.g., test one pixel.
744                         if (!imageSampler->normalized_coords
745                             || !(gDeviceType & CL_DEVICE_TYPE_GPU)
746                             || NORM_OFFSET == 0)
747                         {
748                             norm_offset_x = 0.0f;
749                             norm_offset_y = 0.0f;
750                             checkOnlyOnePixel = 1;
751                         }
752 
753                         if ( gTestMipmaps )
754                             sample_image_pixel_offset<unsigned int>( (char*)imagePtr, imageInfo,
755                                                                                              xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
756                                                                                              imageSampler, expected, lod );
757                         else
758                             sample_image_pixel_offset<unsigned int>( imagePtr, imageInfo,
759                                                                                              xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
760                                                                                              imageSampler, expected);
761 
762 
763                         error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
764                                        errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
765 
766                         if (error <= MAX_ERR)
767                             found_pixel = 1;
768                     }//norm_offset_x
769                 }//norm_offset_y
770 
771                 // Step 2: If we did not find a match, then print out debugging info.
772                 if (!found_pixel) {
773                     // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
774                     // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
775                     checkOnlyOnePixel = 0;
776                     int shouldReturn = 0;
777                     for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
778                         for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
779 
780                             // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
781                             // E.g., test one pixel.
782                             if (!imageSampler->normalized_coords
783                                 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
784                                 || NORM_OFFSET == 0)
785                             {
786                                 norm_offset_x = 0.0f;
787                                 norm_offset_y = 0.0f;
788                                 checkOnlyOnePixel = 1;
789                             }
790 
791                             if( gTestMipmaps )
792                                 sample_image_pixel_offset<unsigned int>( imagePtr , imageInfo,
793                                                                                                  xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
794                                                                                                  imageSampler, expected, lod );
795                             else
796                                 sample_image_pixel_offset<unsigned int>( imagePtr , imageInfo,
797                                                                                                  xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
798                                                                                                  imageSampler, expected);
799 
800 
801                             error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
802                                            errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
803 
804                             if( error > MAX_ERR )
805                             {
806                                 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
807 
808                                 shouldReturn |= determine_validation_error<unsigned int>( imagePtr, imageInfo, imageSampler, resultPtr,
809                                                                                          expected, error, xOffsetValues[j], yOffsetValues[j], norm_offset_x, norm_offset_y, j, numTries, numClamped, false, lod );
810                             } else {
811                                 log_error("Test error: we should have detected this passing above.\n");
812                             }
813                         }//norm_offset_x
814                     }//norm_offset_y
815                     if( shouldReturn )
816                         return 1;
817                 } // if (!found_pixel)
818 
819                 resultPtr += 4;
820             }
821         }
822     }
823     /*
824      * INT output type
825      */
826     else
827     {
828         // Validate integer results
829         int *resultPtr = (int *)(char *)resultValues;
830         int expected[4];
831         float error;
832         for( size_t y = 0, j = 0; y < height_lod ; y++ )
833         {
834             for( size_t x = 0; x < width_lod; x++, j++ )
835             {
836                 // Step 1: go through and see if the results verify for the pixel
837                 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
838                 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
839                 int checkOnlyOnePixel = 0;
840                 int found_pixel = 0;
841                 for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
842                     for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
843 
844                         // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
845                         // E.g., test one pixel.
846                         if (!imageSampler->normalized_coords
847                             || !(gDeviceType & CL_DEVICE_TYPE_GPU)
848                             || NORM_OFFSET == 0)
849                         {
850                             norm_offset_x = 0.0f;
851                             norm_offset_y = 0.0f;
852                             checkOnlyOnePixel = 1;
853                         }
854 
855                         if ( gTestMipmaps )
856                             sample_image_pixel_offset<int>( imagePtr, imageInfo,
857                                                             xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
858                                                             imageSampler, expected , lod);
859                         else
860                             sample_image_pixel_offset<int>( imageValues, imageInfo,
861                                                             xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
862                                                             imageSampler, expected );
863 
864 
865                         error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
866                                        errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
867 
868                         if (error <= MAX_ERR)
869                             found_pixel = 1;
870                     }//norm_offset_x
871                 }//norm_offset_y
872 
873                 // Step 2: If we did not find a match, then print out debugging info.
874                 if (!found_pixel) {
875                     // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
876                     // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
877                     checkOnlyOnePixel = 0;
878                     int shouldReturn = 0;
879                     for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
880                         for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
881 
882                             // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
883                             // E.g., test one pixel.
884                             if (!imageSampler->normalized_coords
885                                 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
886                                 || NORM_OFFSET == 0)
887                             {
888                                 norm_offset_x = 0.0f;
889                                 norm_offset_y = 0.0f;
890                                 checkOnlyOnePixel = 1;
891                             }
892 
893                             if ( gTestMipmaps )
894                                 sample_image_pixel_offset<int>( imageValues, imageInfo,
895                                                                 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
896                                                                 imageSampler, expected, lod );
897                             else
898                                 sample_image_pixel_offset<int>( imageValues, imageInfo,
899                                                                 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
900                                                                 imageSampler, expected );
901 
902 
903                             error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
904                                            errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
905 
906                             if( error > MAX_ERR )
907                             {
908                                 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
909 
910                                 shouldReturn |= determine_validation_error<int>( imagePtr, imageInfo, imageSampler, resultPtr,
911                                                                                 expected, error, xOffsetValues[j], yOffsetValues[j], norm_offset_x, norm_offset_y, j, numTries, numClamped, false, lod );
912                             } else {
913                                 log_error("Test error: we should have detected this passing above.\n");
914                             }
915                         }//norm_offset_x
916                     }//norm_offset_y
917                     if( shouldReturn )
918                         return 1;
919                 } // if (!found_pixel)
920 
921                 resultPtr += 4;
922             }
923         }
924     }
925     return 0;
926 }
927 
validate_image_2D_sRGB_results(void * imageValues,void * resultValues,double formatAbsoluteError,float * xOffsetValues,float * yOffsetValues,ExplicitType outputType,int & numTries,int & numClamped,image_sampler_data * imageSampler,image_descriptor * imageInfo,size_t lod,char * imagePtr)928 int validate_image_2D_sRGB_results(void *imageValues, void *resultValues, double formatAbsoluteError, float *xOffsetValues, float *yOffsetValues,
929                                                         ExplicitType outputType, int &numTries, int &numClamped, image_sampler_data *imageSampler, image_descriptor *imageInfo, size_t lod, char *imagePtr)
930 {
931     // Validate results element by element
932     size_t width_lod = (imageInfo->width >> lod ) ?(imageInfo->width >> lod ) : 1;
933     size_t height_lod = (imageInfo->height >> lod ) ?(imageInfo->height >> lod ) : 1;
934     /*
935      * FLOAT output type
936      */
937     if( outputType == kFloat )
938     {
939         // Validate float results
940         float *resultPtr = (float *)(char *)resultValues;
941         float expected[4], error=0.0f;
942         float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
943         for( size_t y = 0, j = 0; y < height_lod; y++ )
944         {
945             for( size_t x = 0; x < width_lod; x++, j++ )
946             {
947                 // Step 1: go through and see if the results verify for the pixel
948                 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
949                 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
950                 int checkOnlyOnePixel = 0;
951                 int found_pixel = 0;
952                 float offset = NORM_OFFSET;
953                 if (!imageSampler->normalized_coords
954                     || imageSampler->filter_mode != CL_FILTER_NEAREST
955                     || NORM_OFFSET == 0
956 #if defined( __APPLE__ )
957                     // Apple requires its CPU implementation to do correctly
958                     // rounded address arithmetic in all modes
959                     || !(gDeviceType & CL_DEVICE_TYPE_GPU)
960 #endif
961                 )
962                     offset = 0.0f;          // Loop only once
963 
964                 for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
965                     for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) {
966 
967 
968                         // Try sampling the pixel, without flushing denormals.
969                         int containsDenormals = 0;
970                         FloatPixel maxPixel;
971                         if ( gTestMipmaps )
972                             maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
973                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
974                                                                         imageSampler, expected, 0, &containsDenormals, lod );
975                         else
976                             maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
977                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
978                                                                         imageSampler, expected, 0, &containsDenormals );
979                         float err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
980                                                sRGBmap(expected[0]));
981                         float err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
982                                                sRGBmap(expected[1]));
983                         float err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
984                                                sRGBmap(expected[2]));
985                         float err4 = ABS_ERROR(resultPtr[3], expected[3]);
986                         float maxErr = 0.5;
987 
988                         // Check if the result matches.
989                         if( ! (err1 <= maxErr) || ! (err2 <= maxErr)    ||
990                            ! (err3 <= maxErr) || ! (err4 <= maxErr)    )
991                         {
992                             //try flushing the denormals, if there is a failure.
993                             if( containsDenormals )
994                             {
995                                 // If implementation decide to flush subnormals to zero,
996                                 // max error needs to be adjusted
997                                 maxErr += 4 * FLT_MIN;
998 
999                                 if(gTestMipmaps)
1000                                     maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
1001                                                                                  xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1002                                                                                  imageSampler, expected, 0, NULL,lod );
1003                                 else
1004                                     maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
1005                                                                                  xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1006                                                                                  imageSampler, expected, 0, NULL );
1007 
1008                                 err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
1009                                                  sRGBmap(expected[0]));
1010                                 err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
1011                                                  sRGBmap(expected[1]));
1012                                 err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
1013                                                  sRGBmap(expected[2]));
1014                                 err4 = ABS_ERROR(resultPtr[3], expected[3]);
1015                             }
1016                         }
1017 
1018                         // If the final result DOES match, then we've found a valid result and we're done with this pixel.
1019                         found_pixel = (err1 <= maxErr) && (err2 <= maxErr)  && (err3 <= maxErr) && (err4 <= maxErr);
1020                     }//norm_offset_x
1021                 }//norm_offset_y
1022 
1023 
1024                 // Step 2: If we did not find a match, then print out debugging info.
1025                 if (!found_pixel) {
1026                     // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
1027                     // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
1028                     checkOnlyOnePixel = 0;
1029                     int shouldReturn = 0;
1030                     for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
1031                         for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
1032 
1033                             // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
1034                             // E.g., test one pixel.
1035                             if (!imageSampler->normalized_coords
1036                                 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
1037                                 || NORM_OFFSET == 0)
1038                             {
1039                                 norm_offset_x = 0.0f;
1040                                 norm_offset_y = 0.0f;
1041                                 checkOnlyOnePixel = 1;
1042                             }
1043 
1044                             int containsDenormals = 0;
1045                             FloatPixel maxPixel;
1046                             if(gTestMipmaps)
1047                                 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
1048                                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1049                                                                                         imageSampler, expected, 0, &containsDenormals, lod );
1050                             else
1051                                 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
1052                                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1053                                                                                         imageSampler, expected, 0, &containsDenormals );
1054 
1055                             float err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
1056                                                    sRGBmap(expected[0]));
1057                             float err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
1058                                                    sRGBmap(expected[1]));
1059                             float err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
1060                                                    sRGBmap(expected[2]));
1061                             float err4 = ABS_ERROR(resultPtr[3], expected[3]);
1062                             float maxErr = 0.6;
1063 
1064                             if( ! (err1 <= maxErr) || ! (err2 <= maxErr)    ||
1065                                ! (err3 <= maxErr) || ! (err4 <= maxErr)    )
1066                             {
1067                                 //try flushing the denormals, if there is a failure.
1068                                 if( containsDenormals )
1069                                 {
1070                                     // If implementation decide to flush subnormals to zero,
1071                                     // max error needs to be adjusted
1072                                     maxErr += 4 * FLT_MIN;
1073                                     if(gTestMipmaps)
1074                                         maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
1075                                                                                      xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1076                                                                                      imageSampler, expected, 0, NULL, lod );
1077                                     else
1078                                         maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
1079                                                                                      xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1080                                                                                      imageSampler, expected, 0, NULL );
1081 
1082                                     err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
1083                                                      sRGBmap(expected[0]));
1084                                     err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
1085                                                      sRGBmap(expected[1]));
1086                                     err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
1087                                                      sRGBmap(expected[2]));
1088                                     err4 = ABS_ERROR(resultPtr[3], expected[3]);
1089                                 }
1090                             }
1091                             if( ! (err1 <= maxErr) || ! (err2 <= maxErr)    ||
1092                                ! (err3 <= maxErr) || ! (err4 <= maxErr)    )
1093                             {
1094                                 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
1095 
1096                                 float tempOut[4];
1097                                 shouldReturn |= determine_validation_error<float>( imagePtr, imageInfo, imageSampler, resultPtr,
1098                                                                                   expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod );
1099 
1100                                 log_error( "Step by step:\n" );
1101                                 FloatPixel temp;
1102                                 if( gTestMipmaps )
1103                                      temp = sample_image_pixel_float_offset( imagePtr, imageInfo,
1104                                                                                     xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1105                                                                                     imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/, lod );
1106                                  else
1107                                      temp = sample_image_pixel_float_offset( imageValues, imageInfo,
1108                                                                                     xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1109                                                                                     imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ );
1110                                 log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f  (max allowed: %2.2f)\n\n",
1111                                                     Ulp_Error( resultPtr[0], expected[0] ),
1112                                                     Ulp_Error( resultPtr[1], expected[1] ),
1113                                                     Ulp_Error( resultPtr[2], expected[2] ),
1114                                                     Ulp_Error( resultPtr[3], expected[3] ),
1115                                                     Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
1116 
1117                             } else {
1118                                 log_error("Test error: we should have detected this passing above.\n");
1119                             }
1120 
1121                         }//norm_offset_x
1122                     }//norm_offset_y
1123                     if( shouldReturn )
1124                         return 1;
1125                 } // if (!found_pixel)
1126 
1127                 resultPtr += 4;
1128             }
1129         }
1130     }
1131     else {
1132         log_error("Test error: NOT SUPPORTED.\n");
1133     }
1134     return 0;
1135 }
1136 
validate_float_write_results(float * expected,float * actual,image_descriptor * imageInfo)1137 bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo )
1138 {
1139     bool pass = true;
1140     // Compare floats
1141     if( memcmp( expected, actual, sizeof( cl_float ) * get_format_channel_count( imageInfo->format ) ) != 0 )
1142     {
1143         // 8.3.3 Fix up cases where we have NaNs or flushed denorms; "all other values must be preserved"
1144         for ( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
1145         {
1146             if ( isnan( expected[j] ) && isnan( actual[j] ) )
1147                 continue;
1148             if ( IsFloatSubnormal( expected[j] ) && actual[j] == 0.0f )
1149                 continue;
1150             if (expected[j] != actual[j])
1151             {
1152                 pass = false;
1153                 break;
1154             }
1155         }
1156     }
1157     return pass;
1158 }
1159 
validate_half_write_results(cl_half * expected,cl_half * actual,image_descriptor * imageInfo)1160 bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo )
1161 {
1162     bool pass = true;
1163     // Compare half floats
1164     if (memcmp(expected, actual, sizeof( cl_half ) * get_format_channel_count(imageInfo->format)) != 0) {
1165 
1166         // 8.3.2 Fix up cases where we have NaNs or generated half denormals
1167         for ( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ ) {
1168             if ( is_half_nan( expected[j] ) && is_half_nan( actual[j] ) )
1169                 continue;
1170             if ( is_half_denorm( expected[j] ) && is_half_zero( actual[j] ) )
1171                 continue;
1172             if (expected[j] != actual[j])
1173             {
1174                 pass = false;
1175                 break;
1176             }
1177         }
1178     }
1179     return pass;
1180 }
1181 
test_read_image_2D(cl_context context,cl_command_queue queue,cl_kernel kernel,image_descriptor * imageInfo,image_sampler_data * imageSampler,bool useFloatCoords,ExplicitType outputType,MTdata d)1182 int test_read_image_2D( cl_context context, cl_command_queue queue, cl_kernel kernel,
1183                         image_descriptor *imageInfo, image_sampler_data *imageSampler,
1184                        bool useFloatCoords, ExplicitType outputType, MTdata d )
1185 {
1186     int error;
1187     static int initHalf = 0;
1188     cl_mem imageBuffer;
1189     cl_mem_flags    image_read_write_flags = CL_MEM_READ_ONLY;
1190     size_t threads[2];
1191 
1192     clMemWrapper xOffsets, yOffsets, results;
1193     clSamplerWrapper actualSampler;
1194     BufferOwningPtr<char> maxImageUseHostPtrBackingStore;
1195 
1196     // The DataBuffer template class really does use delete[], not free -- IRO
1197     BufferOwningPtr<cl_float> xOffsetValues(malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height));
1198     BufferOwningPtr<cl_float> yOffsetValues(malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height));
1199 
1200     if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
1201         if( DetectFloatToHalfRoundingMode(queue) )
1202             return 1;
1203 
1204     // generate_random_image_data allocates with malloc, so we use a MallocDataBuffer here
1205     BufferOwningPtr<char> imageValues;
1206     generate_random_image_data( imageInfo, imageValues, d );
1207 
1208     if( gDebugTrace )
1209     {
1210         log_info( " - Creating image %d by %d...\n", (int)imageInfo->width, (int)imageInfo->height );
1211         if( gTestMipmaps )
1212         {
1213             log_info( " - with %d mip levels", (int) imageInfo->num_mip_levels );
1214         }
1215     }
1216 
1217     // Construct testing sources
1218     clProtectedImage protImage;
1219     clMemWrapper unprotImage;
1220     cl_mem image;
1221 
1222     if(gtestTypesToRun & kReadTests)
1223     {
1224         image_read_write_flags = CL_MEM_READ_ONLY;
1225     }
1226     else
1227     {
1228         image_read_write_flags = CL_MEM_READ_WRITE;
1229     }
1230 
1231     if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
1232     {
1233         if (gTestImage2DFromBuffer)
1234         {
1235             generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d );
1236             imageBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
1237                                           imageInfo->rowPitch * imageInfo->height, maxImageUseHostPtrBackingStore, &error);
1238             test_error( error, "Unable to create buffer" );
1239             unprotImage = create_image_2d_buffer( context,
1240                                           image_read_write_flags,
1241                                           imageInfo->format,
1242                                           imageInfo->width, imageInfo->height, imageInfo->rowPitch,
1243                                           imageBuffer, &error );
1244 
1245         }
1246         else
1247         {
1248             // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian)
1249             // Do not use protected images for max image size test since it rounds the row size to a page size
1250             if (gTestMaxImages) {
1251                 generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d );
1252                 unprotImage = create_image_2d( context,
1253                                         image_read_write_flags | CL_MEM_USE_HOST_PTR,
1254                                         imageInfo->format,
1255                                         imageInfo->width, imageInfo->height, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
1256                                         maxImageUseHostPtrBackingStore, &error );
1257             }
1258             else
1259             {
1260                 error = protImage.Create( context,
1261                                         image_read_write_flags,
1262                                         imageInfo->format, imageInfo->width, imageInfo->height );
1263             }
1264         }
1265 
1266         if( error != CL_SUCCESS )
1267         {
1268             if (gTestImage2DFromBuffer) {
1269                 clReleaseMemObject(imageBuffer);
1270                 if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
1271                     log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
1272                     return 0;
1273                 }
1274             }
1275 
1276             log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
1277             return error;
1278         }
1279 
1280         if (gTestMaxImages || gTestImage2DFromBuffer)
1281             image = (cl_mem)unprotImage;
1282         else
1283             image = (cl_mem)protImage;
1284     }
1285     else if( gMemFlagsToUse == CL_MEM_COPY_HOST_PTR )
1286     {
1287         if (gTestImage2DFromBuffer)
1288         {
1289             imageBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
1290                                          imageInfo->rowPitch * imageInfo->height, imageValues, &error);
1291             test_error( error, "Unable to create buffer" );
1292             unprotImage = create_image_2d_buffer( context,
1293                                                  image_read_write_flags,
1294                                                  imageInfo->format,
1295                                                  imageInfo->width, imageInfo->height, imageInfo->rowPitch,
1296                                                  imageBuffer, &error );
1297 
1298         }
1299         else
1300         {
1301             // Don't use clEnqueueWriteImage; just use copy host ptr to get the data in
1302             unprotImage = create_image_2d( context,
1303                                       image_read_write_flags | CL_MEM_COPY_HOST_PTR,
1304                                       imageInfo->format,
1305                                       imageInfo->width, imageInfo->height, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
1306                                       imageValues, &error );
1307         }
1308         if( error != CL_SUCCESS )
1309         {
1310             if (gTestImage2DFromBuffer) {
1311                 clReleaseMemObject(imageBuffer);
1312                 if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
1313                     log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
1314                     return 0;
1315                 }
1316             }
1317 
1318             log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
1319             return error;
1320         }
1321         image = unprotImage;
1322     }
1323     else // Either CL_MEM_ALLOC_HOST_PTR or none
1324     {
1325         if( gTestMipmaps )
1326         {
1327             cl_image_desc image_desc = {0};
1328             image_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
1329             image_desc.image_width = imageInfo->width;
1330             image_desc.image_height = imageInfo->height;
1331             image_desc.num_mip_levels = imageInfo->num_mip_levels;
1332             unprotImage = clCreateImage( context, CL_MEM_READ_ONLY, imageInfo->format, &image_desc, NULL, &error);
1333         }
1334         else if (gTestImage2DFromBuffer)
1335         {
1336             imageBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | gMemFlagsToUse,
1337                                          imageInfo->rowPitch * imageInfo->height, imageValues, &error);
1338             test_error( error, "Unable to create buffer" );
1339             unprotImage = create_image_2d_buffer( context,
1340                                                  image_read_write_flags,
1341                                                  imageInfo->format,
1342                                                  imageInfo->width, imageInfo->height, imageInfo->rowPitch,
1343                                                  imageBuffer, &error );
1344 
1345         }
1346         else
1347         {
1348             // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
1349             // it works just as if no flag is specified, so we just do the same thing either way
1350             unprotImage = create_image_2d( context,
1351                                       image_read_write_flags | gMemFlagsToUse,
1352                                       imageInfo->format,
1353                                       imageInfo->width, imageInfo->height, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
1354                                       imageValues, &error );
1355         }
1356         if( error != CL_SUCCESS )
1357         {
1358             if (gTestImage2DFromBuffer) {
1359                 clReleaseMemObject(imageBuffer);
1360                 if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
1361                     log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
1362                     return 0;
1363                 }
1364             }
1365 
1366             log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
1367             return error;
1368         }
1369         image = unprotImage;
1370     }
1371 
1372     if( gMemFlagsToUse != CL_MEM_COPY_HOST_PTR )
1373     {
1374         if( gDebugTrace )
1375             log_info( " - Writing image...\n" );
1376 
1377         size_t origin[ 3 ] = { 0, 0, 0 };
1378         size_t region[ 3 ] = { imageInfo->width, imageInfo->height, 1 };
1379 
1380         if(!gTestMipmaps)
1381         {
1382             error = clEnqueueWriteImage(queue, image, CL_TRUE,
1383                                         origin, region, ( gEnablePitch ? imageInfo->rowPitch : 0 ), 0,
1384                                        imageValues, 0, NULL, NULL);
1385             if (error != CL_SUCCESS)
1386             {
1387                 log_error( "ERROR: Unable to write to 2D image of size %d x %d\n", (int)imageInfo->width, (int)imageInfo->height );
1388                 return error;
1389             }
1390         }
1391         else
1392         {
1393             size_t tmpNextLevelOffset = 0;
1394             for(size_t level = 0; level < imageInfo->num_mip_levels; level++)
1395             {
1396                 origin[2] = level;
1397                 error = clEnqueueWriteImage(queue, image, CL_TRUE,
1398                                             origin, region, (( gEnablePitch || gTestImage2DFromBuffer) ? imageInfo->rowPitch : 0 ), 0,
1399                                             (char*)imageValues + tmpNextLevelOffset, 0, NULL, NULL);
1400                 tmpNextLevelOffset += region[0]*region[1]*get_pixel_size(imageInfo->format);
1401                 region[0] = (region[0] >> 1) ? (region[0] >> 1) : 1;
1402                 region[1] = (region[1] >> 1) ? (region[1] >> 1) : 1;
1403             }
1404         }
1405     }
1406 
1407     if( gDebugTrace )
1408         log_info( " - Creating kernel arguments...\n" );
1409 
1410     xOffsets =
1411         clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
1412                        sizeof(cl_float) * imageInfo->width * imageInfo->height,
1413                        xOffsetValues, &error);
1414     test_error( error, "Unable to create x offset buffer" );
1415     yOffsets =
1416         clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
1417                        sizeof(cl_float) * imageInfo->width * imageInfo->height,
1418                        yOffsetValues, &error);
1419     test_error( error, "Unable to create y offset buffer" );
1420     results = clCreateBuffer(context, CL_MEM_READ_WRITE,
1421                              get_explicit_type_size(outputType) * 4
1422                                  * imageInfo->width * imageInfo->height,
1423                              NULL, &error);
1424     test_error( error, "Unable to create result buffer" );
1425 
1426     // Create sampler to use
1427     actualSampler = create_sampler(context, imageSampler, gTestMipmaps, &error);
1428     test_error(error, "Unable to create image sampler");
1429 
1430     // Set arguments
1431     int idx = 0;
1432     error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &image );
1433     test_error( error, "Unable to set kernel arguments" );
1434     if( !gUseKernelSamplers )
1435     {
1436         error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler );
1437         test_error( error, "Unable to set kernel arguments" );
1438     }
1439     error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &xOffsets );
1440     test_error( error, "Unable to set kernel arguments" );
1441     error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &yOffsets );
1442     test_error( error, "Unable to set kernel arguments" );
1443     error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results );
1444     test_error( error, "Unable to set kernel arguments" );
1445 
1446     // A cast of troublesome offsets. The first one has to be zero.
1447     const float float_offsets[] = { 0.0f, MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30), 0.25f, 0.3f, 0.5f - FLT_EPSILON/4.0f, 0.5f, 0.9f, 1.0f - FLT_EPSILON/2 };
1448     int float_offset_count = sizeof( float_offsets) / sizeof( float_offsets[0] );
1449     int numTries = MAX_TRIES, numClamped = MAX_CLAMPED;
1450     int loopCount = 2 * float_offset_count;
1451     if( ! useFloatCoords )
1452         loopCount = 1;
1453     if (gTestMaxImages) {
1454         loopCount = 1;
1455       log_info("Testing each size only once with pixel offsets of %g for max sized images.\n", float_offsets[0]);
1456     }
1457 
1458     if(gtestTypesToRun & kReadWriteTests)
1459     {
1460         loopCount = 1;
1461     }
1462 
1463     // Get the maximum absolute error for this format
1464     double formatAbsoluteError = get_max_absolute_error(imageInfo->format, imageSampler);
1465     if (gDebugTrace) log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError);
1466 
1467     if (0 == initHalf && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) {
1468         initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode( queue );
1469         if (initHalf) {
1470             log_info("Half rounding mode successfully detected.\n");
1471         }
1472     }
1473 
1474     size_t nextLevelOffset = 0;
1475     size_t width_lod = imageInfo->width, height_lod = imageInfo->height;
1476     for( size_t lod = 0; (gTestMipmaps && (lod < imageInfo->num_mip_levels))|| (!gTestMipmaps && lod < 1); lod ++)
1477     {
1478         size_t resultValuesSize = width_lod * height_lod * get_explicit_type_size( outputType ) * 4;
1479         BufferOwningPtr<char> resultValues(malloc(resultValuesSize));
1480         float lod_float = (float)lod;
1481         char *imagePtr = (char *)imageValues + nextLevelOffset;
1482         if( gTestMipmaps )
1483         {
1484             if(gDebugTrace)
1485                 log_info("\t- Working at mip level %d\n", lod);
1486             error = clSetKernelArg( kernel, idx, sizeof(float), &lod_float);
1487         }
1488 
1489         // Validate results element by element
1490         for( int q = 0; q < loopCount; q++ )
1491         {
1492             float offset = float_offsets[ q % float_offset_count ];
1493 
1494             // Init the coordinates
1495             InitFloatCoords( imageInfo, imageSampler, xOffsetValues, yOffsetValues,
1496                                 q>=float_offset_count ? -offset: offset,
1497                                 q>=float_offset_count ? offset: -offset, imageSampler->normalized_coords, d, lod );
1498 
1499             error = clEnqueueWriteBuffer( queue, xOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width, xOffsetValues, 0, NULL, NULL );
1500             test_error( error, "Unable to write x offsets" );
1501             error = clEnqueueWriteBuffer( queue, yOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width, yOffsetValues, 0, NULL, NULL );
1502             test_error( error, "Unable to write y offsets" );
1503 
1504             // Get results
1505             memset( resultValues, 0xff, resultValuesSize );
1506             clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL );
1507 
1508             // Run the kernel
1509             threads[0] = (size_t)width_lod;
1510             threads[1] = (size_t)height_lod;
1511             error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
1512             test_error( error, "Unable to run kernel" );
1513 
1514             if( gDebugTrace )
1515                 log_info( "    reading results, %ld kbytes\n", (unsigned long)( width_lod * height_lod * get_explicit_type_size( outputType ) * 4 / 1024 ) );
1516 
1517             error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, width_lod * height_lod * get_explicit_type_size( outputType ) * 4, resultValues, 0, NULL, NULL ); //XXX check
1518             test_error( error, "Unable to read results from kernel" );
1519             if( gDebugTrace )
1520                 log_info( "    results read\n" );
1521 
1522             int retCode;
1523             switch (imageInfo->format->image_channel_order) {
1524             case CL_DEPTH:
1525                 retCode = validate_image_2D_depth_results((char*)imageValues + nextLevelOffset, resultValues, formatAbsoluteError, xOffsetValues, yOffsetValues, outputType, numTries, numClamped, imageSampler, imageInfo, lod, imagePtr);
1526                 break;
1527             case CL_sRGB:
1528             case CL_sRGBx:
1529             case CL_sRGBA:
1530             case CL_sBGRA:
1531                 retCode = validate_image_2D_sRGB_results((char*)imageValues + nextLevelOffset, resultValues, formatAbsoluteError, xOffsetValues, yOffsetValues, outputType, numTries, numClamped, imageSampler, imageInfo, lod, imagePtr);
1532                 break;
1533             default:
1534                 retCode = validate_image_2D_results((char*)imageValues + nextLevelOffset, resultValues, formatAbsoluteError, xOffsetValues, yOffsetValues, outputType, numTries, numClamped, imageSampler, imageInfo, lod, imagePtr);
1535             }
1536             if (retCode)
1537                 return retCode;
1538         }
1539         if ( gTestMipmaps )
1540         {
1541             nextLevelOffset += width_lod * height_lod * get_pixel_size( imageInfo->format );
1542             width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1;
1543             height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1;
1544         }
1545     }
1546 
1547     if (gTestImage2DFromBuffer) clReleaseMemObject(imageBuffer);
1548 
1549     return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
1550 }
1551 
test_read_image_set_2D(cl_device_id device,cl_context context,cl_command_queue queue,const cl_image_format * format,image_sampler_data * imageSampler,bool floatCoords,ExplicitType outputType)1552 int test_read_image_set_2D(cl_device_id device, cl_context context,
1553                            cl_command_queue queue,
1554                            const cl_image_format *format,
1555                            image_sampler_data *imageSampler, bool floatCoords,
1556                            ExplicitType outputType)
1557 {
1558     char programSrc[10240];
1559     const char *ptr;
1560     const char *readFormat;
1561     clProgramWrapper program;
1562     clKernelWrapper kernel;
1563     const char *KernelSourcePattern = NULL;
1564 
1565     if (gTestImage2DFromBuffer)
1566     {
1567         if (format->image_channel_order == CL_RGB || format->image_channel_order == CL_RGBx)
1568         {
1569             switch (format->image_channel_data_type)
1570             {
1571                 case CL_UNORM_INT8:
1572                 case CL_UNORM_INT16:
1573                 case CL_SNORM_INT8:
1574                 case CL_SNORM_INT16:
1575                 case CL_HALF_FLOAT:
1576                 case CL_FLOAT:
1577                 case CL_SIGNED_INT8:
1578                 case CL_SIGNED_INT16:
1579                 case CL_SIGNED_INT32:
1580                 case CL_UNSIGNED_INT8:
1581                 case CL_UNSIGNED_INT16:
1582                 case CL_UNSIGNED_INT32:
1583                     log_info( "Skipping image format: %s %s\n", GetChannelOrderName( format->image_channel_order ),
1584                              GetChannelTypeName( format->image_channel_data_type ));
1585                     return 0;
1586                 default:
1587                     break;
1588             }
1589         }
1590     }
1591 
1592 
1593     RandomSeed seed( gRandomSeed );
1594     int error;
1595 
1596     // Get our operating params
1597     size_t maxWidth, maxHeight;
1598     cl_ulong maxAllocSize, memSize;
1599     image_descriptor imageInfo = { 0x0 };
1600     size_t pixelSize;
1601 
1602     imageInfo.format = format;
1603     imageInfo.depth = imageInfo.arraySize = imageInfo.slicePitch = 0;
1604     imageInfo.type = CL_MEM_OBJECT_IMAGE2D;
1605     pixelSize = get_pixel_size( imageInfo.format );
1606 
1607     error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
1608     error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
1609     error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
1610     error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
1611     test_error( error, "Unable to get max image 2D size from device" );
1612 
1613     if (memSize > (cl_ulong)SIZE_MAX) {
1614       memSize = (cl_ulong)SIZE_MAX;
1615     }
1616 
1617     // Determine types
1618     if( outputType == kInt )
1619         readFormat = "i";
1620     else if( outputType == kUInt )
1621         readFormat = "ui";
1622     else // kFloat
1623         readFormat = "f";
1624 
1625     // Construct the source
1626     const char *samplerArg = samplerKernelArg;
1627     char samplerVar[ 1024 ] = "";
1628     if( gUseKernelSamplers )
1629     {
1630         get_sampler_kernel_code( imageSampler, samplerVar );
1631         samplerArg = "";
1632     }
1633 
1634     if(gtestTypesToRun & kReadTests)
1635     {
1636         KernelSourcePattern = read2DKernelSourcePattern;
1637     }
1638     else
1639     {
1640         KernelSourcePattern = read_write2DKernelSourcePattern;
1641     }
1642 
1643 
1644     sprintf(programSrc, KernelSourcePattern,
1645             gTestMipmaps
1646                 ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable"
1647                 : "",
1648             (format->image_channel_order == CL_DEPTH) ? "image2d_depth_t"
1649                                                       : "image2d_t",
1650             samplerArg, get_explicit_type_name(outputType),
1651             (format->image_channel_order == CL_DEPTH) ? "" : "4",
1652             gTestMipmaps ? ", float lod" : " ", samplerVar,
1653             gTestMipmaps ? lodOffsetSource : offsetSource,
1654             floatCoords ? floatKernelSource : intCoordKernelSource, readFormat,
1655             gTestMipmaps ? ", lod" : " ");
1656 
1657     ptr = programSrc;
1658     error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
1659                                         "sample_kernel");
1660     test_error( error, "Unable to create testing kernel" );
1661 
1662     if( gTestSmallImages )
1663     {
1664         for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
1665         {
1666             imageInfo.rowPitch = imageInfo.width * pixelSize;
1667             for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ )
1668             {
1669                 if( gTestMipmaps )
1670                 imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed);
1671 
1672                 if( gDebugTrace )
1673                     log_info( "   at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.height );
1674 
1675                 int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1676                 if( retCode )
1677                     return retCode;
1678             }
1679         }
1680     }
1681     else if( gTestMaxImages )
1682     {
1683         // Try a specific set of maximum sizes
1684         size_t numbeOfSizes;
1685         size_t sizes[100][3];
1686 
1687         get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D, imageInfo.format, CL_TRUE);
1688 
1689         for( size_t idx = 0; idx < numbeOfSizes; idx++ )
1690         {
1691             imageInfo.width = sizes[ idx ][ 0 ];
1692             imageInfo.height = sizes[ idx ][ 1 ];
1693             imageInfo.rowPitch = imageInfo.width * pixelSize;
1694             log_info("Testing %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ]);
1695 
1696             if( gTestMipmaps )
1697                 imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed);
1698 
1699             if( gDebugTrace )
1700                 log_info( "   at max size %d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ] );
1701             int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1702             if( retCode )
1703                 return retCode;
1704         }
1705     }
1706     else if( gTestRounding )
1707     {
1708         uint64_t typeRange = 1LL << ( get_format_type_size( imageInfo.format ) * 8 );
1709         typeRange /= pixelSize / get_format_type_size( imageInfo.format );
1710         imageInfo.height = (size_t)( ( typeRange + 255LL ) / 256LL );
1711         imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height );
1712         while( imageInfo.height >= maxHeight / 2 )
1713         {
1714             imageInfo.width <<= 1;
1715             imageInfo.height >>= 1;
1716         }
1717 
1718         while( imageInfo.width >= maxWidth / 2 )
1719             imageInfo.width >>= 1;
1720         imageInfo.rowPitch = imageInfo.width * pixelSize;
1721 
1722         gRoundingStartValue = 0;
1723         do
1724         {
1725             if( gDebugTrace )
1726                 log_info( "   at size %d,%d, starting round ramp at %llu for range %llu\n", (int)imageInfo.width, (int)imageInfo.height, gRoundingStartValue, typeRange );
1727             int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1728             if( retCode )
1729                 return retCode;
1730 
1731             gRoundingStartValue += imageInfo.width * imageInfo.height * pixelSize / get_format_type_size( imageInfo.format );
1732 
1733         } while( gRoundingStartValue < typeRange );
1734     }
1735     else
1736     {
1737         cl_uint imagePitchAlign = 0;
1738         if (gTestImage2DFromBuffer)
1739         {
1740 #if defined(CL_DEVICE_IMAGE_PITCH_ALIGNMENT)
1741             error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof( cl_uint ), &imagePitchAlign, NULL );
1742 #endif
1743             if (!imagePitchAlign || error) {
1744               test_error( error, "Unable to get CL_DEVICE_IMAGE_PITCH_ALIGNMENT from device" );
1745               imagePitchAlign = 1;
1746             }
1747         }
1748 
1749         int maxWidthRange = (int) reduceImageSizeRange(maxWidth);
1750         int maxHeightRange = (int) reduceImageSizeRange(maxHeight);
1751 
1752         for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
1753         {
1754             cl_ulong size;
1755             // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
1756             // image, the result array, plus offset arrays, will fit in the global ram space
1757             do
1758             {
1759                 imageInfo.width = (size_t)random_log_in_range( 16, maxWidthRange, seed );
1760                 imageInfo.height = (size_t)random_log_in_range( 16, maxHeightRange, seed );
1761 
1762                 imageInfo.rowPitch = imageInfo.width * pixelSize;
1763                 if( gTestMipmaps )
1764                 {
1765                     imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed);
1766                     size = 4 * compute_mipmapped_image_size(imageInfo);
1767                 }
1768                 else
1769                 {
1770                     if( gEnablePitch )
1771                     {
1772                         size_t extraWidth = (int)random_log_in_range( 0, 64, seed );
1773                         imageInfo.rowPitch += extraWidth * pixelSize;
1774                     }
1775 
1776                 // if we are creating a 2D image from a buffer, make sure that the rowpitch is aligned to CL_DEVICE_IMAGE_PITCH_ALIGNMENT_APPLE
1777                     if (gTestImage2DFromBuffer)
1778                     {
1779                         size_t pitch = imagePitchAlign * pixelSize;
1780                         imageInfo.rowPitch = ((imageInfo.rowPitch + pitch - 1) / pitch ) * pitch;
1781                     }
1782 
1783                     size = (size_t)imageInfo.rowPitch * (size_t)imageInfo.height * 4;
1784                 }
1785             } while(  size > maxAllocSize || ( size * 3 ) > memSize );
1786 
1787             if( gDebugTrace )
1788                 log_info( "   at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxHeight );
1789             int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1790             if( retCode )
1791                 return retCode;
1792         }
1793     }
1794 
1795     return 0;
1796 }
1797