xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/images/kernel_read_write/test_common.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2021 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include "test_common.h"
18 
19 #include <algorithm>
20 
create_sampler(cl_context context,image_sampler_data * sdata,bool test_mipmaps,cl_int * error)21 cl_sampler create_sampler(cl_context context, image_sampler_data *sdata, bool test_mipmaps, cl_int *error) {
22     cl_sampler sampler = nullptr;
23     if (test_mipmaps) {
24         cl_sampler_properties properties[] = {
25             CL_SAMPLER_NORMALIZED_COORDS, sdata->normalized_coords,
26             CL_SAMPLER_ADDRESSING_MODE, sdata->addressing_mode,
27             CL_SAMPLER_FILTER_MODE, sdata->filter_mode,
28             CL_SAMPLER_MIP_FILTER_MODE, sdata->filter_mode,
29             0};
30         sampler = clCreateSamplerWithProperties(context, properties, error);
31     } else {
32         sampler = clCreateSampler(context, sdata->normalized_coords, sdata->addressing_mode, sdata->filter_mode, error);
33     }
34     return sampler;
35 }
36 
get_image_dimensions(image_descriptor * imageInfo,size_t & width,size_t & height,size_t & depth)37 bool get_image_dimensions(image_descriptor *imageInfo, size_t &width,
38                           size_t &height, size_t &depth)
39 {
40     width = imageInfo->width;
41     height = 1;
42     depth = 1;
43     switch (imageInfo->type)
44     {
45         case CL_MEM_OBJECT_IMAGE1D: break;
46         case CL_MEM_OBJECT_IMAGE1D_ARRAY: height = imageInfo->arraySize; break;
47         case CL_MEM_OBJECT_IMAGE2D: height = imageInfo->height; break;
48         case CL_MEM_OBJECT_IMAGE2D_ARRAY:
49             height = imageInfo->height;
50             depth = imageInfo->arraySize;
51             break;
52         case CL_MEM_OBJECT_IMAGE3D:
53             height = imageInfo->height;
54             depth = imageInfo->depth;
55             break;
56         default:
57             log_error("ERROR: Test does not support image type");
58             return TEST_FAIL;
59     }
60     return 0;
61 }
62 
InitFloatCoordsCommon(image_descriptor * imageInfo,image_sampler_data * imageSampler,float * xOffsets,float * yOffsets,float * zOffsets,float xfract,float yfract,float zfract,int normalized_coords,MTdata d,int lod)63 static bool InitFloatCoordsCommon(image_descriptor *imageInfo,
64                                   image_sampler_data *imageSampler,
65                                   float *xOffsets, float *yOffsets,
66                                   float *zOffsets, float xfract, float yfract,
67                                   float zfract, int normalized_coords, MTdata d,
68                                   int lod)
69 {
70     size_t i = 0;
71     size_t width_loop, height_loop, depth_loop;
72     bool error =
73         get_image_dimensions(imageInfo, width_loop, height_loop, depth_loop);
74     if (!error)
75     {
76         if (gDisableOffsets)
77         {
78             for (size_t z = 0; z < depth_loop; z++)
79             {
80                 for (size_t y = 0; y < height_loop; y++)
81                 {
82                     for (size_t x = 0; x < width_loop; x++, i++)
83                     {
84                         xOffsets[i] = (float)(xfract + (double)x);
85                         yOffsets[i] = (float)(yfract + (double)y);
86                         zOffsets[i] = (float)(zfract + (double)z);
87                     }
88                 }
89             }
90         }
91         else
92         {
93             for (size_t z = 0; z < depth_loop; z++)
94             {
95                 for (size_t y = 0; y < height_loop; y++)
96                 {
97                     for (size_t x = 0; x < width_loop; x++, i++)
98                     {
99                         xOffsets[i] =
100                             (float)(xfract
101                                     + (double)((int)x
102                                                + random_in_range(-10, 10, d)));
103                         yOffsets[i] =
104                             (float)(yfract
105                                     + (double)((int)y
106                                                + random_in_range(-10, 10, d)));
107                         zOffsets[i] =
108                             (float)(zfract
109                                     + (double)((int)z
110                                                + random_in_range(-10, 10, d)));
111                     }
112                 }
113             }
114         }
115 
116         if (imageSampler->addressing_mode == CL_ADDRESS_NONE)
117         {
118             i = 0;
119             for (size_t z = 0; z < depth_loop; z++)
120             {
121                 for (size_t y = 0; y < height_loop; y++)
122                 {
123                     for (size_t x = 0; x < width_loop; x++, i++)
124                     {
125                         xOffsets[i] = (float)CLAMP((double)xOffsets[i], 0.0,
126                                                    (double)width_loop - 1.0);
127                         yOffsets[i] = (float)CLAMP((double)yOffsets[i], 0.0,
128                                                    (double)height_loop - 1.0);
129                         zOffsets[i] = (float)CLAMP((double)zOffsets[i], 0.0,
130                                                    (double)depth_loop - 1.0);
131                     }
132                 }
133             }
134         }
135 
136         if (normalized_coords || gTestMipmaps)
137         {
138             i = 0;
139             if (lod == 0)
140             {
141                 for (size_t z = 0; z < depth_loop; z++)
142                 {
143                     for (size_t y = 0; y < height_loop; y++)
144                     {
145                         for (size_t x = 0; x < width_loop; x++, i++)
146                         {
147                             xOffsets[i] = (float)((double)xOffsets[i]
148                                                   / (double)width_loop);
149                             if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
150                             {
151                                 yOffsets[i] = (float)((double)yOffsets[i]
152                                                       / (double)height_loop);
153                             }
154                             if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
155                             {
156                                 zOffsets[i] = (float)((double)zOffsets[i]
157                                                       / (double)depth_loop);
158                             }
159                         }
160                     }
161                 }
162             }
163             else if (gTestMipmaps)
164             {
165                 size_t width_lod =
166                     (width_loop >> lod) ? (width_loop >> lod) : 1;
167                 size_t height_lod = height_loop;
168                 size_t depth_lod = depth_loop;
169                 if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
170                 {
171                     height_lod =
172                         (height_loop >> lod) ? (height_loop >> lod) : 1;
173                 }
174                 if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
175                 {
176                     depth_lod = (depth_loop >> lod) ? (depth_loop >> lod) : 1;
177                 }
178 
179                 for (size_t z = 0; z < depth_lod; z++)
180                 {
181                     for (size_t y = 0; y < height_lod; y++)
182                     {
183                         for (size_t x = 0; x < width_lod; x++, i++)
184                         {
185                             xOffsets[i] = (float)((double)xOffsets[i]
186                                                   / (double)width_lod);
187                             if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
188                             {
189                                 yOffsets[i] = (float)((double)yOffsets[i]
190                                                       / (double)height_lod);
191                             }
192                             if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
193                             {
194                                 zOffsets[i] = (float)((double)zOffsets[i]
195                                                       / (double)depth_lod);
196                             }
197                         }
198                     }
199                 }
200             }
201         }
202     }
203     return error;
204 }
205 
create_image_of_type(cl_context context,cl_mem_flags mem_flags,image_descriptor * imageInfo,size_t row_pitch,size_t slice_pitch,void * host_ptr,cl_int * error)206 cl_mem create_image_of_type(cl_context context, cl_mem_flags mem_flags,
207                             image_descriptor *imageInfo, size_t row_pitch,
208                             size_t slice_pitch, void *host_ptr, cl_int *error)
209 {
210     cl_mem image;
211     switch (imageInfo->type)
212     {
213         case CL_MEM_OBJECT_IMAGE3D:
214             image = create_image_3d(context, mem_flags, imageInfo->format,
215                                     imageInfo->width, imageInfo->height,
216                                     imageInfo->depth, row_pitch, slice_pitch,
217                                     host_ptr, error);
218             break;
219         default:
220             log_error("Implementation is incomplete, only 3D images are "
221                       "supported so far");
222             return nullptr;
223     }
224     return image;
225 }
226 
get_image_num_pixels(image_descriptor * imageInfo,size_t width,size_t height,size_t depth,size_t array_size)227 static size_t get_image_num_pixels(image_descriptor *imageInfo, size_t width,
228                                    size_t height, size_t depth,
229                                    size_t array_size)
230 {
231     size_t image_size;
232     switch (imageInfo->type)
233     {
234         case CL_MEM_OBJECT_IMAGE3D: image_size = width * height * depth; break;
235         default:
236             log_error("Implementation is incomplete, only 3D images are "
237                       "supported so far");
238             return 0;
239     }
240     return image_size;
241 }
242 
test_read_image(cl_context context,cl_command_queue queue,cl_kernel kernel,image_descriptor * imageInfo,image_sampler_data * imageSampler,bool useFloatCoords,ExplicitType outputType,MTdata d)243 int test_read_image(cl_context context, cl_command_queue queue,
244                     cl_kernel kernel, image_descriptor *imageInfo,
245                     image_sampler_data *imageSampler, bool useFloatCoords,
246                     ExplicitType outputType, MTdata d)
247 {
248     int error;
249     size_t threads[3];
250     static int initHalf = 0;
251 
252     size_t image_size =
253         get_image_num_pixels(imageInfo, imageInfo->width, imageInfo->height,
254                              imageInfo->depth, imageInfo->arraySize);
255     test_assert_error(0 != image_size, "Invalid image size");
256     size_t width_size, height_size, depth_size;
257     if (get_image_dimensions(imageInfo, width_size, height_size, depth_size))
258     {
259         log_error("ERROR: invalid image dimensions");
260         return CL_INVALID_VALUE;
261     }
262 
263     cl_mem_flags image_read_write_flags = CL_MEM_READ_ONLY;
264 
265     clMemWrapper xOffsets, yOffsets, zOffsets, results;
266     clSamplerWrapper actualSampler;
267     BufferOwningPtr<char> maxImageUseHostPtrBackingStore;
268 
269     // Create offset data
270     BufferOwningPtr<cl_float> xOffsetValues(
271         malloc(sizeof(cl_float) * image_size));
272     BufferOwningPtr<cl_float> yOffsetValues(
273         malloc(sizeof(cl_float) * image_size));
274     BufferOwningPtr<cl_float> zOffsetValues(
275         malloc(sizeof(cl_float) * image_size));
276 
277     if (imageInfo->format->image_channel_data_type == CL_HALF_FLOAT)
278         if (DetectFloatToHalfRoundingMode(queue)) return 1;
279 
280     BufferOwningPtr<char> imageValues;
281     generate_random_image_data(imageInfo, imageValues, d);
282 
283     // Construct testing sources
284     clProtectedImage protImage;
285     clMemWrapper unprotImage;
286     cl_mem image;
287 
288     if (gtestTypesToRun & kReadTests)
289     {
290         image_read_write_flags = CL_MEM_READ_ONLY;
291     }
292     else
293     {
294         image_read_write_flags = CL_MEM_READ_WRITE;
295     }
296 
297     if (gMemFlagsToUse == CL_MEM_USE_HOST_PTR)
298     {
299         // clProtectedImage uses USE_HOST_PTR, so just rely on that for the
300         // testing (via Ian) Do not use protected images for max image size test
301         // since it rounds the row size to a page size
302         if (gTestMaxImages)
303         {
304             generate_random_image_data(imageInfo,
305                                        maxImageUseHostPtrBackingStore, d);
306             unprotImage = create_image_of_type(
307                 context, image_read_write_flags | CL_MEM_USE_HOST_PTR,
308                 imageInfo, (gEnablePitch ? imageInfo->rowPitch : 0),
309                 (gEnablePitch ? imageInfo->slicePitch : 0),
310                 maxImageUseHostPtrBackingStore, &error);
311         }
312         else
313         {
314             error = protImage.Create(context, imageInfo->type,
315                                      image_read_write_flags, imageInfo->format,
316                                      imageInfo->width, imageInfo->height,
317                                      imageInfo->depth, imageInfo->arraySize);
318         }
319         if (error != CL_SUCCESS)
320         {
321             log_error("ERROR: Unable to create image of size %d x %d x %d x %d "
322                       "(pitch %d, %d ) (%s)",
323                       (int)imageInfo->width, (int)imageInfo->height,
324                       (int)imageInfo->depth, (int)imageInfo->arraySize,
325                       (int)imageInfo->rowPitch, (int)imageInfo->slicePitch,
326                       IGetErrorString(error));
327             return error;
328         }
329         if (gTestMaxImages)
330             image = (cl_mem)unprotImage;
331         else
332             image = (cl_mem)protImage;
333     }
334     else if (gMemFlagsToUse == CL_MEM_COPY_HOST_PTR)
335     {
336         // Don't use clEnqueueWriteImage; just use copy host ptr to get the data
337         // in
338         unprotImage = create_image_of_type(
339             context, image_read_write_flags | CL_MEM_COPY_HOST_PTR, imageInfo,
340             (gEnablePitch ? imageInfo->rowPitch : 0),
341             (gEnablePitch ? imageInfo->slicePitch : 0), imageValues, &error);
342         if (error != CL_SUCCESS)
343         {
344             log_error("ERROR: Unable to create image of size %d x %d x %d x %d "
345                       "(pitch %d, %d ) (%s)",
346                       (int)imageInfo->width, (int)imageInfo->height,
347                       (int)imageInfo->depth, (int)imageInfo->arraySize,
348                       (int)imageInfo->rowPitch, (int)imageInfo->slicePitch,
349                       IGetErrorString(error));
350             return error;
351         }
352         image = unprotImage;
353     }
354     else // Either CL_MEM_ALLOC_HOST_PTR or none
355     {
356         // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can
357         // be accessed by the host, but otherwise it works just as if no flag is
358         // specified, so we just do the same thing either way
359         if (!gTestMipmaps)
360         {
361             unprotImage = create_image_of_type(
362                 context, image_read_write_flags | gMemFlagsToUse, imageInfo,
363                 (gEnablePitch ? imageInfo->rowPitch : 0),
364                 (gEnablePitch ? imageInfo->slicePitch : 0), imageValues,
365                 &error);
366             if (error != CL_SUCCESS)
367             {
368                 log_error("ERROR: Unable to create image of size %d x %d x "
369                           "%d x %d (pitch %d, %d ) (%s)",
370                           (int)imageInfo->width, (int)imageInfo->height,
371                           (int)imageInfo->depth, (int)imageInfo->arraySize,
372                           (int)imageInfo->rowPitch, (int)imageInfo->slicePitch,
373                           IGetErrorString(error));
374                 return error;
375             }
376             image = unprotImage;
377         }
378         else
379         {
380             cl_image_desc image_desc = { 0 };
381             image_desc.image_type = imageInfo->type;
382             image_desc.image_width = imageInfo->width;
383             image_desc.image_height = imageInfo->height;
384             image_desc.image_depth = imageInfo->depth;
385             image_desc.image_array_size = imageInfo->arraySize;
386             image_desc.num_mip_levels = imageInfo->num_mip_levels;
387 
388 
389             unprotImage =
390                 clCreateImage(context, image_read_write_flags,
391                               imageInfo->format, &image_desc, NULL, &error);
392             if (error != CL_SUCCESS)
393             {
394                 log_error("ERROR: Unable to create %d level mipmapped image "
395                           "of size %d x %d x %d x %d (pitch %d, %d ) (%s)",
396                           (int)imageInfo->num_mip_levels, (int)imageInfo->width,
397                           (int)imageInfo->height, (int)imageInfo->depth,
398                           (int)imageInfo->arraySize, (int)imageInfo->rowPitch,
399                           (int)imageInfo->slicePitch, IGetErrorString(error));
400                 return error;
401             }
402             image = unprotImage;
403         }
404     }
405 
406     test_assert_error(nullptr != image, "Image creation failed");
407 
408     if (gMemFlagsToUse != CL_MEM_COPY_HOST_PTR)
409     {
410         size_t origin[4] = { 0, 0, 0, 0 };
411         size_t region[3] = { width_size, height_size, depth_size };
412 
413         if (gDebugTrace) log_info(" - Writing image...\n");
414 
415         if (!gTestMipmaps)
416         {
417 
418             error =
419                 clEnqueueWriteImage(queue, image, CL_TRUE, origin, region,
420                                     gEnablePitch ? imageInfo->rowPitch : 0,
421                                     gEnablePitch ? imageInfo->slicePitch : 0,
422                                     imageValues, 0, NULL, NULL);
423 
424             if (error != CL_SUCCESS)
425             {
426                 log_error("ERROR: Unable to write to image of size %d x %d "
427                           "x %d x %d\n",
428                           (int)imageInfo->width, (int)imageInfo->height,
429                           (int)imageInfo->depth, (int)imageInfo->arraySize);
430                 return error;
431             }
432         }
433         else
434         {
435             int nextLevelOffset = 0;
436 
437             for (int i = 0; i < imageInfo->num_mip_levels; i++)
438             {
439                 origin[3] = i;
440                 error = clEnqueueWriteImage(
441                     queue, image, CL_TRUE, origin, region, 0, 0,
442                     ((char *)imageValues + nextLevelOffset), 0, NULL, NULL);
443                 if (error != CL_SUCCESS)
444                 {
445                     log_error("ERROR: Unable to write to %d level mipmapped "
446                               "image of size %d x %d x %d x %d\n",
447                               (int)imageInfo->num_mip_levels,
448                               (int)imageInfo->width, (int)imageInfo->height,
449                               (int)imageInfo->arraySize, (int)imageInfo->depth);
450                     return error;
451                 }
452                 nextLevelOffset += region[0] * region[1] * region[2]
453                     * get_pixel_size(imageInfo->format);
454                 // Subsequent mip level dimensions keep halving
455                 region[0] = region[0] >> 1 ? region[0] >> 1 : 1;
456                 region[1] = region[1] >> 1 ? region[1] >> 1 : 1;
457                 region[2] = region[2] >> 1 ? region[2] >> 1 : 1;
458             }
459         }
460     }
461 
462     xOffsets =
463         clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
464                        sizeof(cl_float) * image_size, xOffsetValues, &error);
465     test_error(error, "Unable to create x offset buffer");
466     yOffsets =
467         clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
468                        sizeof(cl_float) * image_size, yOffsetValues, &error);
469     test_error(error, "Unable to create y offset buffer");
470     zOffsets =
471         clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
472                        sizeof(cl_float) * image_size, zOffsetValues, &error);
473     test_error(error, "Unable to create y offset buffer");
474     results = clCreateBuffer(
475         context, CL_MEM_READ_WRITE,
476         get_explicit_type_size(outputType) * 4 * image_size, NULL, &error);
477     test_error(error, "Unable to create result buffer");
478 
479     // Create sampler to use
480     actualSampler = create_sampler(context, imageSampler, gTestMipmaps, &error);
481     test_error(error, "Unable to create image sampler");
482 
483     // Set arguments
484     int idx = 0;
485     error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &image);
486     test_error(error, "Unable to set kernel arguments");
487     if (!gUseKernelSamplers)
488     {
489         error =
490             clSetKernelArg(kernel, idx++, sizeof(cl_sampler), &actualSampler);
491         test_error(error, "Unable to set kernel arguments");
492     }
493     error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &xOffsets);
494     test_error(error, "Unable to set kernel arguments");
495     error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &yOffsets);
496     test_error(error, "Unable to set kernel arguments");
497     error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &zOffsets);
498     test_error(error, "Unable to set kernel arguments");
499     error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &results);
500     test_error(error, "Unable to set kernel arguments");
501 
502     const float float_offsets[] = { 0.0f,
503                                     MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30),
504                                     0.25f,
505                                     0.3f,
506                                     0.5f - FLT_EPSILON / 4.0f,
507                                     0.5f,
508                                     0.9f,
509                                     1.0f - FLT_EPSILON / 2 };
510     int float_offset_count = sizeof(float_offsets) / sizeof(float_offsets[0]);
511     int numTries = MAX_TRIES, numClamped = MAX_CLAMPED;
512     int loopCount = 2 * float_offset_count;
513     if (!useFloatCoords) loopCount = 1;
514     if (gTestMaxImages)
515     {
516         loopCount = 1;
517         log_info("Testing each size only once with pixel offsets of %g for max "
518                  "sized images.\n",
519                  float_offsets[0]);
520     }
521 
522     // Get the maximum absolute error for this format
523     double formatAbsoluteError =
524         get_max_absolute_error(imageInfo->format, imageSampler);
525     if (gDebugTrace)
526         log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError);
527 
528     if (0 == initHalf
529         && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT)
530     {
531         initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode(queue);
532         if (initHalf)
533         {
534             log_info("Half rounding mode successfully detected.\n");
535         }
536     }
537 
538     int nextLevelOffset = 0;
539     size_t width_lod = width_size, height_lod = height_size,
540            depth_lod = depth_size;
541 
542     // Loop over all mipmap levels, if we are testing mipmapped images.
543     for (int lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels)
544          || (!gTestMipmaps && lod < 1);
545          lod++)
546     {
547         size_t image_lod_size = get_image_num_pixels(
548             imageInfo, width_lod, height_lod, depth_lod, imageInfo->arraySize);
549         test_assert_error(0 != image_lod_size, "Invalid image size");
550         size_t resultValuesSize =
551             image_lod_size * get_explicit_type_size(outputType) * 4;
552         BufferOwningPtr<char> resultValues(malloc(resultValuesSize));
553         float lod_float = (float)lod;
554         if (gTestMipmaps)
555         {
556             // Set the lod kernel arg
557             if (gDebugTrace) log_info(" - Working at mip level %d\n", lod);
558             error = clSetKernelArg(kernel, idx, sizeof(float), &lod_float);
559             test_error(error, "Unable to set kernel arguments");
560         }
561 
562         for (int q = 0; q < loopCount; q++)
563         {
564             float offset = float_offsets[q % float_offset_count];
565 
566             // Init the coordinates
567             error = InitFloatCoordsCommon(
568                 imageInfo, imageSampler, xOffsetValues, yOffsetValues,
569                 zOffsetValues, q >= float_offset_count ? -offset : offset,
570                 q >= float_offset_count ? offset : -offset,
571                 q >= float_offset_count ? -offset : offset,
572                 imageSampler->normalized_coords, d, lod);
573             test_error(error, "Unable to initialise coordinates");
574 
575             error = clEnqueueWriteBuffer(queue, xOffsets, CL_TRUE, 0,
576                                          sizeof(cl_float) * image_size,
577                                          xOffsetValues, 0, NULL, NULL);
578             test_error(error, "Unable to write x offsets");
579             error = clEnqueueWriteBuffer(queue, yOffsets, CL_TRUE, 0,
580                                          sizeof(cl_float) * image_size,
581                                          yOffsetValues, 0, NULL, NULL);
582             test_error(error, "Unable to write y offsets");
583             error = clEnqueueWriteBuffer(queue, zOffsets, CL_TRUE, 0,
584                                          sizeof(cl_float) * image_size,
585                                          zOffsetValues, 0, NULL, NULL);
586             test_error(error, "Unable to write z offsets");
587 
588 
589             memset(resultValues, 0xff, resultValuesSize);
590             clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, resultValuesSize,
591                                  resultValues, 0, NULL, NULL);
592 
593             // Figure out thread dimensions
594             threads[0] = (size_t)width_lod;
595             threads[1] = (size_t)height_lod;
596             threads[2] = (size_t)depth_lod;
597 
598             // Run the kernel
599             error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, threads,
600                                            NULL, 0, NULL, NULL);
601             test_error(error, "Unable to run kernel");
602 
603             // Get results
604             error = clEnqueueReadBuffer(
605                 queue, results, CL_TRUE, 0,
606                 image_lod_size * get_explicit_type_size(outputType) * 4,
607                 resultValues, 0, NULL, NULL);
608             test_error(error, "Unable to read results from kernel");
609             if (gDebugTrace) log_info("    results read\n");
610 
611             // Validate results element by element
612             char *imagePtr = (char *)imageValues + nextLevelOffset;
613             /*
614              * FLOAT output type
615              */
616             if (is_sRGBA_order(imageInfo->format->image_channel_order)
617                 && (outputType == kFloat))
618             {
619                 // Validate float results
620                 float *resultPtr = (float *)(char *)resultValues;
621                 float expected[4], error = 0.0f;
622                 float maxErr = get_max_relative_error(
623                     imageInfo->format, imageSampler, 1 /*3D*/,
624                     CL_FILTER_LINEAR == imageSampler->filter_mode);
625 
626                 for (size_t z = 0, j = 0; z < depth_lod; z++)
627                 {
628                     for (size_t y = 0; y < height_lod; y++)
629                     {
630                         for (size_t x = 0; x < width_lod; x++, j++)
631                         {
632                             // Step 1: go through and see if the results verify
633                             // for the pixel For the normalized case on a GPU we
634                             // put in offsets to the X, Y and Z to see if we
635                             // land on the right pixel. This addresses the
636                             // significant inaccuracy in GPU normalization in
637                             // OpenCL 1.0.
638                             int checkOnlyOnePixel = 0;
639                             int found_pixel = 0;
640                             float offset = NORM_OFFSET;
641                             if (!imageSampler->normalized_coords
642                                 || imageSampler->filter_mode
643                                     != CL_FILTER_NEAREST
644                                 || NORM_OFFSET == 0
645 #if defined(__APPLE__)
646                                 // Apple requires its CPU implementation to do
647                                 // correctly rounded address arithmetic in all
648                                 // modes
649                                 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
650 #endif
651                             )
652                                 offset = 0.0f; // Loop only once
653 
654                             for (float norm_offset_x = -offset;
655                                  norm_offset_x <= offset && !found_pixel;
656                                  norm_offset_x += NORM_OFFSET)
657                             {
658                                 for (float norm_offset_y = -offset;
659                                      norm_offset_y <= offset && !found_pixel;
660                                      norm_offset_y += NORM_OFFSET)
661                                 {
662                                     for (float norm_offset_z = -offset;
663                                          norm_offset_z <= NORM_OFFSET
664                                          && !found_pixel;
665                                          norm_offset_z += NORM_OFFSET)
666                                     {
667 
668                                         int hasDenormals = 0;
669                                         FloatPixel maxPixel =
670                                             sample_image_pixel_float_offset(
671                                                 imagePtr, imageInfo,
672                                                 xOffsetValues[j],
673                                                 yOffsetValues[j],
674                                                 zOffsetValues[j], norm_offset_x,
675                                                 norm_offset_y, norm_offset_z,
676                                                 imageSampler, expected, 0,
677                                                 &hasDenormals, lod);
678 
679                                         float err1 =
680                                             ABS_ERROR(sRGBmap(resultPtr[0]),
681                                                       sRGBmap(expected[0]));
682                                         float err2 =
683                                             ABS_ERROR(sRGBmap(resultPtr[1]),
684                                                       sRGBmap(expected[1]));
685                                         float err3 =
686                                             ABS_ERROR(sRGBmap(resultPtr[2]),
687                                                       sRGBmap(expected[2]));
688                                         float err4 = ABS_ERROR(resultPtr[3],
689                                                                expected[3]);
690                                         // Clamp to the minimum absolute error
691                                         // for the format
692                                         if (err1 > 0
693                                             && err1 < formatAbsoluteError)
694                                         {
695                                             err1 = 0.0f;
696                                         }
697                                         if (err2 > 0
698                                             && err2 < formatAbsoluteError)
699                                         {
700                                             err2 = 0.0f;
701                                         }
702                                         if (err3 > 0
703                                             && err3 < formatAbsoluteError)
704                                         {
705                                             err3 = 0.0f;
706                                         }
707                                         if (err4 > 0
708                                             && err4 < formatAbsoluteError)
709                                         {
710                                             err4 = 0.0f;
711                                         }
712                                         float maxErr = 0.5;
713 
714                                         if (!(err1 <= maxErr)
715                                             || !(err2 <= maxErr)
716                                             || !(err3 <= maxErr)
717                                             || !(err4 <= maxErr))
718                                         {
719                                             // Try flushing the denormals
720                                             if (hasDenormals)
721                                             {
722                                                 // If implementation decide to
723                                                 // flush subnormals to zero, max
724                                                 // error needs to be adjusted
725                                                 maxErr += 4 * FLT_MIN;
726 
727                                                 maxPixel =
728                                                     sample_image_pixel_float_offset(
729                                                         imagePtr, imageInfo,
730                                                         xOffsetValues[j],
731                                                         yOffsetValues[j],
732                                                         zOffsetValues[j],
733                                                         norm_offset_x,
734                                                         norm_offset_y,
735                                                         norm_offset_z,
736                                                         imageSampler, expected,
737                                                         0, NULL, lod);
738 
739                                                 err1 = ABS_ERROR(
740                                                     sRGBmap(resultPtr[0]),
741                                                     sRGBmap(expected[0]));
742                                                 err2 = ABS_ERROR(
743                                                     sRGBmap(resultPtr[1]),
744                                                     sRGBmap(expected[1]));
745                                                 err3 = ABS_ERROR(
746                                                     sRGBmap(resultPtr[2]),
747                                                     sRGBmap(expected[2]));
748                                                 err4 = ABS_ERROR(resultPtr[3],
749                                                                  expected[3]);
750                                             }
751                                         }
752 
753                                         found_pixel = (err1 <= maxErr)
754                                             && (err2 <= maxErr)
755                                             && (err3 <= maxErr)
756                                             && (err4 <= maxErr);
757                                     } // norm_offset_z
758                                 } // norm_offset_y
759                             } // norm_offset_x
760 
761                             // Step 2: If we did not find a match, then print
762                             // out debugging info.
763                             if (!found_pixel)
764                             {
765                                 // For the normalized case on a GPU we put in
766                                 // offsets to the X and Y to see if we land on
767                                 // the right pixel. This addresses the
768                                 // significant inaccuracy in GPU normalization
769                                 // in OpenCL 1.0.
770                                 checkOnlyOnePixel = 0;
771                                 int shouldReturn = 0;
772                                 for (float norm_offset_x = -offset;
773                                      norm_offset_x <= offset
774                                      && !checkOnlyOnePixel;
775                                      norm_offset_x += NORM_OFFSET)
776                                 {
777                                     for (float norm_offset_y = -offset;
778                                          norm_offset_y <= offset
779                                          && !checkOnlyOnePixel;
780                                          norm_offset_y += NORM_OFFSET)
781                                     {
782                                         for (float norm_offset_z = -offset;
783                                              norm_offset_z <= offset
784                                              && !checkOnlyOnePixel;
785                                              norm_offset_z += NORM_OFFSET)
786                                         {
787 
788                                             int hasDenormals = 0;
789                                             FloatPixel maxPixel =
790                                                 sample_image_pixel_float_offset(
791                                                     imagePtr, imageInfo,
792                                                     xOffsetValues[j],
793                                                     yOffsetValues[j],
794                                                     zOffsetValues[j],
795                                                     norm_offset_x,
796                                                     norm_offset_y,
797                                                     norm_offset_z, imageSampler,
798                                                     expected, 0, &hasDenormals,
799                                                     lod);
800 
801                                             float err1 =
802                                                 ABS_ERROR(sRGBmap(resultPtr[0]),
803                                                           sRGBmap(expected[0]));
804                                             float err2 =
805                                                 ABS_ERROR(sRGBmap(resultPtr[1]),
806                                                           sRGBmap(expected[1]));
807                                             float err3 =
808                                                 ABS_ERROR(sRGBmap(resultPtr[2]),
809                                                           sRGBmap(expected[2]));
810                                             float err4 = ABS_ERROR(resultPtr[3],
811                                                                    expected[3]);
812                                             float maxErr = 0.6;
813 
814                                             if (!(err1 <= maxErr)
815                                                 || !(err2 <= maxErr)
816                                                 || !(err3 <= maxErr)
817                                                 || !(err4 <= maxErr))
818                                             {
819                                                 // Try flushing the denormals
820                                                 if (hasDenormals)
821                                                 {
822                                                     // If implementation decide
823                                                     // to flush subnormals to
824                                                     // zero, max error needs to
825                                                     // be adjusted
826                                                     maxErr += 4 * FLT_MIN;
827 
828                                                     maxPixel =
829                                                         sample_image_pixel_float(
830                                                             imagePtr, imageInfo,
831                                                             xOffsetValues[j],
832                                                             yOffsetValues[j],
833                                                             zOffsetValues[j],
834                                                             imageSampler,
835                                                             expected, 0, NULL,
836                                                             lod);
837 
838                                                     err1 = ABS_ERROR(
839                                                         sRGBmap(resultPtr[0]),
840                                                         sRGBmap(expected[0]));
841                                                     err2 = ABS_ERROR(
842                                                         sRGBmap(resultPtr[1]),
843                                                         sRGBmap(expected[1]));
844                                                     err3 = ABS_ERROR(
845                                                         sRGBmap(resultPtr[2]),
846                                                         sRGBmap(expected[2]));
847                                                     err4 =
848                                                         ABS_ERROR(resultPtr[3],
849                                                                   expected[3]);
850                                                 }
851                                             }
852 
853                                             if (!(err1 <= maxErr)
854                                                 || !(err2 <= maxErr)
855                                                 || !(err3 <= maxErr)
856                                                 || !(err4 <= maxErr))
857                                             {
858                                                 log_error(
859                                                     "FAILED norm_offsets: %g , "
860                                                     "%g , %g:\n",
861                                                     norm_offset_x,
862                                                     norm_offset_y,
863                                                     norm_offset_z);
864 
865                                                 float tempOut[4];
866                                                 shouldReturn |=
867                                                     determine_validation_error_offset<
868                                                         float>(
869                                                         imagePtr, imageInfo,
870                                                         imageSampler, resultPtr,
871                                                         expected, error,
872                                                         xOffsetValues[j],
873                                                         yOffsetValues[j],
874                                                         zOffsetValues[j],
875                                                         norm_offset_x,
876                                                         norm_offset_y,
877                                                         norm_offset_z, j,
878                                                         numTries, numClamped,
879                                                         true, lod);
880                                                 log_error("Step by step:\n");
881                                                 FloatPixel temp =
882                                                     sample_image_pixel_float_offset(
883                                                         imagePtr, imageInfo,
884                                                         xOffsetValues[j],
885                                                         yOffsetValues[j],
886                                                         zOffsetValues[j],
887                                                         norm_offset_x,
888                                                         norm_offset_y,
889                                                         norm_offset_z,
890                                                         imageSampler, tempOut,
891                                                         1 /*verbose*/,
892                                                         &hasDenormals, lod);
893                                                 log_error(
894                                                     "\tulps: %2.2f, %2.2f, "
895                                                     "%2.2f, %2.2f  (max "
896                                                     "allowed: %2.2f)\n\n",
897                                                     Ulp_Error(resultPtr[0],
898                                                               expected[0]),
899                                                     Ulp_Error(resultPtr[1],
900                                                               expected[1]),
901                                                     Ulp_Error(resultPtr[2],
902                                                               expected[2]),
903                                                     Ulp_Error(resultPtr[3],
904                                                               expected[3]),
905                                                     Ulp_Error(
906                                                         MAKE_HEX_FLOAT(
907                                                             0x1.000002p0f,
908                                                             0x1000002L, -24)
909                                                             + maxErr,
910                                                         MAKE_HEX_FLOAT(
911                                                             0x1.000002p0f,
912                                                             0x1000002L, -24)));
913                                             }
914                                             else
915                                             {
916                                                 log_error(
917                                                     "Test error: we should "
918                                                     "have detected this "
919                                                     "passing above.\n");
920                                             }
921                                         } // norm_offset_z
922                                     } // norm_offset_y
923                                 } // norm_offset_x
924                                 if (shouldReturn) return 1;
925                             } // if (!found_pixel)
926 
927                             resultPtr += 4;
928                         }
929                     }
930                 }
931             }
932             /*
933              * FLOAT output type
934              */
935             else if (outputType == kFloat)
936             {
937                 // Validate float results
938                 float *resultPtr = (float *)(char *)resultValues;
939                 float expected[4], error = 0.0f;
940                 float maxErr = get_max_relative_error(
941                     imageInfo->format, imageSampler, 1 /*3D*/,
942                     CL_FILTER_LINEAR == imageSampler->filter_mode);
943 
944                 for (size_t z = 0, j = 0; z < depth_lod; z++)
945                 {
946                     for (size_t y = 0; y < height_lod; y++)
947                     {
948                         for (size_t x = 0; x < width_lod; x++, j++)
949                         {
950                             // Step 1: go through and see if the results verify
951                             // for the pixel For the normalized case on a GPU we
952                             // put in offsets to the X, Y and Z to see if we
953                             // land on the right pixel. This addresses the
954                             // significant inaccuracy in GPU normalization in
955                             // OpenCL 1.0.
956                             int checkOnlyOnePixel = 0;
957                             int found_pixel = 0;
958                             float offset = NORM_OFFSET;
959                             if (!imageSampler->normalized_coords
960                                 || imageSampler->filter_mode
961                                     != CL_FILTER_NEAREST
962                                 || NORM_OFFSET == 0
963 #if defined(__APPLE__)
964                                 // Apple requires its CPU implementation to do
965                                 // correctly rounded address arithmetic in all
966                                 // modes
967                                 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
968 #endif
969                             )
970                                 offset = 0.0f; // Loop only once
971 
972                             for (float norm_offset_x = -offset;
973                                  norm_offset_x <= offset && !found_pixel;
974                                  norm_offset_x += NORM_OFFSET)
975                             {
976                                 for (float norm_offset_y = -offset;
977                                      norm_offset_y <= offset && !found_pixel;
978                                      norm_offset_y += NORM_OFFSET)
979                                 {
980                                     for (float norm_offset_z = -offset;
981                                          norm_offset_z <= NORM_OFFSET
982                                          && !found_pixel;
983                                          norm_offset_z += NORM_OFFSET)
984                                     {
985 
986                                         int hasDenormals = 0;
987                                         FloatPixel maxPixel =
988                                             sample_image_pixel_float_offset(
989                                                 imagePtr, imageInfo,
990                                                 xOffsetValues[j],
991                                                 yOffsetValues[j],
992                                                 zOffsetValues[j], norm_offset_x,
993                                                 norm_offset_y, norm_offset_z,
994                                                 imageSampler, expected, 0,
995                                                 &hasDenormals, lod);
996 
997                                         float err1 = ABS_ERROR(resultPtr[0],
998                                                                expected[0]);
999                                         float err2 = ABS_ERROR(resultPtr[1],
1000                                                                expected[1]);
1001                                         float err3 = ABS_ERROR(resultPtr[2],
1002                                                                expected[2]);
1003                                         float err4 = ABS_ERROR(resultPtr[3],
1004                                                                expected[3]);
1005                                         // Clamp to the minimum absolute error
1006                                         // for the format
1007                                         if (err1 > 0
1008                                             && err1 < formatAbsoluteError)
1009                                         {
1010                                             err1 = 0.0f;
1011                                         }
1012                                         if (err2 > 0
1013                                             && err2 < formatAbsoluteError)
1014                                         {
1015                                             err2 = 0.0f;
1016                                         }
1017                                         if (err3 > 0
1018                                             && err3 < formatAbsoluteError)
1019                                         {
1020                                             err3 = 0.0f;
1021                                         }
1022                                         if (err4 > 0
1023                                             && err4 < formatAbsoluteError)
1024                                         {
1025                                             err4 = 0.0f;
1026                                         }
1027                                         float maxErr1 = std::max(
1028                                             maxErr * maxPixel.p[0], FLT_MIN);
1029                                         float maxErr2 = std::max(
1030                                             maxErr * maxPixel.p[1], FLT_MIN);
1031                                         float maxErr3 = std::max(
1032                                             maxErr * maxPixel.p[2], FLT_MIN);
1033                                         float maxErr4 = std::max(
1034                                             maxErr * maxPixel.p[3], FLT_MIN);
1035 
1036                                         if (!(err1 <= maxErr1)
1037                                             || !(err2 <= maxErr2)
1038                                             || !(err3 <= maxErr3)
1039                                             || !(err4 <= maxErr4))
1040                                         {
1041                                             // Try flushing the denormals
1042                                             if (hasDenormals)
1043                                             {
1044                                                 // If implementation decide to
1045                                                 // flush subnormals to zero, max
1046                                                 // error needs to be adjusted
1047                                                 maxErr1 += 4 * FLT_MIN;
1048                                                 maxErr2 += 4 * FLT_MIN;
1049                                                 maxErr3 += 4 * FLT_MIN;
1050                                                 maxErr4 += 4 * FLT_MIN;
1051 
1052                                                 maxPixel =
1053                                                     sample_image_pixel_float_offset(
1054                                                         imagePtr, imageInfo,
1055                                                         xOffsetValues[j],
1056                                                         yOffsetValues[j],
1057                                                         zOffsetValues[j],
1058                                                         norm_offset_x,
1059                                                         norm_offset_y,
1060                                                         norm_offset_z,
1061                                                         imageSampler, expected,
1062                                                         0, NULL, lod);
1063 
1064                                                 err1 = ABS_ERROR(resultPtr[0],
1065                                                                  expected[0]);
1066                                                 err2 = ABS_ERROR(resultPtr[1],
1067                                                                  expected[1]);
1068                                                 err3 = ABS_ERROR(resultPtr[2],
1069                                                                  expected[2]);
1070                                                 err4 = ABS_ERROR(resultPtr[3],
1071                                                                  expected[3]);
1072                                             }
1073                                         }
1074 
1075                                         found_pixel = (err1 <= maxErr1)
1076                                             && (err2 <= maxErr2)
1077                                             && (err3 <= maxErr3)
1078                                             && (err4 <= maxErr4);
1079                                     } // norm_offset_z
1080                                 } // norm_offset_y
1081                             } // norm_offset_x
1082 
1083                             // Step 2: If we did not find a match, then print
1084                             // out debugging info.
1085                             if (!found_pixel)
1086                             {
1087                                 // For the normalized case on a GPU we put in
1088                                 // offsets to the X and Y to see if we land on
1089                                 // the right pixel. This addresses the
1090                                 // significant inaccuracy in GPU normalization
1091                                 // in OpenCL 1.0.
1092                                 checkOnlyOnePixel = 0;
1093                                 int shouldReturn = 0;
1094                                 for (float norm_offset_x = -offset;
1095                                      norm_offset_x <= offset
1096                                      && !checkOnlyOnePixel;
1097                                      norm_offset_x += NORM_OFFSET)
1098                                 {
1099                                     for (float norm_offset_y = -offset;
1100                                          norm_offset_y <= offset
1101                                          && !checkOnlyOnePixel;
1102                                          norm_offset_y += NORM_OFFSET)
1103                                     {
1104                                         for (float norm_offset_z = -offset;
1105                                              norm_offset_z <= offset
1106                                              && !checkOnlyOnePixel;
1107                                              norm_offset_z += NORM_OFFSET)
1108                                         {
1109 
1110                                             int hasDenormals = 0;
1111                                             FloatPixel maxPixel =
1112                                                 sample_image_pixel_float_offset(
1113                                                     imagePtr, imageInfo,
1114                                                     xOffsetValues[j],
1115                                                     yOffsetValues[j],
1116                                                     zOffsetValues[j],
1117                                                     norm_offset_x,
1118                                                     norm_offset_y,
1119                                                     norm_offset_z, imageSampler,
1120                                                     expected, 0, &hasDenormals,
1121                                                     lod);
1122 
1123                                             float err1 = ABS_ERROR(resultPtr[0],
1124                                                                    expected[0]);
1125                                             float err2 = ABS_ERROR(resultPtr[1],
1126                                                                    expected[1]);
1127                                             float err3 = ABS_ERROR(resultPtr[2],
1128                                                                    expected[2]);
1129                                             float err4 = ABS_ERROR(resultPtr[3],
1130                                                                    expected[3]);
1131                                             float maxErr1 =
1132                                                 std::max(maxErr * maxPixel.p[0],
1133                                                          FLT_MIN);
1134                                             float maxErr2 =
1135                                                 std::max(maxErr * maxPixel.p[1],
1136                                                          FLT_MIN);
1137                                             float maxErr3 =
1138                                                 std::max(maxErr * maxPixel.p[2],
1139                                                          FLT_MIN);
1140                                             float maxErr4 =
1141                                                 std::max(maxErr * maxPixel.p[3],
1142                                                          FLT_MIN);
1143 
1144 
1145                                             if (!(err1 <= maxErr1)
1146                                                 || !(err2 <= maxErr2)
1147                                                 || !(err3 <= maxErr3)
1148                                                 || !(err4 <= maxErr4))
1149                                             {
1150                                                 // Try flushing the denormals
1151                                                 if (hasDenormals)
1152                                                 {
1153                                                     maxErr1 += 4 * FLT_MIN;
1154                                                     maxErr2 += 4 * FLT_MIN;
1155                                                     maxErr3 += 4 * FLT_MIN;
1156                                                     maxErr4 += 4 * FLT_MIN;
1157 
1158                                                     maxPixel =
1159                                                         sample_image_pixel_float(
1160                                                             imagePtr, imageInfo,
1161                                                             xOffsetValues[j],
1162                                                             yOffsetValues[j],
1163                                                             zOffsetValues[j],
1164                                                             imageSampler,
1165                                                             expected, 0, NULL,
1166                                                             lod);
1167 
1168                                                     err1 =
1169                                                         ABS_ERROR(resultPtr[0],
1170                                                                   expected[0]);
1171                                                     err2 =
1172                                                         ABS_ERROR(resultPtr[1],
1173                                                                   expected[1]);
1174                                                     err3 =
1175                                                         ABS_ERROR(resultPtr[2],
1176                                                                   expected[2]);
1177                                                     err4 =
1178                                                         ABS_ERROR(resultPtr[3],
1179                                                                   expected[3]);
1180                                                 }
1181                                             }
1182 
1183                                             if (!(err1 <= maxErr1)
1184                                                 || !(err2 <= maxErr2)
1185                                                 || !(err3 <= maxErr3)
1186                                                 || !(err4 <= maxErr4))
1187                                             {
1188                                                 log_error(
1189                                                     "FAILED norm_offsets: %g , "
1190                                                     "%g , %g:\n",
1191                                                     norm_offset_x,
1192                                                     norm_offset_y,
1193                                                     norm_offset_z);
1194 
1195                                                 float tempOut[4];
1196                                                 shouldReturn |=
1197                                                     determine_validation_error_offset<
1198                                                         float>(
1199                                                         imagePtr, imageInfo,
1200                                                         imageSampler, resultPtr,
1201                                                         expected, error,
1202                                                         xOffsetValues[j],
1203                                                         yOffsetValues[j],
1204                                                         zOffsetValues[j],
1205                                                         norm_offset_x,
1206                                                         norm_offset_y,
1207                                                         norm_offset_z, j,
1208                                                         numTries, numClamped,
1209                                                         true, lod);
1210                                                 log_error("Step by step:\n");
1211                                                 FloatPixel temp =
1212                                                     sample_image_pixel_float_offset(
1213                                                         imagePtr, imageInfo,
1214                                                         xOffsetValues[j],
1215                                                         yOffsetValues[j],
1216                                                         zOffsetValues[j],
1217                                                         norm_offset_x,
1218                                                         norm_offset_y,
1219                                                         norm_offset_z,
1220                                                         imageSampler, tempOut,
1221                                                         1 /*verbose*/,
1222                                                         &hasDenormals, lod);
1223                                                 log_error(
1224                                                     "\tulps: %2.2f, %2.2f, "
1225                                                     "%2.2f, %2.2f  (max "
1226                                                     "allowed: %2.2f)\n\n",
1227                                                     Ulp_Error(resultPtr[0],
1228                                                               expected[0]),
1229                                                     Ulp_Error(resultPtr[1],
1230                                                               expected[1]),
1231                                                     Ulp_Error(resultPtr[2],
1232                                                               expected[2]),
1233                                                     Ulp_Error(resultPtr[3],
1234                                                               expected[3]),
1235                                                     Ulp_Error(
1236                                                         MAKE_HEX_FLOAT(
1237                                                             0x1.000002p0f,
1238                                                             0x1000002L, -24)
1239                                                             + maxErr,
1240                                                         MAKE_HEX_FLOAT(
1241                                                             0x1.000002p0f,
1242                                                             0x1000002L, -24)));
1243                                             }
1244                                             else
1245                                             {
1246                                                 log_error(
1247                                                     "Test error: we should "
1248                                                     "have detected this "
1249                                                     "passing above.\n");
1250                                             }
1251                                         } // norm_offset_z
1252                                     } // norm_offset_y
1253                                 } // norm_offset_x
1254                                 if (shouldReturn) return 1;
1255                             } // if (!found_pixel)
1256 
1257                             resultPtr += 4;
1258                         }
1259                     }
1260                 }
1261             }
1262             /*
1263              * UINT output type
1264              */
1265             else if (outputType == kUInt)
1266             {
1267                 // Validate unsigned integer results
1268                 unsigned int *resultPtr = (unsigned int *)(char *)resultValues;
1269                 unsigned int expected[4];
1270                 float error;
1271                 for (size_t z = 0, j = 0; z < depth_lod; z++)
1272                 {
1273                     for (size_t y = 0; y < height_lod; y++)
1274                     {
1275                         for (size_t x = 0; x < width_lod; x++, j++)
1276                         {
1277                             // Step 1: go through and see if the results verify
1278                             // for the pixel For the normalized case on a GPU we
1279                             // put in offsets to the X, Y and Z to see if we
1280                             // land on the right pixel. This addresses the
1281                             // significant inaccuracy in GPU normalization in
1282                             // OpenCL 1.0.
1283                             int checkOnlyOnePixel = 0;
1284                             int found_pixel = 0;
1285                             for (float norm_offset_x = -NORM_OFFSET;
1286                                  norm_offset_x <= NORM_OFFSET && !found_pixel
1287                                  && !checkOnlyOnePixel;
1288                                  norm_offset_x += NORM_OFFSET)
1289                             {
1290                                 for (float norm_offset_y = -NORM_OFFSET;
1291                                      norm_offset_y <= NORM_OFFSET
1292                                      && !found_pixel && !checkOnlyOnePixel;
1293                                      norm_offset_y += NORM_OFFSET)
1294                                 {
1295                                     for (float norm_offset_z = -NORM_OFFSET;
1296                                          norm_offset_z <= NORM_OFFSET
1297                                          && !found_pixel && !checkOnlyOnePixel;
1298                                          norm_offset_z += NORM_OFFSET)
1299                                     {
1300 
1301                                         // If we are not on a GPU, or we are not
1302                                         // normalized, then only test with
1303                                         // offsets (0.0, 0.0) E.g., test one
1304                                         // pixel.
1305                                         if (!imageSampler->normalized_coords
1306                                             || !(gDeviceType
1307                                                  & CL_DEVICE_TYPE_GPU)
1308                                             || NORM_OFFSET == 0)
1309                                         {
1310                                             norm_offset_x = 0.0f;
1311                                             norm_offset_y = 0.0f;
1312                                             norm_offset_z = 0.0f;
1313                                             checkOnlyOnePixel = 1;
1314                                         }
1315 
1316                                         sample_image_pixel_offset<unsigned int>(
1317                                             imagePtr, imageInfo,
1318                                             xOffsetValues[j], yOffsetValues[j],
1319                                             zOffsetValues[j], norm_offset_x,
1320                                             norm_offset_y, norm_offset_z,
1321                                             imageSampler, expected, lod);
1322 
1323                                         error = errMax(
1324                                             errMax(abs_diff_uint(expected[0],
1325                                                                  resultPtr[0]),
1326                                                    abs_diff_uint(expected[1],
1327                                                                  resultPtr[1])),
1328                                             errMax(
1329                                                 abs_diff_uint(expected[2],
1330                                                               resultPtr[2]),
1331                                                 abs_diff_uint(expected[3],
1332                                                               resultPtr[3])));
1333 
1334                                         if (error < MAX_ERR) found_pixel = 1;
1335                                     } // norm_offset_z
1336                                 } // norm_offset_y
1337                             } // norm_offset_x
1338 
1339                             // Step 2: If we did not find a match, then print
1340                             // out debugging info.
1341                             if (!found_pixel)
1342                             {
1343                                 // For the normalized case on a GPU we put in
1344                                 // offsets to the X and Y to see if we land on
1345                                 // the right pixel. This addresses the
1346                                 // significant inaccuracy in GPU normalization
1347                                 // in OpenCL 1.0.
1348                                 checkOnlyOnePixel = 0;
1349                                 int shouldReturn = 0;
1350                                 for (float norm_offset_x = -NORM_OFFSET;
1351                                      norm_offset_x <= NORM_OFFSET
1352                                      && !checkOnlyOnePixel;
1353                                      norm_offset_x += NORM_OFFSET)
1354                                 {
1355                                     for (float norm_offset_y = -NORM_OFFSET;
1356                                          norm_offset_y <= NORM_OFFSET
1357                                          && !checkOnlyOnePixel;
1358                                          norm_offset_y += NORM_OFFSET)
1359                                     {
1360                                         for (float norm_offset_z = -NORM_OFFSET;
1361                                              norm_offset_z <= NORM_OFFSET
1362                                              && !checkOnlyOnePixel;
1363                                              norm_offset_z += NORM_OFFSET)
1364                                         {
1365 
1366                                             // If we are not on a GPU, or we are
1367                                             // not normalized, then only test
1368                                             // with offsets (0.0, 0.0) E.g.,
1369                                             // test one pixel.
1370                                             if (!imageSampler->normalized_coords
1371                                                 || gDeviceType
1372                                                     != CL_DEVICE_TYPE_GPU
1373                                                 || NORM_OFFSET == 0)
1374                                             {
1375                                                 norm_offset_x = 0.0f;
1376                                                 norm_offset_y = 0.0f;
1377                                                 norm_offset_z = 0.0f;
1378                                                 checkOnlyOnePixel = 1;
1379                                             }
1380 
1381                                             sample_image_pixel_offset<
1382                                                 unsigned int>(
1383                                                 imagePtr, imageInfo,
1384                                                 xOffsetValues[j],
1385                                                 yOffsetValues[j],
1386                                                 zOffsetValues[j], norm_offset_x,
1387                                                 norm_offset_y, norm_offset_z,
1388                                                 imageSampler, expected, lod);
1389 
1390                                             error = errMax(
1391                                                 errMax(
1392                                                     abs_diff_uint(expected[0],
1393                                                                   resultPtr[0]),
1394                                                     abs_diff_uint(
1395                                                         expected[1],
1396                                                         resultPtr[1])),
1397                                                 errMax(
1398                                                     abs_diff_uint(expected[2],
1399                                                                   resultPtr[2]),
1400                                                     abs_diff_uint(
1401                                                         expected[3],
1402                                                         resultPtr[3])));
1403 
1404                                             if (error > MAX_ERR)
1405                                             {
1406                                                 log_error(
1407                                                     "FAILED norm_offsets: %g , "
1408                                                     "%g , %g:\n",
1409                                                     norm_offset_x,
1410                                                     norm_offset_y,
1411                                                     norm_offset_z);
1412                                                 shouldReturn |=
1413                                                     determine_validation_error_offset<
1414                                                         unsigned int>(
1415                                                         imagePtr, imageInfo,
1416                                                         imageSampler, resultPtr,
1417                                                         expected, error,
1418                                                         xOffsetValues[j],
1419                                                         yOffsetValues[j],
1420                                                         zOffsetValues[j],
1421                                                         norm_offset_x,
1422                                                         norm_offset_y,
1423                                                         norm_offset_z, j,
1424                                                         numTries, numClamped,
1425                                                         false, lod);
1426                                             }
1427                                             else
1428                                             {
1429                                                 log_error(
1430                                                     "Test error: we should "
1431                                                     "have detected this "
1432                                                     "passing above.\n");
1433                                             }
1434                                         } // norm_offset_z
1435                                     } // norm_offset_y
1436                                 } // norm_offset_x
1437                                 if (shouldReturn) return 1;
1438                             } // if (!found_pixel)
1439 
1440                             resultPtr += 4;
1441                         }
1442                     }
1443                 }
1444             }
1445             else
1446             /*
1447              * INT output type
1448              */
1449             {
1450                 // Validate integer results
1451                 int *resultPtr = (int *)(char *)resultValues;
1452                 int expected[4];
1453                 float error;
1454                 for (size_t z = 0, j = 0; z < depth_lod; z++)
1455                 {
1456                     for (size_t y = 0; y < height_lod; y++)
1457                     {
1458                         for (size_t x = 0; x < width_lod; x++, j++)
1459                         {
1460                             // Step 1: go through and see if the results verify
1461                             // for the pixel For the normalized case on a GPU we
1462                             // put in offsets to the X, Y and Z to see if we
1463                             // land on the right pixel. This addresses the
1464                             // significant inaccuracy in GPU normalization in
1465                             // OpenCL 1.0.
1466                             int checkOnlyOnePixel = 0;
1467                             int found_pixel = 0;
1468                             for (float norm_offset_x = -NORM_OFFSET;
1469                                  norm_offset_x <= NORM_OFFSET && !found_pixel
1470                                  && !checkOnlyOnePixel;
1471                                  norm_offset_x += NORM_OFFSET)
1472                             {
1473                                 for (float norm_offset_y = -NORM_OFFSET;
1474                                      norm_offset_y <= NORM_OFFSET
1475                                      && !found_pixel && !checkOnlyOnePixel;
1476                                      norm_offset_y += NORM_OFFSET)
1477                                 {
1478                                     for (float norm_offset_z = -NORM_OFFSET;
1479                                          norm_offset_z <= NORM_OFFSET
1480                                          && !found_pixel && !checkOnlyOnePixel;
1481                                          norm_offset_z += NORM_OFFSET)
1482                                     {
1483 
1484                                         // If we are not on a GPU, or we are not
1485                                         // normalized, then only test with
1486                                         // offsets (0.0, 0.0) E.g., test one
1487                                         // pixel.
1488                                         if (!imageSampler->normalized_coords
1489                                             || !(gDeviceType
1490                                                  & CL_DEVICE_TYPE_GPU)
1491                                             || NORM_OFFSET == 0)
1492                                         {
1493                                             norm_offset_x = 0.0f;
1494                                             norm_offset_y = 0.0f;
1495                                             norm_offset_z = 0.0f;
1496                                             checkOnlyOnePixel = 1;
1497                                         }
1498 
1499                                         sample_image_pixel_offset<int>(
1500                                             imagePtr, imageInfo,
1501                                             xOffsetValues[j], yOffsetValues[j],
1502                                             zOffsetValues[j], norm_offset_x,
1503                                             norm_offset_y, norm_offset_z,
1504                                             imageSampler, expected, lod);
1505 
1506                                         error = errMax(
1507                                             errMax(abs_diff_int(expected[0],
1508                                                                 resultPtr[0]),
1509                                                    abs_diff_int(expected[1],
1510                                                                 resultPtr[1])),
1511                                             errMax(abs_diff_int(expected[2],
1512                                                                 resultPtr[2]),
1513                                                    abs_diff_int(expected[3],
1514                                                                 resultPtr[3])));
1515 
1516                                         if (error < MAX_ERR) found_pixel = 1;
1517                                     } // norm_offset_z
1518                                 } // norm_offset_y
1519                             } // norm_offset_x
1520 
1521                             // Step 2: If we did not find a match, then print
1522                             // out debugging info.
1523                             if (!found_pixel)
1524                             {
1525                                 // For the normalized case on a GPU we put in
1526                                 // offsets to the X and Y to see if we land on
1527                                 // the right pixel. This addresses the
1528                                 // significant inaccuracy in GPU normalization
1529                                 // in OpenCL 1.0.
1530                                 checkOnlyOnePixel = 0;
1531                                 int shouldReturn = 0;
1532                                 for (float norm_offset_x = -NORM_OFFSET;
1533                                      norm_offset_x <= NORM_OFFSET
1534                                      && !checkOnlyOnePixel;
1535                                      norm_offset_x += NORM_OFFSET)
1536                                 {
1537                                     for (float norm_offset_y = -NORM_OFFSET;
1538                                          norm_offset_y <= NORM_OFFSET
1539                                          && !checkOnlyOnePixel;
1540                                          norm_offset_y += NORM_OFFSET)
1541                                     {
1542                                         for (float norm_offset_z = -NORM_OFFSET;
1543                                              norm_offset_z <= NORM_OFFSET
1544                                              && !checkOnlyOnePixel;
1545                                              norm_offset_z += NORM_OFFSET)
1546                                         {
1547 
1548                                             // If we are not on a GPU, or we are
1549                                             // not normalized, then only test
1550                                             // with offsets (0.0, 0.0) E.g.,
1551                                             // test one pixel.
1552                                             if (!imageSampler->normalized_coords
1553                                                 || gDeviceType
1554                                                     != CL_DEVICE_TYPE_GPU
1555                                                 || NORM_OFFSET == 0
1556                                                 || NORM_OFFSET == 0
1557                                                 || NORM_OFFSET == 0)
1558                                             {
1559                                                 norm_offset_x = 0.0f;
1560                                                 norm_offset_y = 0.0f;
1561                                                 norm_offset_z = 0.0f;
1562                                                 checkOnlyOnePixel = 1;
1563                                             }
1564 
1565                                             sample_image_pixel_offset<int>(
1566                                                 imagePtr, imageInfo,
1567                                                 xOffsetValues[j],
1568                                                 yOffsetValues[j],
1569                                                 zOffsetValues[j], norm_offset_x,
1570                                                 norm_offset_y, norm_offset_z,
1571                                                 imageSampler, expected, lod);
1572 
1573                                             error = errMax(
1574                                                 errMax(
1575                                                     abs_diff_int(expected[0],
1576                                                                  resultPtr[0]),
1577                                                     abs_diff_int(expected[1],
1578                                                                  resultPtr[1])),
1579                                                 errMax(
1580                                                     abs_diff_int(expected[2],
1581                                                                  resultPtr[2]),
1582                                                     abs_diff_int(
1583                                                         expected[3],
1584                                                         resultPtr[3])));
1585 
1586                                             if (error > MAX_ERR)
1587                                             {
1588                                                 log_error(
1589                                                     "FAILED norm_offsets: %g , "
1590                                                     "%g , %g:\n",
1591                                                     norm_offset_x,
1592                                                     norm_offset_y,
1593                                                     norm_offset_z);
1594                                                 shouldReturn |=
1595                                                     determine_validation_error_offset<
1596                                                         int>(
1597                                                         imagePtr, imageInfo,
1598                                                         imageSampler, resultPtr,
1599                                                         expected, error,
1600                                                         xOffsetValues[j],
1601                                                         yOffsetValues[j],
1602                                                         zOffsetValues[j],
1603                                                         norm_offset_x,
1604                                                         norm_offset_y,
1605                                                         norm_offset_z, j,
1606                                                         numTries, numClamped,
1607                                                         false, lod);
1608                                             }
1609                                             else
1610                                             {
1611                                                 log_error(
1612                                                     "Test error: we should "
1613                                                     "have detected this "
1614                                                     "passing above.\n");
1615                                             }
1616                                         } // norm_offset_z
1617                                     } // norm_offset_y
1618                                 } // norm_offset_x
1619                                 if (shouldReturn) return 1;
1620                             } // if (!found_pixel)
1621 
1622                             resultPtr += 4;
1623                         }
1624                     }
1625                 }
1626             }
1627         }
1628         {
1629             nextLevelOffset += width_lod * height_lod * depth_lod
1630                 * get_pixel_size(imageInfo->format);
1631             width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1;
1632             if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
1633             {
1634                 height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1;
1635             }
1636             if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
1637             {
1638                 depth_lod = (depth_lod >> 1) ? (depth_lod >> 1) : 1;
1639             }
1640         }
1641     }
1642 
1643     return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
1644 }
1645 
filter_undefined_bits(image_descriptor * imageInfo,char * resultPtr)1646 void filter_undefined_bits(image_descriptor *imageInfo, char *resultPtr)
1647 {
1648     // mask off the top bit (bit 15) if the image format is (CL_UNORM_SHORT_555,
1649     // CL_RGB). (Note: OpenCL says: the top bit is undefined meaning it can be
1650     // either 0 or 1.)
1651     if (imageInfo->format->image_channel_data_type == CL_UNORM_SHORT_555)
1652     {
1653         cl_ushort *temp = (cl_ushort *)resultPtr;
1654         temp[0] &= 0x7fff;
1655     }
1656 }
1657 
filter_rounding_errors(int forceCorrectlyRoundedWrites,image_descriptor * imageInfo,float * errors)1658 int filter_rounding_errors(int forceCorrectlyRoundedWrites,
1659                            image_descriptor *imageInfo, float *errors)
1660 {
1661     // We are allowed 0.6 absolute error vs. infinitely precise for some
1662     // normalized formats
1663     if (0 == forceCorrectlyRoundedWrites
1664         && (imageInfo->format->image_channel_data_type == CL_UNORM_INT8
1665             || imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010
1666             || imageInfo->format->image_channel_data_type == CL_UNORM_INT16
1667             || imageInfo->format->image_channel_data_type == CL_SNORM_INT8
1668             || imageInfo->format->image_channel_data_type == CL_SNORM_INT16
1669             || imageInfo->format->image_channel_data_type == CL_UNORM_SHORT_555
1670             || imageInfo->format->image_channel_data_type
1671                 == CL_UNORM_SHORT_565))
1672     {
1673         if (!(fabsf(errors[0]) > 0.6f) && !(fabsf(errors[1]) > 0.6f)
1674             && !(fabsf(errors[2]) > 0.6f) && !(fabsf(errors[3]) > 0.6f))
1675             return 0;
1676     }
1677 
1678     return 1;
1679 }
1680