1 //
2 // Copyright (c) 2021 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include "test_common.h"
18
19 #include <algorithm>
20
create_sampler(cl_context context,image_sampler_data * sdata,bool test_mipmaps,cl_int * error)21 cl_sampler create_sampler(cl_context context, image_sampler_data *sdata, bool test_mipmaps, cl_int *error) {
22 cl_sampler sampler = nullptr;
23 if (test_mipmaps) {
24 cl_sampler_properties properties[] = {
25 CL_SAMPLER_NORMALIZED_COORDS, sdata->normalized_coords,
26 CL_SAMPLER_ADDRESSING_MODE, sdata->addressing_mode,
27 CL_SAMPLER_FILTER_MODE, sdata->filter_mode,
28 CL_SAMPLER_MIP_FILTER_MODE, sdata->filter_mode,
29 0};
30 sampler = clCreateSamplerWithProperties(context, properties, error);
31 } else {
32 sampler = clCreateSampler(context, sdata->normalized_coords, sdata->addressing_mode, sdata->filter_mode, error);
33 }
34 return sampler;
35 }
36
get_image_dimensions(image_descriptor * imageInfo,size_t & width,size_t & height,size_t & depth)37 bool get_image_dimensions(image_descriptor *imageInfo, size_t &width,
38 size_t &height, size_t &depth)
39 {
40 width = imageInfo->width;
41 height = 1;
42 depth = 1;
43 switch (imageInfo->type)
44 {
45 case CL_MEM_OBJECT_IMAGE1D: break;
46 case CL_MEM_OBJECT_IMAGE1D_ARRAY: height = imageInfo->arraySize; break;
47 case CL_MEM_OBJECT_IMAGE2D: height = imageInfo->height; break;
48 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
49 height = imageInfo->height;
50 depth = imageInfo->arraySize;
51 break;
52 case CL_MEM_OBJECT_IMAGE3D:
53 height = imageInfo->height;
54 depth = imageInfo->depth;
55 break;
56 default:
57 log_error("ERROR: Test does not support image type");
58 return TEST_FAIL;
59 }
60 return 0;
61 }
62
InitFloatCoordsCommon(image_descriptor * imageInfo,image_sampler_data * imageSampler,float * xOffsets,float * yOffsets,float * zOffsets,float xfract,float yfract,float zfract,int normalized_coords,MTdata d,int lod)63 static bool InitFloatCoordsCommon(image_descriptor *imageInfo,
64 image_sampler_data *imageSampler,
65 float *xOffsets, float *yOffsets,
66 float *zOffsets, float xfract, float yfract,
67 float zfract, int normalized_coords, MTdata d,
68 int lod)
69 {
70 size_t i = 0;
71 size_t width_loop, height_loop, depth_loop;
72 bool error =
73 get_image_dimensions(imageInfo, width_loop, height_loop, depth_loop);
74 if (!error)
75 {
76 if (gDisableOffsets)
77 {
78 for (size_t z = 0; z < depth_loop; z++)
79 {
80 for (size_t y = 0; y < height_loop; y++)
81 {
82 for (size_t x = 0; x < width_loop; x++, i++)
83 {
84 xOffsets[i] = (float)(xfract + (double)x);
85 yOffsets[i] = (float)(yfract + (double)y);
86 zOffsets[i] = (float)(zfract + (double)z);
87 }
88 }
89 }
90 }
91 else
92 {
93 for (size_t z = 0; z < depth_loop; z++)
94 {
95 for (size_t y = 0; y < height_loop; y++)
96 {
97 for (size_t x = 0; x < width_loop; x++, i++)
98 {
99 xOffsets[i] =
100 (float)(xfract
101 + (double)((int)x
102 + random_in_range(-10, 10, d)));
103 yOffsets[i] =
104 (float)(yfract
105 + (double)((int)y
106 + random_in_range(-10, 10, d)));
107 zOffsets[i] =
108 (float)(zfract
109 + (double)((int)z
110 + random_in_range(-10, 10, d)));
111 }
112 }
113 }
114 }
115
116 if (imageSampler->addressing_mode == CL_ADDRESS_NONE)
117 {
118 i = 0;
119 for (size_t z = 0; z < depth_loop; z++)
120 {
121 for (size_t y = 0; y < height_loop; y++)
122 {
123 for (size_t x = 0; x < width_loop; x++, i++)
124 {
125 xOffsets[i] = (float)CLAMP((double)xOffsets[i], 0.0,
126 (double)width_loop - 1.0);
127 yOffsets[i] = (float)CLAMP((double)yOffsets[i], 0.0,
128 (double)height_loop - 1.0);
129 zOffsets[i] = (float)CLAMP((double)zOffsets[i], 0.0,
130 (double)depth_loop - 1.0);
131 }
132 }
133 }
134 }
135
136 if (normalized_coords || gTestMipmaps)
137 {
138 i = 0;
139 if (lod == 0)
140 {
141 for (size_t z = 0; z < depth_loop; z++)
142 {
143 for (size_t y = 0; y < height_loop; y++)
144 {
145 for (size_t x = 0; x < width_loop; x++, i++)
146 {
147 xOffsets[i] = (float)((double)xOffsets[i]
148 / (double)width_loop);
149 if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
150 {
151 yOffsets[i] = (float)((double)yOffsets[i]
152 / (double)height_loop);
153 }
154 if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
155 {
156 zOffsets[i] = (float)((double)zOffsets[i]
157 / (double)depth_loop);
158 }
159 }
160 }
161 }
162 }
163 else if (gTestMipmaps)
164 {
165 size_t width_lod =
166 (width_loop >> lod) ? (width_loop >> lod) : 1;
167 size_t height_lod = height_loop;
168 size_t depth_lod = depth_loop;
169 if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
170 {
171 height_lod =
172 (height_loop >> lod) ? (height_loop >> lod) : 1;
173 }
174 if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
175 {
176 depth_lod = (depth_loop >> lod) ? (depth_loop >> lod) : 1;
177 }
178
179 for (size_t z = 0; z < depth_lod; z++)
180 {
181 for (size_t y = 0; y < height_lod; y++)
182 {
183 for (size_t x = 0; x < width_lod; x++, i++)
184 {
185 xOffsets[i] = (float)((double)xOffsets[i]
186 / (double)width_lod);
187 if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
188 {
189 yOffsets[i] = (float)((double)yOffsets[i]
190 / (double)height_lod);
191 }
192 if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
193 {
194 zOffsets[i] = (float)((double)zOffsets[i]
195 / (double)depth_lod);
196 }
197 }
198 }
199 }
200 }
201 }
202 }
203 return error;
204 }
205
create_image_of_type(cl_context context,cl_mem_flags mem_flags,image_descriptor * imageInfo,size_t row_pitch,size_t slice_pitch,void * host_ptr,cl_int * error)206 cl_mem create_image_of_type(cl_context context, cl_mem_flags mem_flags,
207 image_descriptor *imageInfo, size_t row_pitch,
208 size_t slice_pitch, void *host_ptr, cl_int *error)
209 {
210 cl_mem image;
211 switch (imageInfo->type)
212 {
213 case CL_MEM_OBJECT_IMAGE3D:
214 image = create_image_3d(context, mem_flags, imageInfo->format,
215 imageInfo->width, imageInfo->height,
216 imageInfo->depth, row_pitch, slice_pitch,
217 host_ptr, error);
218 break;
219 default:
220 log_error("Implementation is incomplete, only 3D images are "
221 "supported so far");
222 return nullptr;
223 }
224 return image;
225 }
226
get_image_num_pixels(image_descriptor * imageInfo,size_t width,size_t height,size_t depth,size_t array_size)227 static size_t get_image_num_pixels(image_descriptor *imageInfo, size_t width,
228 size_t height, size_t depth,
229 size_t array_size)
230 {
231 size_t image_size;
232 switch (imageInfo->type)
233 {
234 case CL_MEM_OBJECT_IMAGE3D: image_size = width * height * depth; break;
235 default:
236 log_error("Implementation is incomplete, only 3D images are "
237 "supported so far");
238 return 0;
239 }
240 return image_size;
241 }
242
test_read_image(cl_context context,cl_command_queue queue,cl_kernel kernel,image_descriptor * imageInfo,image_sampler_data * imageSampler,bool useFloatCoords,ExplicitType outputType,MTdata d)243 int test_read_image(cl_context context, cl_command_queue queue,
244 cl_kernel kernel, image_descriptor *imageInfo,
245 image_sampler_data *imageSampler, bool useFloatCoords,
246 ExplicitType outputType, MTdata d)
247 {
248 int error;
249 size_t threads[3];
250 static int initHalf = 0;
251
252 size_t image_size =
253 get_image_num_pixels(imageInfo, imageInfo->width, imageInfo->height,
254 imageInfo->depth, imageInfo->arraySize);
255 test_assert_error(0 != image_size, "Invalid image size");
256 size_t width_size, height_size, depth_size;
257 if (get_image_dimensions(imageInfo, width_size, height_size, depth_size))
258 {
259 log_error("ERROR: invalid image dimensions");
260 return CL_INVALID_VALUE;
261 }
262
263 cl_mem_flags image_read_write_flags = CL_MEM_READ_ONLY;
264
265 clMemWrapper xOffsets, yOffsets, zOffsets, results;
266 clSamplerWrapper actualSampler;
267 BufferOwningPtr<char> maxImageUseHostPtrBackingStore;
268
269 // Create offset data
270 BufferOwningPtr<cl_float> xOffsetValues(
271 malloc(sizeof(cl_float) * image_size));
272 BufferOwningPtr<cl_float> yOffsetValues(
273 malloc(sizeof(cl_float) * image_size));
274 BufferOwningPtr<cl_float> zOffsetValues(
275 malloc(sizeof(cl_float) * image_size));
276
277 if (imageInfo->format->image_channel_data_type == CL_HALF_FLOAT)
278 if (DetectFloatToHalfRoundingMode(queue)) return 1;
279
280 BufferOwningPtr<char> imageValues;
281 generate_random_image_data(imageInfo, imageValues, d);
282
283 // Construct testing sources
284 clProtectedImage protImage;
285 clMemWrapper unprotImage;
286 cl_mem image;
287
288 if (gtestTypesToRun & kReadTests)
289 {
290 image_read_write_flags = CL_MEM_READ_ONLY;
291 }
292 else
293 {
294 image_read_write_flags = CL_MEM_READ_WRITE;
295 }
296
297 if (gMemFlagsToUse == CL_MEM_USE_HOST_PTR)
298 {
299 // clProtectedImage uses USE_HOST_PTR, so just rely on that for the
300 // testing (via Ian) Do not use protected images for max image size test
301 // since it rounds the row size to a page size
302 if (gTestMaxImages)
303 {
304 generate_random_image_data(imageInfo,
305 maxImageUseHostPtrBackingStore, d);
306 unprotImage = create_image_of_type(
307 context, image_read_write_flags | CL_MEM_USE_HOST_PTR,
308 imageInfo, (gEnablePitch ? imageInfo->rowPitch : 0),
309 (gEnablePitch ? imageInfo->slicePitch : 0),
310 maxImageUseHostPtrBackingStore, &error);
311 }
312 else
313 {
314 error = protImage.Create(context, imageInfo->type,
315 image_read_write_flags, imageInfo->format,
316 imageInfo->width, imageInfo->height,
317 imageInfo->depth, imageInfo->arraySize);
318 }
319 if (error != CL_SUCCESS)
320 {
321 log_error("ERROR: Unable to create image of size %d x %d x %d x %d "
322 "(pitch %d, %d ) (%s)",
323 (int)imageInfo->width, (int)imageInfo->height,
324 (int)imageInfo->depth, (int)imageInfo->arraySize,
325 (int)imageInfo->rowPitch, (int)imageInfo->slicePitch,
326 IGetErrorString(error));
327 return error;
328 }
329 if (gTestMaxImages)
330 image = (cl_mem)unprotImage;
331 else
332 image = (cl_mem)protImage;
333 }
334 else if (gMemFlagsToUse == CL_MEM_COPY_HOST_PTR)
335 {
336 // Don't use clEnqueueWriteImage; just use copy host ptr to get the data
337 // in
338 unprotImage = create_image_of_type(
339 context, image_read_write_flags | CL_MEM_COPY_HOST_PTR, imageInfo,
340 (gEnablePitch ? imageInfo->rowPitch : 0),
341 (gEnablePitch ? imageInfo->slicePitch : 0), imageValues, &error);
342 if (error != CL_SUCCESS)
343 {
344 log_error("ERROR: Unable to create image of size %d x %d x %d x %d "
345 "(pitch %d, %d ) (%s)",
346 (int)imageInfo->width, (int)imageInfo->height,
347 (int)imageInfo->depth, (int)imageInfo->arraySize,
348 (int)imageInfo->rowPitch, (int)imageInfo->slicePitch,
349 IGetErrorString(error));
350 return error;
351 }
352 image = unprotImage;
353 }
354 else // Either CL_MEM_ALLOC_HOST_PTR or none
355 {
356 // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can
357 // be accessed by the host, but otherwise it works just as if no flag is
358 // specified, so we just do the same thing either way
359 if (!gTestMipmaps)
360 {
361 unprotImage = create_image_of_type(
362 context, image_read_write_flags | gMemFlagsToUse, imageInfo,
363 (gEnablePitch ? imageInfo->rowPitch : 0),
364 (gEnablePitch ? imageInfo->slicePitch : 0), imageValues,
365 &error);
366 if (error != CL_SUCCESS)
367 {
368 log_error("ERROR: Unable to create image of size %d x %d x "
369 "%d x %d (pitch %d, %d ) (%s)",
370 (int)imageInfo->width, (int)imageInfo->height,
371 (int)imageInfo->depth, (int)imageInfo->arraySize,
372 (int)imageInfo->rowPitch, (int)imageInfo->slicePitch,
373 IGetErrorString(error));
374 return error;
375 }
376 image = unprotImage;
377 }
378 else
379 {
380 cl_image_desc image_desc = { 0 };
381 image_desc.image_type = imageInfo->type;
382 image_desc.image_width = imageInfo->width;
383 image_desc.image_height = imageInfo->height;
384 image_desc.image_depth = imageInfo->depth;
385 image_desc.image_array_size = imageInfo->arraySize;
386 image_desc.num_mip_levels = imageInfo->num_mip_levels;
387
388
389 unprotImage =
390 clCreateImage(context, image_read_write_flags,
391 imageInfo->format, &image_desc, NULL, &error);
392 if (error != CL_SUCCESS)
393 {
394 log_error("ERROR: Unable to create %d level mipmapped image "
395 "of size %d x %d x %d x %d (pitch %d, %d ) (%s)",
396 (int)imageInfo->num_mip_levels, (int)imageInfo->width,
397 (int)imageInfo->height, (int)imageInfo->depth,
398 (int)imageInfo->arraySize, (int)imageInfo->rowPitch,
399 (int)imageInfo->slicePitch, IGetErrorString(error));
400 return error;
401 }
402 image = unprotImage;
403 }
404 }
405
406 test_assert_error(nullptr != image, "Image creation failed");
407
408 if (gMemFlagsToUse != CL_MEM_COPY_HOST_PTR)
409 {
410 size_t origin[4] = { 0, 0, 0, 0 };
411 size_t region[3] = { width_size, height_size, depth_size };
412
413 if (gDebugTrace) log_info(" - Writing image...\n");
414
415 if (!gTestMipmaps)
416 {
417
418 error =
419 clEnqueueWriteImage(queue, image, CL_TRUE, origin, region,
420 gEnablePitch ? imageInfo->rowPitch : 0,
421 gEnablePitch ? imageInfo->slicePitch : 0,
422 imageValues, 0, NULL, NULL);
423
424 if (error != CL_SUCCESS)
425 {
426 log_error("ERROR: Unable to write to image of size %d x %d "
427 "x %d x %d\n",
428 (int)imageInfo->width, (int)imageInfo->height,
429 (int)imageInfo->depth, (int)imageInfo->arraySize);
430 return error;
431 }
432 }
433 else
434 {
435 int nextLevelOffset = 0;
436
437 for (int i = 0; i < imageInfo->num_mip_levels; i++)
438 {
439 origin[3] = i;
440 error = clEnqueueWriteImage(
441 queue, image, CL_TRUE, origin, region, 0, 0,
442 ((char *)imageValues + nextLevelOffset), 0, NULL, NULL);
443 if (error != CL_SUCCESS)
444 {
445 log_error("ERROR: Unable to write to %d level mipmapped "
446 "image of size %d x %d x %d x %d\n",
447 (int)imageInfo->num_mip_levels,
448 (int)imageInfo->width, (int)imageInfo->height,
449 (int)imageInfo->arraySize, (int)imageInfo->depth);
450 return error;
451 }
452 nextLevelOffset += region[0] * region[1] * region[2]
453 * get_pixel_size(imageInfo->format);
454 // Subsequent mip level dimensions keep halving
455 region[0] = region[0] >> 1 ? region[0] >> 1 : 1;
456 region[1] = region[1] >> 1 ? region[1] >> 1 : 1;
457 region[2] = region[2] >> 1 ? region[2] >> 1 : 1;
458 }
459 }
460 }
461
462 xOffsets =
463 clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
464 sizeof(cl_float) * image_size, xOffsetValues, &error);
465 test_error(error, "Unable to create x offset buffer");
466 yOffsets =
467 clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
468 sizeof(cl_float) * image_size, yOffsetValues, &error);
469 test_error(error, "Unable to create y offset buffer");
470 zOffsets =
471 clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
472 sizeof(cl_float) * image_size, zOffsetValues, &error);
473 test_error(error, "Unable to create y offset buffer");
474 results = clCreateBuffer(
475 context, CL_MEM_READ_WRITE,
476 get_explicit_type_size(outputType) * 4 * image_size, NULL, &error);
477 test_error(error, "Unable to create result buffer");
478
479 // Create sampler to use
480 actualSampler = create_sampler(context, imageSampler, gTestMipmaps, &error);
481 test_error(error, "Unable to create image sampler");
482
483 // Set arguments
484 int idx = 0;
485 error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &image);
486 test_error(error, "Unable to set kernel arguments");
487 if (!gUseKernelSamplers)
488 {
489 error =
490 clSetKernelArg(kernel, idx++, sizeof(cl_sampler), &actualSampler);
491 test_error(error, "Unable to set kernel arguments");
492 }
493 error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &xOffsets);
494 test_error(error, "Unable to set kernel arguments");
495 error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &yOffsets);
496 test_error(error, "Unable to set kernel arguments");
497 error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &zOffsets);
498 test_error(error, "Unable to set kernel arguments");
499 error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &results);
500 test_error(error, "Unable to set kernel arguments");
501
502 const float float_offsets[] = { 0.0f,
503 MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30),
504 0.25f,
505 0.3f,
506 0.5f - FLT_EPSILON / 4.0f,
507 0.5f,
508 0.9f,
509 1.0f - FLT_EPSILON / 2 };
510 int float_offset_count = sizeof(float_offsets) / sizeof(float_offsets[0]);
511 int numTries = MAX_TRIES, numClamped = MAX_CLAMPED;
512 int loopCount = 2 * float_offset_count;
513 if (!useFloatCoords) loopCount = 1;
514 if (gTestMaxImages)
515 {
516 loopCount = 1;
517 log_info("Testing each size only once with pixel offsets of %g for max "
518 "sized images.\n",
519 float_offsets[0]);
520 }
521
522 // Get the maximum absolute error for this format
523 double formatAbsoluteError =
524 get_max_absolute_error(imageInfo->format, imageSampler);
525 if (gDebugTrace)
526 log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError);
527
528 if (0 == initHalf
529 && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT)
530 {
531 initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode(queue);
532 if (initHalf)
533 {
534 log_info("Half rounding mode successfully detected.\n");
535 }
536 }
537
538 int nextLevelOffset = 0;
539 size_t width_lod = width_size, height_lod = height_size,
540 depth_lod = depth_size;
541
542 // Loop over all mipmap levels, if we are testing mipmapped images.
543 for (int lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels)
544 || (!gTestMipmaps && lod < 1);
545 lod++)
546 {
547 size_t image_lod_size = get_image_num_pixels(
548 imageInfo, width_lod, height_lod, depth_lod, imageInfo->arraySize);
549 test_assert_error(0 != image_lod_size, "Invalid image size");
550 size_t resultValuesSize =
551 image_lod_size * get_explicit_type_size(outputType) * 4;
552 BufferOwningPtr<char> resultValues(malloc(resultValuesSize));
553 float lod_float = (float)lod;
554 if (gTestMipmaps)
555 {
556 // Set the lod kernel arg
557 if (gDebugTrace) log_info(" - Working at mip level %d\n", lod);
558 error = clSetKernelArg(kernel, idx, sizeof(float), &lod_float);
559 test_error(error, "Unable to set kernel arguments");
560 }
561
562 for (int q = 0; q < loopCount; q++)
563 {
564 float offset = float_offsets[q % float_offset_count];
565
566 // Init the coordinates
567 error = InitFloatCoordsCommon(
568 imageInfo, imageSampler, xOffsetValues, yOffsetValues,
569 zOffsetValues, q >= float_offset_count ? -offset : offset,
570 q >= float_offset_count ? offset : -offset,
571 q >= float_offset_count ? -offset : offset,
572 imageSampler->normalized_coords, d, lod);
573 test_error(error, "Unable to initialise coordinates");
574
575 error = clEnqueueWriteBuffer(queue, xOffsets, CL_TRUE, 0,
576 sizeof(cl_float) * image_size,
577 xOffsetValues, 0, NULL, NULL);
578 test_error(error, "Unable to write x offsets");
579 error = clEnqueueWriteBuffer(queue, yOffsets, CL_TRUE, 0,
580 sizeof(cl_float) * image_size,
581 yOffsetValues, 0, NULL, NULL);
582 test_error(error, "Unable to write y offsets");
583 error = clEnqueueWriteBuffer(queue, zOffsets, CL_TRUE, 0,
584 sizeof(cl_float) * image_size,
585 zOffsetValues, 0, NULL, NULL);
586 test_error(error, "Unable to write z offsets");
587
588
589 memset(resultValues, 0xff, resultValuesSize);
590 clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, resultValuesSize,
591 resultValues, 0, NULL, NULL);
592
593 // Figure out thread dimensions
594 threads[0] = (size_t)width_lod;
595 threads[1] = (size_t)height_lod;
596 threads[2] = (size_t)depth_lod;
597
598 // Run the kernel
599 error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, threads,
600 NULL, 0, NULL, NULL);
601 test_error(error, "Unable to run kernel");
602
603 // Get results
604 error = clEnqueueReadBuffer(
605 queue, results, CL_TRUE, 0,
606 image_lod_size * get_explicit_type_size(outputType) * 4,
607 resultValues, 0, NULL, NULL);
608 test_error(error, "Unable to read results from kernel");
609 if (gDebugTrace) log_info(" results read\n");
610
611 // Validate results element by element
612 char *imagePtr = (char *)imageValues + nextLevelOffset;
613 /*
614 * FLOAT output type
615 */
616 if (is_sRGBA_order(imageInfo->format->image_channel_order)
617 && (outputType == kFloat))
618 {
619 // Validate float results
620 float *resultPtr = (float *)(char *)resultValues;
621 float expected[4], error = 0.0f;
622 float maxErr = get_max_relative_error(
623 imageInfo->format, imageSampler, 1 /*3D*/,
624 CL_FILTER_LINEAR == imageSampler->filter_mode);
625
626 for (size_t z = 0, j = 0; z < depth_lod; z++)
627 {
628 for (size_t y = 0; y < height_lod; y++)
629 {
630 for (size_t x = 0; x < width_lod; x++, j++)
631 {
632 // Step 1: go through and see if the results verify
633 // for the pixel For the normalized case on a GPU we
634 // put in offsets to the X, Y and Z to see if we
635 // land on the right pixel. This addresses the
636 // significant inaccuracy in GPU normalization in
637 // OpenCL 1.0.
638 int checkOnlyOnePixel = 0;
639 int found_pixel = 0;
640 float offset = NORM_OFFSET;
641 if (!imageSampler->normalized_coords
642 || imageSampler->filter_mode
643 != CL_FILTER_NEAREST
644 || NORM_OFFSET == 0
645 #if defined(__APPLE__)
646 // Apple requires its CPU implementation to do
647 // correctly rounded address arithmetic in all
648 // modes
649 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
650 #endif
651 )
652 offset = 0.0f; // Loop only once
653
654 for (float norm_offset_x = -offset;
655 norm_offset_x <= offset && !found_pixel;
656 norm_offset_x += NORM_OFFSET)
657 {
658 for (float norm_offset_y = -offset;
659 norm_offset_y <= offset && !found_pixel;
660 norm_offset_y += NORM_OFFSET)
661 {
662 for (float norm_offset_z = -offset;
663 norm_offset_z <= NORM_OFFSET
664 && !found_pixel;
665 norm_offset_z += NORM_OFFSET)
666 {
667
668 int hasDenormals = 0;
669 FloatPixel maxPixel =
670 sample_image_pixel_float_offset(
671 imagePtr, imageInfo,
672 xOffsetValues[j],
673 yOffsetValues[j],
674 zOffsetValues[j], norm_offset_x,
675 norm_offset_y, norm_offset_z,
676 imageSampler, expected, 0,
677 &hasDenormals, lod);
678
679 float err1 =
680 ABS_ERROR(sRGBmap(resultPtr[0]),
681 sRGBmap(expected[0]));
682 float err2 =
683 ABS_ERROR(sRGBmap(resultPtr[1]),
684 sRGBmap(expected[1]));
685 float err3 =
686 ABS_ERROR(sRGBmap(resultPtr[2]),
687 sRGBmap(expected[2]));
688 float err4 = ABS_ERROR(resultPtr[3],
689 expected[3]);
690 // Clamp to the minimum absolute error
691 // for the format
692 if (err1 > 0
693 && err1 < formatAbsoluteError)
694 {
695 err1 = 0.0f;
696 }
697 if (err2 > 0
698 && err2 < formatAbsoluteError)
699 {
700 err2 = 0.0f;
701 }
702 if (err3 > 0
703 && err3 < formatAbsoluteError)
704 {
705 err3 = 0.0f;
706 }
707 if (err4 > 0
708 && err4 < formatAbsoluteError)
709 {
710 err4 = 0.0f;
711 }
712 float maxErr = 0.5;
713
714 if (!(err1 <= maxErr)
715 || !(err2 <= maxErr)
716 || !(err3 <= maxErr)
717 || !(err4 <= maxErr))
718 {
719 // Try flushing the denormals
720 if (hasDenormals)
721 {
722 // If implementation decide to
723 // flush subnormals to zero, max
724 // error needs to be adjusted
725 maxErr += 4 * FLT_MIN;
726
727 maxPixel =
728 sample_image_pixel_float_offset(
729 imagePtr, imageInfo,
730 xOffsetValues[j],
731 yOffsetValues[j],
732 zOffsetValues[j],
733 norm_offset_x,
734 norm_offset_y,
735 norm_offset_z,
736 imageSampler, expected,
737 0, NULL, lod);
738
739 err1 = ABS_ERROR(
740 sRGBmap(resultPtr[0]),
741 sRGBmap(expected[0]));
742 err2 = ABS_ERROR(
743 sRGBmap(resultPtr[1]),
744 sRGBmap(expected[1]));
745 err3 = ABS_ERROR(
746 sRGBmap(resultPtr[2]),
747 sRGBmap(expected[2]));
748 err4 = ABS_ERROR(resultPtr[3],
749 expected[3]);
750 }
751 }
752
753 found_pixel = (err1 <= maxErr)
754 && (err2 <= maxErr)
755 && (err3 <= maxErr)
756 && (err4 <= maxErr);
757 } // norm_offset_z
758 } // norm_offset_y
759 } // norm_offset_x
760
761 // Step 2: If we did not find a match, then print
762 // out debugging info.
763 if (!found_pixel)
764 {
765 // For the normalized case on a GPU we put in
766 // offsets to the X and Y to see if we land on
767 // the right pixel. This addresses the
768 // significant inaccuracy in GPU normalization
769 // in OpenCL 1.0.
770 checkOnlyOnePixel = 0;
771 int shouldReturn = 0;
772 for (float norm_offset_x = -offset;
773 norm_offset_x <= offset
774 && !checkOnlyOnePixel;
775 norm_offset_x += NORM_OFFSET)
776 {
777 for (float norm_offset_y = -offset;
778 norm_offset_y <= offset
779 && !checkOnlyOnePixel;
780 norm_offset_y += NORM_OFFSET)
781 {
782 for (float norm_offset_z = -offset;
783 norm_offset_z <= offset
784 && !checkOnlyOnePixel;
785 norm_offset_z += NORM_OFFSET)
786 {
787
788 int hasDenormals = 0;
789 FloatPixel maxPixel =
790 sample_image_pixel_float_offset(
791 imagePtr, imageInfo,
792 xOffsetValues[j],
793 yOffsetValues[j],
794 zOffsetValues[j],
795 norm_offset_x,
796 norm_offset_y,
797 norm_offset_z, imageSampler,
798 expected, 0, &hasDenormals,
799 lod);
800
801 float err1 =
802 ABS_ERROR(sRGBmap(resultPtr[0]),
803 sRGBmap(expected[0]));
804 float err2 =
805 ABS_ERROR(sRGBmap(resultPtr[1]),
806 sRGBmap(expected[1]));
807 float err3 =
808 ABS_ERROR(sRGBmap(resultPtr[2]),
809 sRGBmap(expected[2]));
810 float err4 = ABS_ERROR(resultPtr[3],
811 expected[3]);
812 float maxErr = 0.6;
813
814 if (!(err1 <= maxErr)
815 || !(err2 <= maxErr)
816 || !(err3 <= maxErr)
817 || !(err4 <= maxErr))
818 {
819 // Try flushing the denormals
820 if (hasDenormals)
821 {
822 // If implementation decide
823 // to flush subnormals to
824 // zero, max error needs to
825 // be adjusted
826 maxErr += 4 * FLT_MIN;
827
828 maxPixel =
829 sample_image_pixel_float(
830 imagePtr, imageInfo,
831 xOffsetValues[j],
832 yOffsetValues[j],
833 zOffsetValues[j],
834 imageSampler,
835 expected, 0, NULL,
836 lod);
837
838 err1 = ABS_ERROR(
839 sRGBmap(resultPtr[0]),
840 sRGBmap(expected[0]));
841 err2 = ABS_ERROR(
842 sRGBmap(resultPtr[1]),
843 sRGBmap(expected[1]));
844 err3 = ABS_ERROR(
845 sRGBmap(resultPtr[2]),
846 sRGBmap(expected[2]));
847 err4 =
848 ABS_ERROR(resultPtr[3],
849 expected[3]);
850 }
851 }
852
853 if (!(err1 <= maxErr)
854 || !(err2 <= maxErr)
855 || !(err3 <= maxErr)
856 || !(err4 <= maxErr))
857 {
858 log_error(
859 "FAILED norm_offsets: %g , "
860 "%g , %g:\n",
861 norm_offset_x,
862 norm_offset_y,
863 norm_offset_z);
864
865 float tempOut[4];
866 shouldReturn |=
867 determine_validation_error_offset<
868 float>(
869 imagePtr, imageInfo,
870 imageSampler, resultPtr,
871 expected, error,
872 xOffsetValues[j],
873 yOffsetValues[j],
874 zOffsetValues[j],
875 norm_offset_x,
876 norm_offset_y,
877 norm_offset_z, j,
878 numTries, numClamped,
879 true, lod);
880 log_error("Step by step:\n");
881 FloatPixel temp =
882 sample_image_pixel_float_offset(
883 imagePtr, imageInfo,
884 xOffsetValues[j],
885 yOffsetValues[j],
886 zOffsetValues[j],
887 norm_offset_x,
888 norm_offset_y,
889 norm_offset_z,
890 imageSampler, tempOut,
891 1 /*verbose*/,
892 &hasDenormals, lod);
893 log_error(
894 "\tulps: %2.2f, %2.2f, "
895 "%2.2f, %2.2f (max "
896 "allowed: %2.2f)\n\n",
897 Ulp_Error(resultPtr[0],
898 expected[0]),
899 Ulp_Error(resultPtr[1],
900 expected[1]),
901 Ulp_Error(resultPtr[2],
902 expected[2]),
903 Ulp_Error(resultPtr[3],
904 expected[3]),
905 Ulp_Error(
906 MAKE_HEX_FLOAT(
907 0x1.000002p0f,
908 0x1000002L, -24)
909 + maxErr,
910 MAKE_HEX_FLOAT(
911 0x1.000002p0f,
912 0x1000002L, -24)));
913 }
914 else
915 {
916 log_error(
917 "Test error: we should "
918 "have detected this "
919 "passing above.\n");
920 }
921 } // norm_offset_z
922 } // norm_offset_y
923 } // norm_offset_x
924 if (shouldReturn) return 1;
925 } // if (!found_pixel)
926
927 resultPtr += 4;
928 }
929 }
930 }
931 }
932 /*
933 * FLOAT output type
934 */
935 else if (outputType == kFloat)
936 {
937 // Validate float results
938 float *resultPtr = (float *)(char *)resultValues;
939 float expected[4], error = 0.0f;
940 float maxErr = get_max_relative_error(
941 imageInfo->format, imageSampler, 1 /*3D*/,
942 CL_FILTER_LINEAR == imageSampler->filter_mode);
943
944 for (size_t z = 0, j = 0; z < depth_lod; z++)
945 {
946 for (size_t y = 0; y < height_lod; y++)
947 {
948 for (size_t x = 0; x < width_lod; x++, j++)
949 {
950 // Step 1: go through and see if the results verify
951 // for the pixel For the normalized case on a GPU we
952 // put in offsets to the X, Y and Z to see if we
953 // land on the right pixel. This addresses the
954 // significant inaccuracy in GPU normalization in
955 // OpenCL 1.0.
956 int checkOnlyOnePixel = 0;
957 int found_pixel = 0;
958 float offset = NORM_OFFSET;
959 if (!imageSampler->normalized_coords
960 || imageSampler->filter_mode
961 != CL_FILTER_NEAREST
962 || NORM_OFFSET == 0
963 #if defined(__APPLE__)
964 // Apple requires its CPU implementation to do
965 // correctly rounded address arithmetic in all
966 // modes
967 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
968 #endif
969 )
970 offset = 0.0f; // Loop only once
971
972 for (float norm_offset_x = -offset;
973 norm_offset_x <= offset && !found_pixel;
974 norm_offset_x += NORM_OFFSET)
975 {
976 for (float norm_offset_y = -offset;
977 norm_offset_y <= offset && !found_pixel;
978 norm_offset_y += NORM_OFFSET)
979 {
980 for (float norm_offset_z = -offset;
981 norm_offset_z <= NORM_OFFSET
982 && !found_pixel;
983 norm_offset_z += NORM_OFFSET)
984 {
985
986 int hasDenormals = 0;
987 FloatPixel maxPixel =
988 sample_image_pixel_float_offset(
989 imagePtr, imageInfo,
990 xOffsetValues[j],
991 yOffsetValues[j],
992 zOffsetValues[j], norm_offset_x,
993 norm_offset_y, norm_offset_z,
994 imageSampler, expected, 0,
995 &hasDenormals, lod);
996
997 float err1 = ABS_ERROR(resultPtr[0],
998 expected[0]);
999 float err2 = ABS_ERROR(resultPtr[1],
1000 expected[1]);
1001 float err3 = ABS_ERROR(resultPtr[2],
1002 expected[2]);
1003 float err4 = ABS_ERROR(resultPtr[3],
1004 expected[3]);
1005 // Clamp to the minimum absolute error
1006 // for the format
1007 if (err1 > 0
1008 && err1 < formatAbsoluteError)
1009 {
1010 err1 = 0.0f;
1011 }
1012 if (err2 > 0
1013 && err2 < formatAbsoluteError)
1014 {
1015 err2 = 0.0f;
1016 }
1017 if (err3 > 0
1018 && err3 < formatAbsoluteError)
1019 {
1020 err3 = 0.0f;
1021 }
1022 if (err4 > 0
1023 && err4 < formatAbsoluteError)
1024 {
1025 err4 = 0.0f;
1026 }
1027 float maxErr1 = std::max(
1028 maxErr * maxPixel.p[0], FLT_MIN);
1029 float maxErr2 = std::max(
1030 maxErr * maxPixel.p[1], FLT_MIN);
1031 float maxErr3 = std::max(
1032 maxErr * maxPixel.p[2], FLT_MIN);
1033 float maxErr4 = std::max(
1034 maxErr * maxPixel.p[3], FLT_MIN);
1035
1036 if (!(err1 <= maxErr1)
1037 || !(err2 <= maxErr2)
1038 || !(err3 <= maxErr3)
1039 || !(err4 <= maxErr4))
1040 {
1041 // Try flushing the denormals
1042 if (hasDenormals)
1043 {
1044 // If implementation decide to
1045 // flush subnormals to zero, max
1046 // error needs to be adjusted
1047 maxErr1 += 4 * FLT_MIN;
1048 maxErr2 += 4 * FLT_MIN;
1049 maxErr3 += 4 * FLT_MIN;
1050 maxErr4 += 4 * FLT_MIN;
1051
1052 maxPixel =
1053 sample_image_pixel_float_offset(
1054 imagePtr, imageInfo,
1055 xOffsetValues[j],
1056 yOffsetValues[j],
1057 zOffsetValues[j],
1058 norm_offset_x,
1059 norm_offset_y,
1060 norm_offset_z,
1061 imageSampler, expected,
1062 0, NULL, lod);
1063
1064 err1 = ABS_ERROR(resultPtr[0],
1065 expected[0]);
1066 err2 = ABS_ERROR(resultPtr[1],
1067 expected[1]);
1068 err3 = ABS_ERROR(resultPtr[2],
1069 expected[2]);
1070 err4 = ABS_ERROR(resultPtr[3],
1071 expected[3]);
1072 }
1073 }
1074
1075 found_pixel = (err1 <= maxErr1)
1076 && (err2 <= maxErr2)
1077 && (err3 <= maxErr3)
1078 && (err4 <= maxErr4);
1079 } // norm_offset_z
1080 } // norm_offset_y
1081 } // norm_offset_x
1082
1083 // Step 2: If we did not find a match, then print
1084 // out debugging info.
1085 if (!found_pixel)
1086 {
1087 // For the normalized case on a GPU we put in
1088 // offsets to the X and Y to see if we land on
1089 // the right pixel. This addresses the
1090 // significant inaccuracy in GPU normalization
1091 // in OpenCL 1.0.
1092 checkOnlyOnePixel = 0;
1093 int shouldReturn = 0;
1094 for (float norm_offset_x = -offset;
1095 norm_offset_x <= offset
1096 && !checkOnlyOnePixel;
1097 norm_offset_x += NORM_OFFSET)
1098 {
1099 for (float norm_offset_y = -offset;
1100 norm_offset_y <= offset
1101 && !checkOnlyOnePixel;
1102 norm_offset_y += NORM_OFFSET)
1103 {
1104 for (float norm_offset_z = -offset;
1105 norm_offset_z <= offset
1106 && !checkOnlyOnePixel;
1107 norm_offset_z += NORM_OFFSET)
1108 {
1109
1110 int hasDenormals = 0;
1111 FloatPixel maxPixel =
1112 sample_image_pixel_float_offset(
1113 imagePtr, imageInfo,
1114 xOffsetValues[j],
1115 yOffsetValues[j],
1116 zOffsetValues[j],
1117 norm_offset_x,
1118 norm_offset_y,
1119 norm_offset_z, imageSampler,
1120 expected, 0, &hasDenormals,
1121 lod);
1122
1123 float err1 = ABS_ERROR(resultPtr[0],
1124 expected[0]);
1125 float err2 = ABS_ERROR(resultPtr[1],
1126 expected[1]);
1127 float err3 = ABS_ERROR(resultPtr[2],
1128 expected[2]);
1129 float err4 = ABS_ERROR(resultPtr[3],
1130 expected[3]);
1131 float maxErr1 =
1132 std::max(maxErr * maxPixel.p[0],
1133 FLT_MIN);
1134 float maxErr2 =
1135 std::max(maxErr * maxPixel.p[1],
1136 FLT_MIN);
1137 float maxErr3 =
1138 std::max(maxErr * maxPixel.p[2],
1139 FLT_MIN);
1140 float maxErr4 =
1141 std::max(maxErr * maxPixel.p[3],
1142 FLT_MIN);
1143
1144
1145 if (!(err1 <= maxErr1)
1146 || !(err2 <= maxErr2)
1147 || !(err3 <= maxErr3)
1148 || !(err4 <= maxErr4))
1149 {
1150 // Try flushing the denormals
1151 if (hasDenormals)
1152 {
1153 maxErr1 += 4 * FLT_MIN;
1154 maxErr2 += 4 * FLT_MIN;
1155 maxErr3 += 4 * FLT_MIN;
1156 maxErr4 += 4 * FLT_MIN;
1157
1158 maxPixel =
1159 sample_image_pixel_float(
1160 imagePtr, imageInfo,
1161 xOffsetValues[j],
1162 yOffsetValues[j],
1163 zOffsetValues[j],
1164 imageSampler,
1165 expected, 0, NULL,
1166 lod);
1167
1168 err1 =
1169 ABS_ERROR(resultPtr[0],
1170 expected[0]);
1171 err2 =
1172 ABS_ERROR(resultPtr[1],
1173 expected[1]);
1174 err3 =
1175 ABS_ERROR(resultPtr[2],
1176 expected[2]);
1177 err4 =
1178 ABS_ERROR(resultPtr[3],
1179 expected[3]);
1180 }
1181 }
1182
1183 if (!(err1 <= maxErr1)
1184 || !(err2 <= maxErr2)
1185 || !(err3 <= maxErr3)
1186 || !(err4 <= maxErr4))
1187 {
1188 log_error(
1189 "FAILED norm_offsets: %g , "
1190 "%g , %g:\n",
1191 norm_offset_x,
1192 norm_offset_y,
1193 norm_offset_z);
1194
1195 float tempOut[4];
1196 shouldReturn |=
1197 determine_validation_error_offset<
1198 float>(
1199 imagePtr, imageInfo,
1200 imageSampler, resultPtr,
1201 expected, error,
1202 xOffsetValues[j],
1203 yOffsetValues[j],
1204 zOffsetValues[j],
1205 norm_offset_x,
1206 norm_offset_y,
1207 norm_offset_z, j,
1208 numTries, numClamped,
1209 true, lod);
1210 log_error("Step by step:\n");
1211 FloatPixel temp =
1212 sample_image_pixel_float_offset(
1213 imagePtr, imageInfo,
1214 xOffsetValues[j],
1215 yOffsetValues[j],
1216 zOffsetValues[j],
1217 norm_offset_x,
1218 norm_offset_y,
1219 norm_offset_z,
1220 imageSampler, tempOut,
1221 1 /*verbose*/,
1222 &hasDenormals, lod);
1223 log_error(
1224 "\tulps: %2.2f, %2.2f, "
1225 "%2.2f, %2.2f (max "
1226 "allowed: %2.2f)\n\n",
1227 Ulp_Error(resultPtr[0],
1228 expected[0]),
1229 Ulp_Error(resultPtr[1],
1230 expected[1]),
1231 Ulp_Error(resultPtr[2],
1232 expected[2]),
1233 Ulp_Error(resultPtr[3],
1234 expected[3]),
1235 Ulp_Error(
1236 MAKE_HEX_FLOAT(
1237 0x1.000002p0f,
1238 0x1000002L, -24)
1239 + maxErr,
1240 MAKE_HEX_FLOAT(
1241 0x1.000002p0f,
1242 0x1000002L, -24)));
1243 }
1244 else
1245 {
1246 log_error(
1247 "Test error: we should "
1248 "have detected this "
1249 "passing above.\n");
1250 }
1251 } // norm_offset_z
1252 } // norm_offset_y
1253 } // norm_offset_x
1254 if (shouldReturn) return 1;
1255 } // if (!found_pixel)
1256
1257 resultPtr += 4;
1258 }
1259 }
1260 }
1261 }
1262 /*
1263 * UINT output type
1264 */
1265 else if (outputType == kUInt)
1266 {
1267 // Validate unsigned integer results
1268 unsigned int *resultPtr = (unsigned int *)(char *)resultValues;
1269 unsigned int expected[4];
1270 float error;
1271 for (size_t z = 0, j = 0; z < depth_lod; z++)
1272 {
1273 for (size_t y = 0; y < height_lod; y++)
1274 {
1275 for (size_t x = 0; x < width_lod; x++, j++)
1276 {
1277 // Step 1: go through and see if the results verify
1278 // for the pixel For the normalized case on a GPU we
1279 // put in offsets to the X, Y and Z to see if we
1280 // land on the right pixel. This addresses the
1281 // significant inaccuracy in GPU normalization in
1282 // OpenCL 1.0.
1283 int checkOnlyOnePixel = 0;
1284 int found_pixel = 0;
1285 for (float norm_offset_x = -NORM_OFFSET;
1286 norm_offset_x <= NORM_OFFSET && !found_pixel
1287 && !checkOnlyOnePixel;
1288 norm_offset_x += NORM_OFFSET)
1289 {
1290 for (float norm_offset_y = -NORM_OFFSET;
1291 norm_offset_y <= NORM_OFFSET
1292 && !found_pixel && !checkOnlyOnePixel;
1293 norm_offset_y += NORM_OFFSET)
1294 {
1295 for (float norm_offset_z = -NORM_OFFSET;
1296 norm_offset_z <= NORM_OFFSET
1297 && !found_pixel && !checkOnlyOnePixel;
1298 norm_offset_z += NORM_OFFSET)
1299 {
1300
1301 // If we are not on a GPU, or we are not
1302 // normalized, then only test with
1303 // offsets (0.0, 0.0) E.g., test one
1304 // pixel.
1305 if (!imageSampler->normalized_coords
1306 || !(gDeviceType
1307 & CL_DEVICE_TYPE_GPU)
1308 || NORM_OFFSET == 0)
1309 {
1310 norm_offset_x = 0.0f;
1311 norm_offset_y = 0.0f;
1312 norm_offset_z = 0.0f;
1313 checkOnlyOnePixel = 1;
1314 }
1315
1316 sample_image_pixel_offset<unsigned int>(
1317 imagePtr, imageInfo,
1318 xOffsetValues[j], yOffsetValues[j],
1319 zOffsetValues[j], norm_offset_x,
1320 norm_offset_y, norm_offset_z,
1321 imageSampler, expected, lod);
1322
1323 error = errMax(
1324 errMax(abs_diff_uint(expected[0],
1325 resultPtr[0]),
1326 abs_diff_uint(expected[1],
1327 resultPtr[1])),
1328 errMax(
1329 abs_diff_uint(expected[2],
1330 resultPtr[2]),
1331 abs_diff_uint(expected[3],
1332 resultPtr[3])));
1333
1334 if (error < MAX_ERR) found_pixel = 1;
1335 } // norm_offset_z
1336 } // norm_offset_y
1337 } // norm_offset_x
1338
1339 // Step 2: If we did not find a match, then print
1340 // out debugging info.
1341 if (!found_pixel)
1342 {
1343 // For the normalized case on a GPU we put in
1344 // offsets to the X and Y to see if we land on
1345 // the right pixel. This addresses the
1346 // significant inaccuracy in GPU normalization
1347 // in OpenCL 1.0.
1348 checkOnlyOnePixel = 0;
1349 int shouldReturn = 0;
1350 for (float norm_offset_x = -NORM_OFFSET;
1351 norm_offset_x <= NORM_OFFSET
1352 && !checkOnlyOnePixel;
1353 norm_offset_x += NORM_OFFSET)
1354 {
1355 for (float norm_offset_y = -NORM_OFFSET;
1356 norm_offset_y <= NORM_OFFSET
1357 && !checkOnlyOnePixel;
1358 norm_offset_y += NORM_OFFSET)
1359 {
1360 for (float norm_offset_z = -NORM_OFFSET;
1361 norm_offset_z <= NORM_OFFSET
1362 && !checkOnlyOnePixel;
1363 norm_offset_z += NORM_OFFSET)
1364 {
1365
1366 // If we are not on a GPU, or we are
1367 // not normalized, then only test
1368 // with offsets (0.0, 0.0) E.g.,
1369 // test one pixel.
1370 if (!imageSampler->normalized_coords
1371 || gDeviceType
1372 != CL_DEVICE_TYPE_GPU
1373 || NORM_OFFSET == 0)
1374 {
1375 norm_offset_x = 0.0f;
1376 norm_offset_y = 0.0f;
1377 norm_offset_z = 0.0f;
1378 checkOnlyOnePixel = 1;
1379 }
1380
1381 sample_image_pixel_offset<
1382 unsigned int>(
1383 imagePtr, imageInfo,
1384 xOffsetValues[j],
1385 yOffsetValues[j],
1386 zOffsetValues[j], norm_offset_x,
1387 norm_offset_y, norm_offset_z,
1388 imageSampler, expected, lod);
1389
1390 error = errMax(
1391 errMax(
1392 abs_diff_uint(expected[0],
1393 resultPtr[0]),
1394 abs_diff_uint(
1395 expected[1],
1396 resultPtr[1])),
1397 errMax(
1398 abs_diff_uint(expected[2],
1399 resultPtr[2]),
1400 abs_diff_uint(
1401 expected[3],
1402 resultPtr[3])));
1403
1404 if (error > MAX_ERR)
1405 {
1406 log_error(
1407 "FAILED norm_offsets: %g , "
1408 "%g , %g:\n",
1409 norm_offset_x,
1410 norm_offset_y,
1411 norm_offset_z);
1412 shouldReturn |=
1413 determine_validation_error_offset<
1414 unsigned int>(
1415 imagePtr, imageInfo,
1416 imageSampler, resultPtr,
1417 expected, error,
1418 xOffsetValues[j],
1419 yOffsetValues[j],
1420 zOffsetValues[j],
1421 norm_offset_x,
1422 norm_offset_y,
1423 norm_offset_z, j,
1424 numTries, numClamped,
1425 false, lod);
1426 }
1427 else
1428 {
1429 log_error(
1430 "Test error: we should "
1431 "have detected this "
1432 "passing above.\n");
1433 }
1434 } // norm_offset_z
1435 } // norm_offset_y
1436 } // norm_offset_x
1437 if (shouldReturn) return 1;
1438 } // if (!found_pixel)
1439
1440 resultPtr += 4;
1441 }
1442 }
1443 }
1444 }
1445 else
1446 /*
1447 * INT output type
1448 */
1449 {
1450 // Validate integer results
1451 int *resultPtr = (int *)(char *)resultValues;
1452 int expected[4];
1453 float error;
1454 for (size_t z = 0, j = 0; z < depth_lod; z++)
1455 {
1456 for (size_t y = 0; y < height_lod; y++)
1457 {
1458 for (size_t x = 0; x < width_lod; x++, j++)
1459 {
1460 // Step 1: go through and see if the results verify
1461 // for the pixel For the normalized case on a GPU we
1462 // put in offsets to the X, Y and Z to see if we
1463 // land on the right pixel. This addresses the
1464 // significant inaccuracy in GPU normalization in
1465 // OpenCL 1.0.
1466 int checkOnlyOnePixel = 0;
1467 int found_pixel = 0;
1468 for (float norm_offset_x = -NORM_OFFSET;
1469 norm_offset_x <= NORM_OFFSET && !found_pixel
1470 && !checkOnlyOnePixel;
1471 norm_offset_x += NORM_OFFSET)
1472 {
1473 for (float norm_offset_y = -NORM_OFFSET;
1474 norm_offset_y <= NORM_OFFSET
1475 && !found_pixel && !checkOnlyOnePixel;
1476 norm_offset_y += NORM_OFFSET)
1477 {
1478 for (float norm_offset_z = -NORM_OFFSET;
1479 norm_offset_z <= NORM_OFFSET
1480 && !found_pixel && !checkOnlyOnePixel;
1481 norm_offset_z += NORM_OFFSET)
1482 {
1483
1484 // If we are not on a GPU, or we are not
1485 // normalized, then only test with
1486 // offsets (0.0, 0.0) E.g., test one
1487 // pixel.
1488 if (!imageSampler->normalized_coords
1489 || !(gDeviceType
1490 & CL_DEVICE_TYPE_GPU)
1491 || NORM_OFFSET == 0)
1492 {
1493 norm_offset_x = 0.0f;
1494 norm_offset_y = 0.0f;
1495 norm_offset_z = 0.0f;
1496 checkOnlyOnePixel = 1;
1497 }
1498
1499 sample_image_pixel_offset<int>(
1500 imagePtr, imageInfo,
1501 xOffsetValues[j], yOffsetValues[j],
1502 zOffsetValues[j], norm_offset_x,
1503 norm_offset_y, norm_offset_z,
1504 imageSampler, expected, lod);
1505
1506 error = errMax(
1507 errMax(abs_diff_int(expected[0],
1508 resultPtr[0]),
1509 abs_diff_int(expected[1],
1510 resultPtr[1])),
1511 errMax(abs_diff_int(expected[2],
1512 resultPtr[2]),
1513 abs_diff_int(expected[3],
1514 resultPtr[3])));
1515
1516 if (error < MAX_ERR) found_pixel = 1;
1517 } // norm_offset_z
1518 } // norm_offset_y
1519 } // norm_offset_x
1520
1521 // Step 2: If we did not find a match, then print
1522 // out debugging info.
1523 if (!found_pixel)
1524 {
1525 // For the normalized case on a GPU we put in
1526 // offsets to the X and Y to see if we land on
1527 // the right pixel. This addresses the
1528 // significant inaccuracy in GPU normalization
1529 // in OpenCL 1.0.
1530 checkOnlyOnePixel = 0;
1531 int shouldReturn = 0;
1532 for (float norm_offset_x = -NORM_OFFSET;
1533 norm_offset_x <= NORM_OFFSET
1534 && !checkOnlyOnePixel;
1535 norm_offset_x += NORM_OFFSET)
1536 {
1537 for (float norm_offset_y = -NORM_OFFSET;
1538 norm_offset_y <= NORM_OFFSET
1539 && !checkOnlyOnePixel;
1540 norm_offset_y += NORM_OFFSET)
1541 {
1542 for (float norm_offset_z = -NORM_OFFSET;
1543 norm_offset_z <= NORM_OFFSET
1544 && !checkOnlyOnePixel;
1545 norm_offset_z += NORM_OFFSET)
1546 {
1547
1548 // If we are not on a GPU, or we are
1549 // not normalized, then only test
1550 // with offsets (0.0, 0.0) E.g.,
1551 // test one pixel.
1552 if (!imageSampler->normalized_coords
1553 || gDeviceType
1554 != CL_DEVICE_TYPE_GPU
1555 || NORM_OFFSET == 0
1556 || NORM_OFFSET == 0
1557 || NORM_OFFSET == 0)
1558 {
1559 norm_offset_x = 0.0f;
1560 norm_offset_y = 0.0f;
1561 norm_offset_z = 0.0f;
1562 checkOnlyOnePixel = 1;
1563 }
1564
1565 sample_image_pixel_offset<int>(
1566 imagePtr, imageInfo,
1567 xOffsetValues[j],
1568 yOffsetValues[j],
1569 zOffsetValues[j], norm_offset_x,
1570 norm_offset_y, norm_offset_z,
1571 imageSampler, expected, lod);
1572
1573 error = errMax(
1574 errMax(
1575 abs_diff_int(expected[0],
1576 resultPtr[0]),
1577 abs_diff_int(expected[1],
1578 resultPtr[1])),
1579 errMax(
1580 abs_diff_int(expected[2],
1581 resultPtr[2]),
1582 abs_diff_int(
1583 expected[3],
1584 resultPtr[3])));
1585
1586 if (error > MAX_ERR)
1587 {
1588 log_error(
1589 "FAILED norm_offsets: %g , "
1590 "%g , %g:\n",
1591 norm_offset_x,
1592 norm_offset_y,
1593 norm_offset_z);
1594 shouldReturn |=
1595 determine_validation_error_offset<
1596 int>(
1597 imagePtr, imageInfo,
1598 imageSampler, resultPtr,
1599 expected, error,
1600 xOffsetValues[j],
1601 yOffsetValues[j],
1602 zOffsetValues[j],
1603 norm_offset_x,
1604 norm_offset_y,
1605 norm_offset_z, j,
1606 numTries, numClamped,
1607 false, lod);
1608 }
1609 else
1610 {
1611 log_error(
1612 "Test error: we should "
1613 "have detected this "
1614 "passing above.\n");
1615 }
1616 } // norm_offset_z
1617 } // norm_offset_y
1618 } // norm_offset_x
1619 if (shouldReturn) return 1;
1620 } // if (!found_pixel)
1621
1622 resultPtr += 4;
1623 }
1624 }
1625 }
1626 }
1627 }
1628 {
1629 nextLevelOffset += width_lod * height_lod * depth_lod
1630 * get_pixel_size(imageInfo->format);
1631 width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1;
1632 if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
1633 {
1634 height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1;
1635 }
1636 if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
1637 {
1638 depth_lod = (depth_lod >> 1) ? (depth_lod >> 1) : 1;
1639 }
1640 }
1641 }
1642
1643 return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
1644 }
1645
filter_undefined_bits(image_descriptor * imageInfo,char * resultPtr)1646 void filter_undefined_bits(image_descriptor *imageInfo, char *resultPtr)
1647 {
1648 // mask off the top bit (bit 15) if the image format is (CL_UNORM_SHORT_555,
1649 // CL_RGB). (Note: OpenCL says: the top bit is undefined meaning it can be
1650 // either 0 or 1.)
1651 if (imageInfo->format->image_channel_data_type == CL_UNORM_SHORT_555)
1652 {
1653 cl_ushort *temp = (cl_ushort *)resultPtr;
1654 temp[0] &= 0x7fff;
1655 }
1656 }
1657
filter_rounding_errors(int forceCorrectlyRoundedWrites,image_descriptor * imageInfo,float * errors)1658 int filter_rounding_errors(int forceCorrectlyRoundedWrites,
1659 image_descriptor *imageInfo, float *errors)
1660 {
1661 // We are allowed 0.6 absolute error vs. infinitely precise for some
1662 // normalized formats
1663 if (0 == forceCorrectlyRoundedWrites
1664 && (imageInfo->format->image_channel_data_type == CL_UNORM_INT8
1665 || imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010
1666 || imageInfo->format->image_channel_data_type == CL_UNORM_INT16
1667 || imageInfo->format->image_channel_data_type == CL_SNORM_INT8
1668 || imageInfo->format->image_channel_data_type == CL_SNORM_INT16
1669 || imageInfo->format->image_channel_data_type == CL_UNORM_SHORT_555
1670 || imageInfo->format->image_channel_data_type
1671 == CL_UNORM_SHORT_565))
1672 {
1673 if (!(fabsf(errors[0]) > 0.6f) && !(fabsf(errors[1]) > 0.6f)
1674 && !(fabsf(errors[2]) > 0.6f) && !(fabsf(errors[3]) > 0.6f))
1675 return 0;
1676 }
1677
1678 return 1;
1679 }
1680