1 //
2 // Copyright (c) 2017, 2021 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "test_common.h"
17 #include <float.h>
18
19 #include <algorithm>
20
21 #if defined( __APPLE__ )
22 #include <signal.h>
23 #include <sys/signal.h>
24 #include <setjmp.h>
25 #endif
26
27 extern bool gTestImage2DFromBuffer;
28
29 // Utility function to clamp down image sizes for certain tests to avoid
30 // using too much memory.
reduceImageSizeRange(size_t maxDimSize)31 static size_t reduceImageSizeRange(size_t maxDimSize) {
32 size_t DimSize = maxDimSize/32;
33 if (DimSize < (size_t) 16)
34 return 16;
35 else if (DimSize > (size_t) 256)
36 return 256;
37 else
38 return DimSize;
39 }
40
41 const char *read2DKernelSourcePattern =
42 "%s\n"
43 "__kernel void sample_kernel( read_only %s input,%s __global float "
44 "*xOffsets, __global float *yOffsets, __global %s%s *results %s)\n"
45 "{\n"
46 "%s"
47 " int tidX = get_global_id(0), tidY = get_global_id(1);\n"
48 "%s"
49 "%s"
50 " results[offset] = read_image%s( input, imageSampler, coords %s);\n"
51 "}";
52
53 const char *read_write2DKernelSourcePattern =
54 "%s\n"
55 "__kernel void sample_kernel( read_write %s input,%s __global float "
56 "*xOffsets, __global float *yOffsets, __global %s%s *results %s)\n"
57 "{\n"
58 "%s"
59 " int tidX = get_global_id(0), tidY = get_global_id(1);\n"
60 "%s"
61 "%s"
62 " results[offset] = read_image%s( input, coords %s);\n"
63 "}";
64
65 const char *intCoordKernelSource =
66 " int2 coords = (int2)( xOffsets[offset], yOffsets[offset]);\n";
67
68 const char *floatKernelSource =
69 " float2 coords = (float2)( (float)( xOffsets[offset] ), (float)( yOffsets[offset] ) );\n";
70
71 static const char *samplerKernelArg = " sampler_t imageSampler,";
72
73 static const char *lodOffsetSource =
74 " unsigned int lod_int = (unsigned int) lod;\n"
75 " int width_lod = (get_image_width(input) >> lod_int) ?(get_image_width(input) >> lod_int):1 ;\n"
76 " int offset = tidY*width_lod + tidX;\n";
77
78 static const char *offsetSource =
79 " int offset = tidY*get_image_width(input) + tidX;\n";
80
determine_validation_error(void * imagePtr,image_descriptor * imageInfo,image_sampler_data * imageSampler,T * resultPtr,T * expected,float error,float x,float y,float xAddressOffset,float yAddressOffset,size_t j,int & numTries,int & numClamped,bool printAsFloat,int lod=0)81 template <class T> int determine_validation_error( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler,
82 T *resultPtr, T * expected, float error,
83 float x, float y, float xAddressOffset, float yAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod = 0 )
84 {
85 int actualX, actualY;
86 int found = debug_find_pixel_in_image( imagePtr, imageInfo, resultPtr, &actualX, &actualY, NULL, lod );
87 bool clampingErr = false, clamped = false, otherClampingBug = false;
88 int clampedX, clampedY, ignoreMe;
89
90 clamped = get_integer_coords_offset( x, y, 0.f, xAddressOffset, yAddressOffset, 0.0f, imageInfo->width, imageInfo->height, 0, imageSampler, imageInfo, clampedX, clampedY, ignoreMe );
91
92 if( found )
93 {
94 // Is it a clamping bug?
95 if( clamped && clampedX == actualX && clampedY == actualY )
96 {
97 if( (--numClamped) == 0 )
98 {
99 log_error( "ERROR: TEST FAILED: Read is erroneously clamping coordinates for image size %ld x %ld!\n", imageInfo->width, imageInfo->height );
100 if (imageInfo->format->image_channel_order == CL_DEPTH)
101 {
102 if( printAsFloat )
103 {
104 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g),\n\tgot (%g),\n\terror of %g\n",
105 (int)j, x, x, y, y, (float)expected[ 0 ], (float)resultPtr[ 0 ], error );
106 }
107 else
108 {
109 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x),\n\tgot (%x)\n",
110 (int)j, x, x, y, y, (int)expected[ 0 ], (int)resultPtr[ 0 ] );
111 }
112 }
113 else
114 {
115 if( printAsFloat )
116 {
117 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot (%g,%g,%g,%g),\n\terror of %g\n",
118 (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
119 (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
120 }
121 else
122 {
123 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot (%x,%x,%x,%x)\n",
124 (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
125 (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
126 }
127 }
128 return 1;
129 }
130 clampingErr = true;
131 otherClampingBug = true;
132 }
133 }
134 if( clamped && !otherClampingBug )
135 {
136 // If we are in clamp-to-edge mode and we're getting zeroes, it's possible we're getting border erroneously
137 if( resultPtr[ 0 ] == 0 && resultPtr[ 1 ] == 0 && resultPtr[ 2 ] == 0 && resultPtr[ 3 ] == 0 )
138 {
139 if( (--numClamped) == 0 )
140 {
141 log_error( "ERROR: TEST FAILED: Clamping is erroneously returning border color for image size %ld x %ld!\n", imageInfo->width, imageInfo->height );
142 if (imageInfo->format->image_channel_order == CL_DEPTH)
143 {
144 if( printAsFloat )
145 {
146 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g),\n\tgot (%g),\n\terror of %g\n",
147 (int)j, x, x, y, y, (float)expected[ 0 ], (float)resultPtr[ 0 ], error );
148 }
149 else
150 {
151 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x),\n\tgot (%x)\n",
152 (int)j, x, x, y, y, (int)expected[ 0 ], (int)resultPtr[ 0 ] );
153 }
154 }
155 else
156 {
157 if( printAsFloat )
158 {
159 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot (%g,%g,%g,%g),\n\terror of %g\n",
160 (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
161 (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
162 }
163 else
164 {
165 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot (%x,%x,%x,%x)\n",
166 (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
167 (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
168 }
169 }
170 return 1;
171 }
172 clampingErr = true;
173 }
174 }
175 if( !clampingErr )
176 {
177 if (imageInfo->format->image_channel_order == CL_DEPTH)
178 {
179 if( printAsFloat )
180 {
181 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g),\n\tgot (%g),\n\terror of %g\n",
182 (int)j, x, x, y, y, (float)expected[ 0 ], (float)resultPtr[ 0 ], error );
183 }
184 else
185 {
186 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x),\n\tgot (%x)\n",
187 (int)j, x, x, y, y, (int)expected[ 0 ], (int)resultPtr[ 0 ] );
188 }
189 }
190 else
191 {
192 if( printAsFloat )
193 {
194 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot (%g,%g,%g,%g), error of %g\n",
195 (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
196 (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
197 }
198 else
199 {
200 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot (%x,%x,%x,%x)\n",
201 (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
202 (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
203 }
204 }
205 log_error( "img size %ld,%ld (pitch %ld)", imageInfo->width, imageInfo->height, imageInfo->rowPitch );
206 if( clamped )
207 {
208 log_error( " which would clamp to %d,%d\n", clampedX, clampedY );
209 }
210 if( printAsFloat && gExtraValidateInfo)
211 {
212 log_error( "Nearby values:\n" );
213 log_error( "\t%d\t%d\t%d\t%d\n", clampedX - 2, clampedX - 1, clampedX, clampedX + 1 );
214 for( int yOff = -2; yOff <= 1; yOff++ )
215 {
216 float top[ 4 ], real[ 4 ], bot[ 4 ], bot2[ 4 ];
217 read_image_pixel_float( imagePtr, imageInfo, clampedX - 2 , clampedY + yOff, 0, top );
218 read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 ,clampedY + yOff, 0, real );
219 read_image_pixel_float( imagePtr, imageInfo, clampedX, clampedY + yOff, 0, bot );
220 read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, 0, bot2 );
221 if (imageInfo->format->image_channel_order == CL_DEPTH)
222 {
223 log_error( "%d\t(%g)",clampedY + yOff, top[0] );
224 log_error( " (%g)", real[0] );
225 log_error( " (%g)",bot[0] );
226 log_error( " (%g)\n",bot2[0] );
227 }
228 else
229 {
230 log_error( "%d\t(%g,%g,%g,%g)",clampedY + yOff, top[0], top[1], top[2], top[3] );
231 log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] );
232 log_error( " (%g,%g,%g,%g)",bot[0], bot[1], bot[2], bot[3] );
233 log_error( " (%g,%g,%g,%g)\n",bot2[0], bot2[1], bot2[2], bot2[3] );
234 }
235 }
236
237 if( clampedY < 1 )
238 {
239 log_error( "Nearby values:\n" );
240 log_error( "\t%d\t%d\t%d\t%d\n", clampedX - 2, clampedX - 1, clampedX, clampedX + 1 );
241 for( int yOff = (int)imageInfo->height - 2; yOff <= (int)imageInfo->height + 1; yOff++ )
242 {
243 float top[ 4 ], real[ 4 ], bot[ 4 ], bot2[ 4 ];
244 read_image_pixel_float( imagePtr, imageInfo, clampedX - 2 , clampedY + yOff, 0, top );
245 read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 ,clampedY + yOff, 0, real );
246 read_image_pixel_float( imagePtr, imageInfo, clampedX, clampedY + yOff, 0, bot );
247 read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, 0, bot2 );
248 if (imageInfo->format->image_channel_order == CL_DEPTH)
249 {
250 log_error( "%d\t(%g)",clampedY + yOff, top[0] );
251 log_error( " (%g)", real[0] );
252 log_error( " (%g)",bot[0] );
253 log_error( " (%g)\n",bot2[0] );
254 }
255 else
256 {
257 log_error( "%d\t(%g,%g,%g,%g)",clampedY + yOff, top[0], top[1], top[2], top[3] );
258 log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] );
259 log_error( " (%g,%g,%g,%g)",bot[0], bot[1], bot[2], bot[3] );
260 log_error( " (%g,%g,%g,%g)\n",bot2[0], bot2[1], bot2[2], bot2[3] );
261 }
262 }
263 }
264 }
265
266 if( imageSampler->filter_mode != CL_FILTER_LINEAR )
267 {
268 if( found )
269 log_error( "\tValue really found in image at %d,%d (%s)\n", actualX, actualY, ( found > 1 ) ? "NOT unique!!" : "unique" );
270 else
271 log_error( "\tValue not actually found in image\n" );
272 }
273 log_error( "\n" );
274
275 numClamped = -1; // We force the clamped counter to never work
276 if( ( --numTries ) == 0 )
277 {
278 return 1;
279 }
280 }
281 return 0;
282 }
283
InitFloatCoords(image_descriptor * imageInfo,image_sampler_data * imageSampler,float * xOffsets,float * yOffsets,float xfract,float yfract,int normalized_coords,MTdata d,size_t lod)284 static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float xfract, float yfract, int normalized_coords, MTdata d, size_t lod)
285 {
286 size_t i = 0;
287 size_t width_lod = imageInfo->width, height_lod = imageInfo->height;
288
289 if( gTestMipmaps )
290 {
291 width_lod = (imageInfo->width >> lod)?(imageInfo->width >> lod):1;
292 height_lod = (imageInfo->height >> lod)?(imageInfo->height >> lod):1;
293 }
294 if( gDisableOffsets )
295 {
296 for( size_t y = 0; y < height_lod; y++ )
297 {
298 for( size_t x = 0; x < width_lod; x++, i++ )
299 {
300 xOffsets[ i ] = (float) (xfract + (float) x);
301 yOffsets[ i ] = (float) (yfract + (float) y);
302 }
303 }
304 }
305 else
306 {
307 for( size_t y = 0; y < height_lod; y++ )
308 {
309 for( size_t x = 0; x < width_lod; x++, i++ )
310 {
311 xOffsets[ i ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d )));
312 yOffsets[ i ] = (float) (yfract + (double) ((int) y + random_in_range( -10, 10, d )));
313 }
314 }
315 }
316
317 if( imageSampler->addressing_mode == CL_ADDRESS_NONE )
318 {
319 i = 0;
320 for( size_t y = 0; y < height_lod; y++ )
321 {
322 for( size_t x = 0; x < width_lod; x++, i++ )
323 {
324 xOffsets[ i ] = (float) CLAMP( (double) xOffsets[ i ], 0.0, (double) width_lod - 1.0);
325 yOffsets[ i ] = (float) CLAMP( (double) yOffsets[ i ], 0.0, (double)height_lod - 1.0);
326 }
327 }
328 }
329
330 if( normalized_coords )
331 {
332 i = 0;
333 for( size_t y = 0; y < height_lod; y++ )
334 {
335 for( size_t x = 0; x < width_lod; x++, i++ )
336 {
337 xOffsets[ i ] = (float) ((float) xOffsets[ i ] / (float) width_lod);
338 yOffsets[ i ] = (float) ((float) yOffsets[ i ] / (float) height_lod);
339 }
340 }
341 }
342 }
343
validate_image_2D_depth_results(void * imageValues,void * resultValues,double formatAbsoluteError,float * xOffsetValues,float * yOffsetValues,ExplicitType outputType,int & numTries,int & numClamped,image_sampler_data * imageSampler,image_descriptor * imageInfo,size_t lod,char * imagePtr)344 int validate_image_2D_depth_results(void *imageValues, void *resultValues, double formatAbsoluteError, float *xOffsetValues, float *yOffsetValues,
345 ExplicitType outputType, int &numTries, int &numClamped, image_sampler_data *imageSampler, image_descriptor *imageInfo, size_t lod, char *imagePtr)
346 {
347 // Validate results element by element
348 size_t width_lod = (imageInfo->width >> lod ) ?(imageInfo->width >> lod ) : 1;
349 size_t height_lod = (imageInfo->height >> lod ) ?(imageInfo->height >> lod ) : 1;
350 /*
351 * FLOAT output type
352 */
353 if( outputType == kFloat )
354 {
355 // Validate float results
356 float *resultPtr = (float *)(char *)resultValues;
357 float expected[4], error=0.0f;
358 float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
359 for( size_t y = 0, j = 0; y < height_lod; y++ )
360 {
361 for( size_t x = 0; x < width_lod; x++, j++ )
362 {
363 // Step 1: go through and see if the results verify for the pixel
364 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
365 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
366 int checkOnlyOnePixel = 0;
367 int found_pixel = 0;
368 float offset = NORM_OFFSET;
369 if (!imageSampler->normalized_coords
370 || imageSampler->filter_mode != CL_FILTER_NEAREST
371 || NORM_OFFSET == 0
372 #if defined( __APPLE__ )
373 // Apple requires its CPU implementation to do correctly
374 // rounded address arithmetic in all modes
375 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
376 #endif
377 )
378 offset = 0.0f; // Loop only once
379
380 for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
381 for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) {
382
383 // Try sampling the pixel, without flushing denormals.
384 int containsDenormals = 0;
385 FloatPixel maxPixel;
386 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
387 xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
388 imageSampler, expected, 0, &containsDenormals );
389
390 float err1 = ABS_ERROR(resultPtr[0], expected[0]);
391 // Clamp to the minimum absolute error for the format
392 if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; }
393 float maxErr1 =
394 std::max(maxErr * maxPixel.p[0], FLT_MIN);
395
396 // Check if the result matches.
397 if( ! (err1 <= maxErr1) )
398 {
399 //try flushing the denormals, if there is a failure.
400 if( containsDenormals )
401 {
402 // If implementation decide to flush subnormals to zero,
403 // max error needs to be adjusted
404 maxErr1 += 4 * FLT_MIN;
405
406 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
407 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
408 imageSampler, expected, 0, NULL );
409
410 err1 = ABS_ERROR(resultPtr[0], expected[0]);
411 }
412 }
413
414 // If the final result DOES match, then we've found a valid result and we're done with this pixel.
415 found_pixel = (err1 <= maxErr1);
416 }//norm_offset_x
417 }//norm_offset_y
418
419
420 // Step 2: If we did not find a match, then print out debugging info.
421 if (!found_pixel) {
422 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
423 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
424 checkOnlyOnePixel = 0;
425 int shouldReturn = 0;
426 for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
427 for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
428
429 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
430 // E.g., test one pixel.
431 if (!imageSampler->normalized_coords
432 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
433 || NORM_OFFSET == 0)
434 {
435 norm_offset_x = 0.0f;
436 norm_offset_y = 0.0f;
437 checkOnlyOnePixel = 1;
438 }
439
440 int containsDenormals = 0;
441 FloatPixel maxPixel;
442 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
443 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
444 imageSampler, expected, 0, &containsDenormals );
445
446 float err1 = ABS_ERROR(resultPtr[0], expected[0]);
447 float maxErr1 =
448 std::max(maxErr * maxPixel.p[0], FLT_MIN);
449
450
451 if( ! (err1 <= maxErr1) )
452 {
453 //try flushing the denormals, if there is a failure.
454 if( containsDenormals )
455 {
456 maxErr1 += 4 * FLT_MIN;
457
458 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
459 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
460 imageSampler, expected, 0, NULL );
461
462 err1 = ABS_ERROR(resultPtr[0], expected[0]);
463 }
464 }
465 if( ! (err1 <= maxErr1) )
466 {
467 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
468
469 float tempOut[4];
470 shouldReturn |= determine_validation_error<float>( imagePtr, imageInfo, imageSampler, resultPtr,
471 expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod );
472
473 log_error( "Step by step:\n" );
474 FloatPixel temp;
475 temp = sample_image_pixel_float_offset( imageValues, imageInfo,
476 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
477 imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ );
478 log_error( "\tulps: %2.2f (max allowed: %2.2f)\n\n",
479 Ulp_Error( resultPtr[0], expected[0] ),
480 Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
481
482 } else {
483 log_error("Test error: we should have detected this passing above.\n");
484 }
485
486 }//norm_offset_x
487 }//norm_offset_y
488 if( shouldReturn )
489 return 1;
490 } // if (!found_pixel)
491
492 resultPtr += 1;
493 }
494 }
495 }
496 else
497 {
498 log_error("Test error: Not supported format.\n");
499 return 1;
500 }
501 return 0;
502 }
503
validate_image_2D_results(void * imageValues,void * resultValues,double formatAbsoluteError,float * xOffsetValues,float * yOffsetValues,ExplicitType outputType,int & numTries,int & numClamped,image_sampler_data * imageSampler,image_descriptor * imageInfo,size_t lod,char * imagePtr)504 int validate_image_2D_results(void *imageValues, void *resultValues, double formatAbsoluteError, float *xOffsetValues, float *yOffsetValues,
505 ExplicitType outputType, int &numTries, int &numClamped, image_sampler_data *imageSampler, image_descriptor *imageInfo, size_t lod, char *imagePtr)
506 {
507 // Validate results element by element
508 size_t width_lod = (imageInfo->width >> lod ) ?(imageInfo->width >> lod ) : 1;
509 size_t height_lod = (imageInfo->height >> lod ) ?(imageInfo->height >> lod ) : 1;
510 /*
511 * FLOAT output type
512 */
513 if( outputType == kFloat )
514 {
515 // Validate float results
516 float *resultPtr = (float *)(char *)resultValues;
517 float expected[4], error=0.0f;
518 float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
519 for( size_t y = 0, j = 0; y < height_lod; y++ )
520 {
521 for( size_t x = 0; x < width_lod; x++, j++ )
522 {
523 // Step 1: go through and see if the results verify for the pixel
524 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
525 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
526 int checkOnlyOnePixel = 0;
527 int found_pixel = 0;
528 float offset = NORM_OFFSET;
529 if (!imageSampler->normalized_coords
530 || imageSampler->filter_mode != CL_FILTER_NEAREST
531 || NORM_OFFSET == 0
532 #if defined( __APPLE__ )
533 // Apple requires its CPU implementation to do correctly
534 // rounded address arithmetic in all modes
535 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
536 #endif
537 )
538 offset = 0.0f; // Loop only once
539
540 for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
541 for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) {
542
543
544 // Try sampling the pixel, without flushing denormals.
545 int containsDenormals = 0;
546 FloatPixel maxPixel;
547 if ( gTestMipmaps )
548 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
549 xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
550 imageSampler, expected, 0, &containsDenormals, lod );
551 else
552 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
553 xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
554 imageSampler, expected, 0, &containsDenormals );
555
556 float err1 = ABS_ERROR(resultPtr[0], expected[0]);
557 float err2 = ABS_ERROR(resultPtr[1], expected[1]);
558 float err3 = ABS_ERROR(resultPtr[2], expected[2]);
559 float err4 = ABS_ERROR(resultPtr[3], expected[3]);
560 // Clamp to the minimum absolute error for the format
561 if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; }
562 if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; }
563 if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; }
564 if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; }
565 float maxErr1 =
566 std::max(maxErr * maxPixel.p[0], FLT_MIN);
567 float maxErr2 =
568 std::max(maxErr * maxPixel.p[1], FLT_MIN);
569 float maxErr3 =
570 std::max(maxErr * maxPixel.p[2], FLT_MIN);
571 float maxErr4 =
572 std::max(maxErr * maxPixel.p[3], FLT_MIN);
573
574 // Check if the result matches.
575 if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) ||
576 ! (err3 <= maxErr3) || ! (err4 <= maxErr4) )
577 {
578 //try flushing the denormals, if there is a failure.
579 if( containsDenormals )
580 {
581 // If implementation decide to flush subnormals to zero,
582 // max error needs to be adjusted
583 maxErr1 += 4 * FLT_MIN;
584 maxErr2 += 4 * FLT_MIN;
585 maxErr3 += 4 * FLT_MIN;
586 maxErr4 += 4 * FLT_MIN;
587
588 if(gTestMipmaps)
589 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
590 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
591 imageSampler, expected, 0, NULL,lod );
592 else
593 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
594 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
595 imageSampler, expected, 0, NULL );
596
597 err1 = ABS_ERROR(resultPtr[0], expected[0]);
598 err2 = ABS_ERROR(resultPtr[1], expected[1]);
599 err3 = ABS_ERROR(resultPtr[2], expected[2]);
600 err4 = ABS_ERROR(resultPtr[3], expected[3]);
601 }
602 }
603
604 // If the final result DOES match, then we've found a valid result and we're done with this pixel.
605 found_pixel = (err1 <= maxErr1) && (err2 <= maxErr2) && (err3 <= maxErr3) && (err4 <= maxErr4);
606 }//norm_offset_x
607 }//norm_offset_y
608
609
610 // Step 2: If we did not find a match, then print out debugging info.
611 if (!found_pixel) {
612 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
613 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
614 checkOnlyOnePixel = 0;
615 int shouldReturn = 0;
616 for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
617 for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
618
619 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
620 // E.g., test one pixel.
621 if (!imageSampler->normalized_coords
622 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
623 || NORM_OFFSET == 0)
624 {
625 norm_offset_x = 0.0f;
626 norm_offset_y = 0.0f;
627 checkOnlyOnePixel = 1;
628 }
629
630 int containsDenormals = 0;
631 FloatPixel maxPixel;
632 if(gTestMipmaps)
633 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
634 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
635 imageSampler, expected, 0, &containsDenormals, lod );
636 else
637 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
638 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
639 imageSampler, expected, 0, &containsDenormals );
640
641 float err1 = ABS_ERROR(resultPtr[0], expected[0]);
642 float err2 = ABS_ERROR(resultPtr[1], expected[1]);
643 float err3 = ABS_ERROR(resultPtr[2], expected[2]);
644 float err4 = ABS_ERROR(resultPtr[3], expected[3]);
645 float maxErr1 =
646 std::max(maxErr * maxPixel.p[0], FLT_MIN);
647 float maxErr2 =
648 std::max(maxErr * maxPixel.p[1], FLT_MIN);
649 float maxErr3 =
650 std::max(maxErr * maxPixel.p[2], FLT_MIN);
651 float maxErr4 =
652 std::max(maxErr * maxPixel.p[3], FLT_MIN);
653
654
655 if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) ||
656 ! (err3 <= maxErr3) || ! (err4 <= maxErr4) )
657 {
658 //try flushing the denormals, if there is a failure.
659 if( containsDenormals )
660 {
661 maxErr1 += 4 * FLT_MIN;
662 maxErr2 += 4 * FLT_MIN;
663 maxErr3 += 4 * FLT_MIN;
664 maxErr4 += 4 * FLT_MIN;
665
666 if(gTestMipmaps)
667 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
668 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
669 imageSampler, expected, 0, NULL, lod );
670 else
671 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
672 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
673 imageSampler, expected, 0, NULL );
674
675 err1 = ABS_ERROR(resultPtr[0], expected[0]);
676 err2 = ABS_ERROR(resultPtr[1], expected[1]);
677 err3 = ABS_ERROR(resultPtr[2], expected[2]);
678 err4 = ABS_ERROR(resultPtr[3], expected[3]);
679 }
680 }
681 if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) ||
682 ! (err3 <= maxErr3) || ! (err4 <= maxErr4) )
683 {
684 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
685
686 float tempOut[4];
687 shouldReturn |= determine_validation_error<float>( imagePtr, imageInfo, imageSampler, resultPtr,
688 expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod );
689
690 log_error( "Step by step:\n" );
691 FloatPixel temp;
692 if( gTestMipmaps )
693 temp = sample_image_pixel_float_offset( imagePtr, imageInfo,
694 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
695 imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/, lod );
696 else
697 temp = sample_image_pixel_float_offset( imageValues, imageInfo,
698 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
699 imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ );
700 log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f (max allowed: %2.2f)\n\n",
701 Ulp_Error( resultPtr[0], expected[0] ),
702 Ulp_Error( resultPtr[1], expected[1] ),
703 Ulp_Error( resultPtr[2], expected[2] ),
704 Ulp_Error( resultPtr[3], expected[3] ),
705 Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
706
707 } else {
708 log_error("Test error: we should have detected this passing above.\n");
709 }
710
711 }//norm_offset_x
712 }//norm_offset_y
713 if( shouldReturn )
714 return 1;
715 } // if (!found_pixel)
716
717 resultPtr += 4;
718 }
719 }
720 }
721 /*
722 * UINT output type
723 */
724 else if( outputType == kUInt )
725 {
726 // Validate unsigned integer results
727 unsigned int *resultPtr = (unsigned int *)(char *)resultValues;
728 unsigned int expected[4];
729 float error;
730 for( size_t y = 0, j = 0; y < height_lod ; y++ )
731 {
732 for( size_t x = 0; x < width_lod ; x++, j++ )
733 {
734 // Step 1: go through and see if the results verify for the pixel
735 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
736 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
737 int checkOnlyOnePixel = 0;
738 int found_pixel = 0;
739 for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
740 for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
741
742 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
743 // E.g., test one pixel.
744 if (!imageSampler->normalized_coords
745 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
746 || NORM_OFFSET == 0)
747 {
748 norm_offset_x = 0.0f;
749 norm_offset_y = 0.0f;
750 checkOnlyOnePixel = 1;
751 }
752
753 if ( gTestMipmaps )
754 sample_image_pixel_offset<unsigned int>( (char*)imagePtr, imageInfo,
755 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
756 imageSampler, expected, lod );
757 else
758 sample_image_pixel_offset<unsigned int>( imagePtr, imageInfo,
759 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
760 imageSampler, expected);
761
762
763 error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
764 errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
765
766 if (error <= MAX_ERR)
767 found_pixel = 1;
768 }//norm_offset_x
769 }//norm_offset_y
770
771 // Step 2: If we did not find a match, then print out debugging info.
772 if (!found_pixel) {
773 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
774 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
775 checkOnlyOnePixel = 0;
776 int shouldReturn = 0;
777 for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
778 for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
779
780 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
781 // E.g., test one pixel.
782 if (!imageSampler->normalized_coords
783 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
784 || NORM_OFFSET == 0)
785 {
786 norm_offset_x = 0.0f;
787 norm_offset_y = 0.0f;
788 checkOnlyOnePixel = 1;
789 }
790
791 if( gTestMipmaps )
792 sample_image_pixel_offset<unsigned int>( imagePtr , imageInfo,
793 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
794 imageSampler, expected, lod );
795 else
796 sample_image_pixel_offset<unsigned int>( imagePtr , imageInfo,
797 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
798 imageSampler, expected);
799
800
801 error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
802 errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
803
804 if( error > MAX_ERR )
805 {
806 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
807
808 shouldReturn |= determine_validation_error<unsigned int>( imagePtr, imageInfo, imageSampler, resultPtr,
809 expected, error, xOffsetValues[j], yOffsetValues[j], norm_offset_x, norm_offset_y, j, numTries, numClamped, false, lod );
810 } else {
811 log_error("Test error: we should have detected this passing above.\n");
812 }
813 }//norm_offset_x
814 }//norm_offset_y
815 if( shouldReturn )
816 return 1;
817 } // if (!found_pixel)
818
819 resultPtr += 4;
820 }
821 }
822 }
823 /*
824 * INT output type
825 */
826 else
827 {
828 // Validate integer results
829 int *resultPtr = (int *)(char *)resultValues;
830 int expected[4];
831 float error;
832 for( size_t y = 0, j = 0; y < height_lod ; y++ )
833 {
834 for( size_t x = 0; x < width_lod; x++, j++ )
835 {
836 // Step 1: go through and see if the results verify for the pixel
837 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
838 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
839 int checkOnlyOnePixel = 0;
840 int found_pixel = 0;
841 for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
842 for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
843
844 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
845 // E.g., test one pixel.
846 if (!imageSampler->normalized_coords
847 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
848 || NORM_OFFSET == 0)
849 {
850 norm_offset_x = 0.0f;
851 norm_offset_y = 0.0f;
852 checkOnlyOnePixel = 1;
853 }
854
855 if ( gTestMipmaps )
856 sample_image_pixel_offset<int>( imagePtr, imageInfo,
857 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
858 imageSampler, expected , lod);
859 else
860 sample_image_pixel_offset<int>( imageValues, imageInfo,
861 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
862 imageSampler, expected );
863
864
865 error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
866 errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
867
868 if (error <= MAX_ERR)
869 found_pixel = 1;
870 }//norm_offset_x
871 }//norm_offset_y
872
873 // Step 2: If we did not find a match, then print out debugging info.
874 if (!found_pixel) {
875 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
876 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
877 checkOnlyOnePixel = 0;
878 int shouldReturn = 0;
879 for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
880 for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
881
882 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
883 // E.g., test one pixel.
884 if (!imageSampler->normalized_coords
885 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
886 || NORM_OFFSET == 0)
887 {
888 norm_offset_x = 0.0f;
889 norm_offset_y = 0.0f;
890 checkOnlyOnePixel = 1;
891 }
892
893 if ( gTestMipmaps )
894 sample_image_pixel_offset<int>( imageValues, imageInfo,
895 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
896 imageSampler, expected, lod );
897 else
898 sample_image_pixel_offset<int>( imageValues, imageInfo,
899 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
900 imageSampler, expected );
901
902
903 error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
904 errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
905
906 if( error > MAX_ERR )
907 {
908 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
909
910 shouldReturn |= determine_validation_error<int>( imagePtr, imageInfo, imageSampler, resultPtr,
911 expected, error, xOffsetValues[j], yOffsetValues[j], norm_offset_x, norm_offset_y, j, numTries, numClamped, false, lod );
912 } else {
913 log_error("Test error: we should have detected this passing above.\n");
914 }
915 }//norm_offset_x
916 }//norm_offset_y
917 if( shouldReturn )
918 return 1;
919 } // if (!found_pixel)
920
921 resultPtr += 4;
922 }
923 }
924 }
925 return 0;
926 }
927
validate_image_2D_sRGB_results(void * imageValues,void * resultValues,double formatAbsoluteError,float * xOffsetValues,float * yOffsetValues,ExplicitType outputType,int & numTries,int & numClamped,image_sampler_data * imageSampler,image_descriptor * imageInfo,size_t lod,char * imagePtr)928 int validate_image_2D_sRGB_results(void *imageValues, void *resultValues, double formatAbsoluteError, float *xOffsetValues, float *yOffsetValues,
929 ExplicitType outputType, int &numTries, int &numClamped, image_sampler_data *imageSampler, image_descriptor *imageInfo, size_t lod, char *imagePtr)
930 {
931 // Validate results element by element
932 size_t width_lod = (imageInfo->width >> lod ) ?(imageInfo->width >> lod ) : 1;
933 size_t height_lod = (imageInfo->height >> lod ) ?(imageInfo->height >> lod ) : 1;
934 /*
935 * FLOAT output type
936 */
937 if( outputType == kFloat )
938 {
939 // Validate float results
940 float *resultPtr = (float *)(char *)resultValues;
941 float expected[4], error=0.0f;
942 float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
943 for( size_t y = 0, j = 0; y < height_lod; y++ )
944 {
945 for( size_t x = 0; x < width_lod; x++, j++ )
946 {
947 // Step 1: go through and see if the results verify for the pixel
948 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
949 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
950 int checkOnlyOnePixel = 0;
951 int found_pixel = 0;
952 float offset = NORM_OFFSET;
953 if (!imageSampler->normalized_coords
954 || imageSampler->filter_mode != CL_FILTER_NEAREST
955 || NORM_OFFSET == 0
956 #if defined( __APPLE__ )
957 // Apple requires its CPU implementation to do correctly
958 // rounded address arithmetic in all modes
959 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
960 #endif
961 )
962 offset = 0.0f; // Loop only once
963
964 for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
965 for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) {
966
967
968 // Try sampling the pixel, without flushing denormals.
969 int containsDenormals = 0;
970 FloatPixel maxPixel;
971 if ( gTestMipmaps )
972 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
973 xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
974 imageSampler, expected, 0, &containsDenormals, lod );
975 else
976 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
977 xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
978 imageSampler, expected, 0, &containsDenormals );
979 float err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
980 sRGBmap(expected[0]));
981 float err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
982 sRGBmap(expected[1]));
983 float err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
984 sRGBmap(expected[2]));
985 float err4 = ABS_ERROR(resultPtr[3], expected[3]);
986 float maxErr = 0.5;
987
988 // Check if the result matches.
989 if( ! (err1 <= maxErr) || ! (err2 <= maxErr) ||
990 ! (err3 <= maxErr) || ! (err4 <= maxErr) )
991 {
992 //try flushing the denormals, if there is a failure.
993 if( containsDenormals )
994 {
995 // If implementation decide to flush subnormals to zero,
996 // max error needs to be adjusted
997 maxErr += 4 * FLT_MIN;
998
999 if(gTestMipmaps)
1000 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
1001 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1002 imageSampler, expected, 0, NULL,lod );
1003 else
1004 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
1005 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1006 imageSampler, expected, 0, NULL );
1007
1008 err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
1009 sRGBmap(expected[0]));
1010 err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
1011 sRGBmap(expected[1]));
1012 err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
1013 sRGBmap(expected[2]));
1014 err4 = ABS_ERROR(resultPtr[3], expected[3]);
1015 }
1016 }
1017
1018 // If the final result DOES match, then we've found a valid result and we're done with this pixel.
1019 found_pixel = (err1 <= maxErr) && (err2 <= maxErr) && (err3 <= maxErr) && (err4 <= maxErr);
1020 }//norm_offset_x
1021 }//norm_offset_y
1022
1023
1024 // Step 2: If we did not find a match, then print out debugging info.
1025 if (!found_pixel) {
1026 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
1027 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
1028 checkOnlyOnePixel = 0;
1029 int shouldReturn = 0;
1030 for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
1031 for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
1032
1033 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
1034 // E.g., test one pixel.
1035 if (!imageSampler->normalized_coords
1036 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
1037 || NORM_OFFSET == 0)
1038 {
1039 norm_offset_x = 0.0f;
1040 norm_offset_y = 0.0f;
1041 checkOnlyOnePixel = 1;
1042 }
1043
1044 int containsDenormals = 0;
1045 FloatPixel maxPixel;
1046 if(gTestMipmaps)
1047 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
1048 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1049 imageSampler, expected, 0, &containsDenormals, lod );
1050 else
1051 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
1052 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1053 imageSampler, expected, 0, &containsDenormals );
1054
1055 float err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
1056 sRGBmap(expected[0]));
1057 float err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
1058 sRGBmap(expected[1]));
1059 float err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
1060 sRGBmap(expected[2]));
1061 float err4 = ABS_ERROR(resultPtr[3], expected[3]);
1062 float maxErr = 0.6;
1063
1064 if( ! (err1 <= maxErr) || ! (err2 <= maxErr) ||
1065 ! (err3 <= maxErr) || ! (err4 <= maxErr) )
1066 {
1067 //try flushing the denormals, if there is a failure.
1068 if( containsDenormals )
1069 {
1070 // If implementation decide to flush subnormals to zero,
1071 // max error needs to be adjusted
1072 maxErr += 4 * FLT_MIN;
1073 if(gTestMipmaps)
1074 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
1075 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1076 imageSampler, expected, 0, NULL, lod );
1077 else
1078 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
1079 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1080 imageSampler, expected, 0, NULL );
1081
1082 err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
1083 sRGBmap(expected[0]));
1084 err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
1085 sRGBmap(expected[1]));
1086 err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
1087 sRGBmap(expected[2]));
1088 err4 = ABS_ERROR(resultPtr[3], expected[3]);
1089 }
1090 }
1091 if( ! (err1 <= maxErr) || ! (err2 <= maxErr) ||
1092 ! (err3 <= maxErr) || ! (err4 <= maxErr) )
1093 {
1094 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
1095
1096 float tempOut[4];
1097 shouldReturn |= determine_validation_error<float>( imagePtr, imageInfo, imageSampler, resultPtr,
1098 expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod );
1099
1100 log_error( "Step by step:\n" );
1101 FloatPixel temp;
1102 if( gTestMipmaps )
1103 temp = sample_image_pixel_float_offset( imagePtr, imageInfo,
1104 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1105 imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/, lod );
1106 else
1107 temp = sample_image_pixel_float_offset( imageValues, imageInfo,
1108 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1109 imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ );
1110 log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f (max allowed: %2.2f)\n\n",
1111 Ulp_Error( resultPtr[0], expected[0] ),
1112 Ulp_Error( resultPtr[1], expected[1] ),
1113 Ulp_Error( resultPtr[2], expected[2] ),
1114 Ulp_Error( resultPtr[3], expected[3] ),
1115 Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
1116
1117 } else {
1118 log_error("Test error: we should have detected this passing above.\n");
1119 }
1120
1121 }//norm_offset_x
1122 }//norm_offset_y
1123 if( shouldReturn )
1124 return 1;
1125 } // if (!found_pixel)
1126
1127 resultPtr += 4;
1128 }
1129 }
1130 }
1131 else {
1132 log_error("Test error: NOT SUPPORTED.\n");
1133 }
1134 return 0;
1135 }
1136
validate_float_write_results(float * expected,float * actual,image_descriptor * imageInfo)1137 bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo )
1138 {
1139 bool pass = true;
1140 // Compare floats
1141 if( memcmp( expected, actual, sizeof( cl_float ) * get_format_channel_count( imageInfo->format ) ) != 0 )
1142 {
1143 // 8.3.3 Fix up cases where we have NaNs or flushed denorms; "all other values must be preserved"
1144 for ( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
1145 {
1146 if ( isnan( expected[j] ) && isnan( actual[j] ) )
1147 continue;
1148 if ( IsFloatSubnormal( expected[j] ) && actual[j] == 0.0f )
1149 continue;
1150 if (expected[j] != actual[j])
1151 {
1152 pass = false;
1153 break;
1154 }
1155 }
1156 }
1157 return pass;
1158 }
1159
validate_half_write_results(cl_half * expected,cl_half * actual,image_descriptor * imageInfo)1160 bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo )
1161 {
1162 bool pass = true;
1163 // Compare half floats
1164 if (memcmp(expected, actual, sizeof( cl_half ) * get_format_channel_count(imageInfo->format)) != 0) {
1165
1166 // 8.3.2 Fix up cases where we have NaNs or generated half denormals
1167 for ( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ ) {
1168 if ( is_half_nan( expected[j] ) && is_half_nan( actual[j] ) )
1169 continue;
1170 if ( is_half_denorm( expected[j] ) && is_half_zero( actual[j] ) )
1171 continue;
1172 if (expected[j] != actual[j])
1173 {
1174 pass = false;
1175 break;
1176 }
1177 }
1178 }
1179 return pass;
1180 }
1181
test_read_image_2D(cl_context context,cl_command_queue queue,cl_kernel kernel,image_descriptor * imageInfo,image_sampler_data * imageSampler,bool useFloatCoords,ExplicitType outputType,MTdata d)1182 int test_read_image_2D( cl_context context, cl_command_queue queue, cl_kernel kernel,
1183 image_descriptor *imageInfo, image_sampler_data *imageSampler,
1184 bool useFloatCoords, ExplicitType outputType, MTdata d )
1185 {
1186 int error;
1187 static int initHalf = 0;
1188 cl_mem imageBuffer;
1189 cl_mem_flags image_read_write_flags = CL_MEM_READ_ONLY;
1190 size_t threads[2];
1191
1192 clMemWrapper xOffsets, yOffsets, results;
1193 clSamplerWrapper actualSampler;
1194 BufferOwningPtr<char> maxImageUseHostPtrBackingStore;
1195
1196 // The DataBuffer template class really does use delete[], not free -- IRO
1197 BufferOwningPtr<cl_float> xOffsetValues(malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height));
1198 BufferOwningPtr<cl_float> yOffsetValues(malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height));
1199
1200 if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
1201 if( DetectFloatToHalfRoundingMode(queue) )
1202 return 1;
1203
1204 // generate_random_image_data allocates with malloc, so we use a MallocDataBuffer here
1205 BufferOwningPtr<char> imageValues;
1206 generate_random_image_data( imageInfo, imageValues, d );
1207
1208 if( gDebugTrace )
1209 {
1210 log_info( " - Creating image %d by %d...\n", (int)imageInfo->width, (int)imageInfo->height );
1211 if( gTestMipmaps )
1212 {
1213 log_info( " - with %d mip levels", (int) imageInfo->num_mip_levels );
1214 }
1215 }
1216
1217 // Construct testing sources
1218 clProtectedImage protImage;
1219 clMemWrapper unprotImage;
1220 cl_mem image;
1221
1222 if(gtestTypesToRun & kReadTests)
1223 {
1224 image_read_write_flags = CL_MEM_READ_ONLY;
1225 }
1226 else
1227 {
1228 image_read_write_flags = CL_MEM_READ_WRITE;
1229 }
1230
1231 if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
1232 {
1233 if (gTestImage2DFromBuffer)
1234 {
1235 generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d );
1236 imageBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
1237 imageInfo->rowPitch * imageInfo->height, maxImageUseHostPtrBackingStore, &error);
1238 test_error( error, "Unable to create buffer" );
1239 unprotImage = create_image_2d_buffer( context,
1240 image_read_write_flags,
1241 imageInfo->format,
1242 imageInfo->width, imageInfo->height, imageInfo->rowPitch,
1243 imageBuffer, &error );
1244
1245 }
1246 else
1247 {
1248 // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian)
1249 // Do not use protected images for max image size test since it rounds the row size to a page size
1250 if (gTestMaxImages) {
1251 generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d );
1252 unprotImage = create_image_2d( context,
1253 image_read_write_flags | CL_MEM_USE_HOST_PTR,
1254 imageInfo->format,
1255 imageInfo->width, imageInfo->height, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
1256 maxImageUseHostPtrBackingStore, &error );
1257 }
1258 else
1259 {
1260 error = protImage.Create( context,
1261 image_read_write_flags,
1262 imageInfo->format, imageInfo->width, imageInfo->height );
1263 }
1264 }
1265
1266 if( error != CL_SUCCESS )
1267 {
1268 if (gTestImage2DFromBuffer) {
1269 clReleaseMemObject(imageBuffer);
1270 if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
1271 log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
1272 return 0;
1273 }
1274 }
1275
1276 log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
1277 return error;
1278 }
1279
1280 if (gTestMaxImages || gTestImage2DFromBuffer)
1281 image = (cl_mem)unprotImage;
1282 else
1283 image = (cl_mem)protImage;
1284 }
1285 else if( gMemFlagsToUse == CL_MEM_COPY_HOST_PTR )
1286 {
1287 if (gTestImage2DFromBuffer)
1288 {
1289 imageBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
1290 imageInfo->rowPitch * imageInfo->height, imageValues, &error);
1291 test_error( error, "Unable to create buffer" );
1292 unprotImage = create_image_2d_buffer( context,
1293 image_read_write_flags,
1294 imageInfo->format,
1295 imageInfo->width, imageInfo->height, imageInfo->rowPitch,
1296 imageBuffer, &error );
1297
1298 }
1299 else
1300 {
1301 // Don't use clEnqueueWriteImage; just use copy host ptr to get the data in
1302 unprotImage = create_image_2d( context,
1303 image_read_write_flags | CL_MEM_COPY_HOST_PTR,
1304 imageInfo->format,
1305 imageInfo->width, imageInfo->height, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
1306 imageValues, &error );
1307 }
1308 if( error != CL_SUCCESS )
1309 {
1310 if (gTestImage2DFromBuffer) {
1311 clReleaseMemObject(imageBuffer);
1312 if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
1313 log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
1314 return 0;
1315 }
1316 }
1317
1318 log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
1319 return error;
1320 }
1321 image = unprotImage;
1322 }
1323 else // Either CL_MEM_ALLOC_HOST_PTR or none
1324 {
1325 if( gTestMipmaps )
1326 {
1327 cl_image_desc image_desc = {0};
1328 image_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
1329 image_desc.image_width = imageInfo->width;
1330 image_desc.image_height = imageInfo->height;
1331 image_desc.num_mip_levels = imageInfo->num_mip_levels;
1332 unprotImage = clCreateImage( context, CL_MEM_READ_ONLY, imageInfo->format, &image_desc, NULL, &error);
1333 }
1334 else if (gTestImage2DFromBuffer)
1335 {
1336 imageBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | gMemFlagsToUse,
1337 imageInfo->rowPitch * imageInfo->height, imageValues, &error);
1338 test_error( error, "Unable to create buffer" );
1339 unprotImage = create_image_2d_buffer( context,
1340 image_read_write_flags,
1341 imageInfo->format,
1342 imageInfo->width, imageInfo->height, imageInfo->rowPitch,
1343 imageBuffer, &error );
1344
1345 }
1346 else
1347 {
1348 // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
1349 // it works just as if no flag is specified, so we just do the same thing either way
1350 unprotImage = create_image_2d( context,
1351 image_read_write_flags | gMemFlagsToUse,
1352 imageInfo->format,
1353 imageInfo->width, imageInfo->height, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
1354 imageValues, &error );
1355 }
1356 if( error != CL_SUCCESS )
1357 {
1358 if (gTestImage2DFromBuffer) {
1359 clReleaseMemObject(imageBuffer);
1360 if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
1361 log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
1362 return 0;
1363 }
1364 }
1365
1366 log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
1367 return error;
1368 }
1369 image = unprotImage;
1370 }
1371
1372 if( gMemFlagsToUse != CL_MEM_COPY_HOST_PTR )
1373 {
1374 if( gDebugTrace )
1375 log_info( " - Writing image...\n" );
1376
1377 size_t origin[ 3 ] = { 0, 0, 0 };
1378 size_t region[ 3 ] = { imageInfo->width, imageInfo->height, 1 };
1379
1380 if(!gTestMipmaps)
1381 {
1382 error = clEnqueueWriteImage(queue, image, CL_TRUE,
1383 origin, region, ( gEnablePitch ? imageInfo->rowPitch : 0 ), 0,
1384 imageValues, 0, NULL, NULL);
1385 if (error != CL_SUCCESS)
1386 {
1387 log_error( "ERROR: Unable to write to 2D image of size %d x %d\n", (int)imageInfo->width, (int)imageInfo->height );
1388 return error;
1389 }
1390 }
1391 else
1392 {
1393 size_t tmpNextLevelOffset = 0;
1394 for(size_t level = 0; level < imageInfo->num_mip_levels; level++)
1395 {
1396 origin[2] = level;
1397 error = clEnqueueWriteImage(queue, image, CL_TRUE,
1398 origin, region, (( gEnablePitch || gTestImage2DFromBuffer) ? imageInfo->rowPitch : 0 ), 0,
1399 (char*)imageValues + tmpNextLevelOffset, 0, NULL, NULL);
1400 tmpNextLevelOffset += region[0]*region[1]*get_pixel_size(imageInfo->format);
1401 region[0] = (region[0] >> 1) ? (region[0] >> 1) : 1;
1402 region[1] = (region[1] >> 1) ? (region[1] >> 1) : 1;
1403 }
1404 }
1405 }
1406
1407 if( gDebugTrace )
1408 log_info( " - Creating kernel arguments...\n" );
1409
1410 xOffsets =
1411 clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
1412 sizeof(cl_float) * imageInfo->width * imageInfo->height,
1413 xOffsetValues, &error);
1414 test_error( error, "Unable to create x offset buffer" );
1415 yOffsets =
1416 clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
1417 sizeof(cl_float) * imageInfo->width * imageInfo->height,
1418 yOffsetValues, &error);
1419 test_error( error, "Unable to create y offset buffer" );
1420 results = clCreateBuffer(context, CL_MEM_READ_WRITE,
1421 get_explicit_type_size(outputType) * 4
1422 * imageInfo->width * imageInfo->height,
1423 NULL, &error);
1424 test_error( error, "Unable to create result buffer" );
1425
1426 // Create sampler to use
1427 actualSampler = create_sampler(context, imageSampler, gTestMipmaps, &error);
1428 test_error(error, "Unable to create image sampler");
1429
1430 // Set arguments
1431 int idx = 0;
1432 error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &image );
1433 test_error( error, "Unable to set kernel arguments" );
1434 if( !gUseKernelSamplers )
1435 {
1436 error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler );
1437 test_error( error, "Unable to set kernel arguments" );
1438 }
1439 error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &xOffsets );
1440 test_error( error, "Unable to set kernel arguments" );
1441 error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &yOffsets );
1442 test_error( error, "Unable to set kernel arguments" );
1443 error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results );
1444 test_error( error, "Unable to set kernel arguments" );
1445
1446 // A cast of troublesome offsets. The first one has to be zero.
1447 const float float_offsets[] = { 0.0f, MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30), 0.25f, 0.3f, 0.5f - FLT_EPSILON/4.0f, 0.5f, 0.9f, 1.0f - FLT_EPSILON/2 };
1448 int float_offset_count = sizeof( float_offsets) / sizeof( float_offsets[0] );
1449 int numTries = MAX_TRIES, numClamped = MAX_CLAMPED;
1450 int loopCount = 2 * float_offset_count;
1451 if( ! useFloatCoords )
1452 loopCount = 1;
1453 if (gTestMaxImages) {
1454 loopCount = 1;
1455 log_info("Testing each size only once with pixel offsets of %g for max sized images.\n", float_offsets[0]);
1456 }
1457
1458 if(gtestTypesToRun & kReadWriteTests)
1459 {
1460 loopCount = 1;
1461 }
1462
1463 // Get the maximum absolute error for this format
1464 double formatAbsoluteError = get_max_absolute_error(imageInfo->format, imageSampler);
1465 if (gDebugTrace) log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError);
1466
1467 if (0 == initHalf && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) {
1468 initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode( queue );
1469 if (initHalf) {
1470 log_info("Half rounding mode successfully detected.\n");
1471 }
1472 }
1473
1474 size_t nextLevelOffset = 0;
1475 size_t width_lod = imageInfo->width, height_lod = imageInfo->height;
1476 for( size_t lod = 0; (gTestMipmaps && (lod < imageInfo->num_mip_levels))|| (!gTestMipmaps && lod < 1); lod ++)
1477 {
1478 size_t resultValuesSize = width_lod * height_lod * get_explicit_type_size( outputType ) * 4;
1479 BufferOwningPtr<char> resultValues(malloc(resultValuesSize));
1480 float lod_float = (float)lod;
1481 char *imagePtr = (char *)imageValues + nextLevelOffset;
1482 if( gTestMipmaps )
1483 {
1484 if(gDebugTrace)
1485 log_info("\t- Working at mip level %d\n", lod);
1486 error = clSetKernelArg( kernel, idx, sizeof(float), &lod_float);
1487 }
1488
1489 // Validate results element by element
1490 for( int q = 0; q < loopCount; q++ )
1491 {
1492 float offset = float_offsets[ q % float_offset_count ];
1493
1494 // Init the coordinates
1495 InitFloatCoords( imageInfo, imageSampler, xOffsetValues, yOffsetValues,
1496 q>=float_offset_count ? -offset: offset,
1497 q>=float_offset_count ? offset: -offset, imageSampler->normalized_coords, d, lod );
1498
1499 error = clEnqueueWriteBuffer( queue, xOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width, xOffsetValues, 0, NULL, NULL );
1500 test_error( error, "Unable to write x offsets" );
1501 error = clEnqueueWriteBuffer( queue, yOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width, yOffsetValues, 0, NULL, NULL );
1502 test_error( error, "Unable to write y offsets" );
1503
1504 // Get results
1505 memset( resultValues, 0xff, resultValuesSize );
1506 clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL );
1507
1508 // Run the kernel
1509 threads[0] = (size_t)width_lod;
1510 threads[1] = (size_t)height_lod;
1511 error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
1512 test_error( error, "Unable to run kernel" );
1513
1514 if( gDebugTrace )
1515 log_info( " reading results, %ld kbytes\n", (unsigned long)( width_lod * height_lod * get_explicit_type_size( outputType ) * 4 / 1024 ) );
1516
1517 error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, width_lod * height_lod * get_explicit_type_size( outputType ) * 4, resultValues, 0, NULL, NULL ); //XXX check
1518 test_error( error, "Unable to read results from kernel" );
1519 if( gDebugTrace )
1520 log_info( " results read\n" );
1521
1522 int retCode;
1523 switch (imageInfo->format->image_channel_order) {
1524 case CL_DEPTH:
1525 retCode = validate_image_2D_depth_results((char*)imageValues + nextLevelOffset, resultValues, formatAbsoluteError, xOffsetValues, yOffsetValues, outputType, numTries, numClamped, imageSampler, imageInfo, lod, imagePtr);
1526 break;
1527 case CL_sRGB:
1528 case CL_sRGBx:
1529 case CL_sRGBA:
1530 case CL_sBGRA:
1531 retCode = validate_image_2D_sRGB_results((char*)imageValues + nextLevelOffset, resultValues, formatAbsoluteError, xOffsetValues, yOffsetValues, outputType, numTries, numClamped, imageSampler, imageInfo, lod, imagePtr);
1532 break;
1533 default:
1534 retCode = validate_image_2D_results((char*)imageValues + nextLevelOffset, resultValues, formatAbsoluteError, xOffsetValues, yOffsetValues, outputType, numTries, numClamped, imageSampler, imageInfo, lod, imagePtr);
1535 }
1536 if (retCode)
1537 return retCode;
1538 }
1539 if ( gTestMipmaps )
1540 {
1541 nextLevelOffset += width_lod * height_lod * get_pixel_size( imageInfo->format );
1542 width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1;
1543 height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1;
1544 }
1545 }
1546
1547 if (gTestImage2DFromBuffer) clReleaseMemObject(imageBuffer);
1548
1549 return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
1550 }
1551
test_read_image_set_2D(cl_device_id device,cl_context context,cl_command_queue queue,const cl_image_format * format,image_sampler_data * imageSampler,bool floatCoords,ExplicitType outputType)1552 int test_read_image_set_2D(cl_device_id device, cl_context context,
1553 cl_command_queue queue,
1554 const cl_image_format *format,
1555 image_sampler_data *imageSampler, bool floatCoords,
1556 ExplicitType outputType)
1557 {
1558 char programSrc[10240];
1559 const char *ptr;
1560 const char *readFormat;
1561 clProgramWrapper program;
1562 clKernelWrapper kernel;
1563 const char *KernelSourcePattern = NULL;
1564
1565 if (gTestImage2DFromBuffer)
1566 {
1567 if (format->image_channel_order == CL_RGB || format->image_channel_order == CL_RGBx)
1568 {
1569 switch (format->image_channel_data_type)
1570 {
1571 case CL_UNORM_INT8:
1572 case CL_UNORM_INT16:
1573 case CL_SNORM_INT8:
1574 case CL_SNORM_INT16:
1575 case CL_HALF_FLOAT:
1576 case CL_FLOAT:
1577 case CL_SIGNED_INT8:
1578 case CL_SIGNED_INT16:
1579 case CL_SIGNED_INT32:
1580 case CL_UNSIGNED_INT8:
1581 case CL_UNSIGNED_INT16:
1582 case CL_UNSIGNED_INT32:
1583 log_info( "Skipping image format: %s %s\n", GetChannelOrderName( format->image_channel_order ),
1584 GetChannelTypeName( format->image_channel_data_type ));
1585 return 0;
1586 default:
1587 break;
1588 }
1589 }
1590 }
1591
1592
1593 RandomSeed seed( gRandomSeed );
1594 int error;
1595
1596 // Get our operating params
1597 size_t maxWidth, maxHeight;
1598 cl_ulong maxAllocSize, memSize;
1599 image_descriptor imageInfo = { 0x0 };
1600 size_t pixelSize;
1601
1602 imageInfo.format = format;
1603 imageInfo.depth = imageInfo.arraySize = imageInfo.slicePitch = 0;
1604 imageInfo.type = CL_MEM_OBJECT_IMAGE2D;
1605 pixelSize = get_pixel_size( imageInfo.format );
1606
1607 error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
1608 error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
1609 error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
1610 error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
1611 test_error( error, "Unable to get max image 2D size from device" );
1612
1613 if (memSize > (cl_ulong)SIZE_MAX) {
1614 memSize = (cl_ulong)SIZE_MAX;
1615 }
1616
1617 // Determine types
1618 if( outputType == kInt )
1619 readFormat = "i";
1620 else if( outputType == kUInt )
1621 readFormat = "ui";
1622 else // kFloat
1623 readFormat = "f";
1624
1625 // Construct the source
1626 const char *samplerArg = samplerKernelArg;
1627 char samplerVar[ 1024 ] = "";
1628 if( gUseKernelSamplers )
1629 {
1630 get_sampler_kernel_code( imageSampler, samplerVar );
1631 samplerArg = "";
1632 }
1633
1634 if(gtestTypesToRun & kReadTests)
1635 {
1636 KernelSourcePattern = read2DKernelSourcePattern;
1637 }
1638 else
1639 {
1640 KernelSourcePattern = read_write2DKernelSourcePattern;
1641 }
1642
1643
1644 sprintf(programSrc, KernelSourcePattern,
1645 gTestMipmaps
1646 ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable"
1647 : "",
1648 (format->image_channel_order == CL_DEPTH) ? "image2d_depth_t"
1649 : "image2d_t",
1650 samplerArg, get_explicit_type_name(outputType),
1651 (format->image_channel_order == CL_DEPTH) ? "" : "4",
1652 gTestMipmaps ? ", float lod" : " ", samplerVar,
1653 gTestMipmaps ? lodOffsetSource : offsetSource,
1654 floatCoords ? floatKernelSource : intCoordKernelSource, readFormat,
1655 gTestMipmaps ? ", lod" : " ");
1656
1657 ptr = programSrc;
1658 error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
1659 "sample_kernel");
1660 test_error( error, "Unable to create testing kernel" );
1661
1662 if( gTestSmallImages )
1663 {
1664 for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
1665 {
1666 imageInfo.rowPitch = imageInfo.width * pixelSize;
1667 for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ )
1668 {
1669 if( gTestMipmaps )
1670 imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed);
1671
1672 if( gDebugTrace )
1673 log_info( " at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.height );
1674
1675 int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1676 if( retCode )
1677 return retCode;
1678 }
1679 }
1680 }
1681 else if( gTestMaxImages )
1682 {
1683 // Try a specific set of maximum sizes
1684 size_t numbeOfSizes;
1685 size_t sizes[100][3];
1686
1687 get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D, imageInfo.format, CL_TRUE);
1688
1689 for( size_t idx = 0; idx < numbeOfSizes; idx++ )
1690 {
1691 imageInfo.width = sizes[ idx ][ 0 ];
1692 imageInfo.height = sizes[ idx ][ 1 ];
1693 imageInfo.rowPitch = imageInfo.width * pixelSize;
1694 log_info("Testing %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ]);
1695
1696 if( gTestMipmaps )
1697 imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed);
1698
1699 if( gDebugTrace )
1700 log_info( " at max size %d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ] );
1701 int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1702 if( retCode )
1703 return retCode;
1704 }
1705 }
1706 else if( gTestRounding )
1707 {
1708 uint64_t typeRange = 1LL << ( get_format_type_size( imageInfo.format ) * 8 );
1709 typeRange /= pixelSize / get_format_type_size( imageInfo.format );
1710 imageInfo.height = (size_t)( ( typeRange + 255LL ) / 256LL );
1711 imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height );
1712 while( imageInfo.height >= maxHeight / 2 )
1713 {
1714 imageInfo.width <<= 1;
1715 imageInfo.height >>= 1;
1716 }
1717
1718 while( imageInfo.width >= maxWidth / 2 )
1719 imageInfo.width >>= 1;
1720 imageInfo.rowPitch = imageInfo.width * pixelSize;
1721
1722 gRoundingStartValue = 0;
1723 do
1724 {
1725 if( gDebugTrace )
1726 log_info( " at size %d,%d, starting round ramp at %llu for range %llu\n", (int)imageInfo.width, (int)imageInfo.height, gRoundingStartValue, typeRange );
1727 int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1728 if( retCode )
1729 return retCode;
1730
1731 gRoundingStartValue += imageInfo.width * imageInfo.height * pixelSize / get_format_type_size( imageInfo.format );
1732
1733 } while( gRoundingStartValue < typeRange );
1734 }
1735 else
1736 {
1737 cl_uint imagePitchAlign = 0;
1738 if (gTestImage2DFromBuffer)
1739 {
1740 #if defined(CL_DEVICE_IMAGE_PITCH_ALIGNMENT)
1741 error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof( cl_uint ), &imagePitchAlign, NULL );
1742 #endif
1743 if (!imagePitchAlign || error) {
1744 test_error( error, "Unable to get CL_DEVICE_IMAGE_PITCH_ALIGNMENT from device" );
1745 imagePitchAlign = 1;
1746 }
1747 }
1748
1749 int maxWidthRange = (int) reduceImageSizeRange(maxWidth);
1750 int maxHeightRange = (int) reduceImageSizeRange(maxHeight);
1751
1752 for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
1753 {
1754 cl_ulong size;
1755 // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
1756 // image, the result array, plus offset arrays, will fit in the global ram space
1757 do
1758 {
1759 imageInfo.width = (size_t)random_log_in_range( 16, maxWidthRange, seed );
1760 imageInfo.height = (size_t)random_log_in_range( 16, maxHeightRange, seed );
1761
1762 imageInfo.rowPitch = imageInfo.width * pixelSize;
1763 if( gTestMipmaps )
1764 {
1765 imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed);
1766 size = 4 * compute_mipmapped_image_size(imageInfo);
1767 }
1768 else
1769 {
1770 if( gEnablePitch )
1771 {
1772 size_t extraWidth = (int)random_log_in_range( 0, 64, seed );
1773 imageInfo.rowPitch += extraWidth * pixelSize;
1774 }
1775
1776 // if we are creating a 2D image from a buffer, make sure that the rowpitch is aligned to CL_DEVICE_IMAGE_PITCH_ALIGNMENT_APPLE
1777 if (gTestImage2DFromBuffer)
1778 {
1779 size_t pitch = imagePitchAlign * pixelSize;
1780 imageInfo.rowPitch = ((imageInfo.rowPitch + pitch - 1) / pitch ) * pitch;
1781 }
1782
1783 size = (size_t)imageInfo.rowPitch * (size_t)imageInfo.height * 4;
1784 }
1785 } while( size > maxAllocSize || ( size * 3 ) > memSize );
1786
1787 if( gDebugTrace )
1788 log_info( " at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxHeight );
1789 int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1790 if( retCode )
1791 return retCode;
1792 }
1793 }
1794
1795 return 0;
1796 }
1797