xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/images/kernel_read_write/test_write_3D.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "../testBase.h"
17 #include "test_common.h"
18 
19 #if !defined(_WIN32)
20 #include <sys/mman.h>
21 #endif
22 
23 extern cl_mem_flags gMemFlagsToUse;
24 extern int gtestTypesToRun;
25 
26 extern bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo );
27 extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo );
28 
29 // Utility function to clamp down image sizes for certain tests to avoid
30 // using too much memory.
reduceImageSizeRange(size_t maxDimSize,MTdata & seed)31 static size_t reduceImageSizeRange(size_t maxDimSize, MTdata& seed) {
32   size_t DimSize = random_log_in_range(8, (int) maxDimSize/32, seed);
33   if (DimSize > (size_t) 128)
34     return 128;
35   else
36     return DimSize;
37 }
38 
reduceImageDepth(size_t maxDimSize,MTdata & seed)39 static size_t reduceImageDepth(size_t maxDimSize, MTdata& seed) {
40   size_t DimSize = random_log_in_range(8, (int) maxDimSize/32, seed);
41   if (DimSize > (size_t) 32)
42     return 32;
43   else
44     return DimSize;
45 }
46 
47 
48 const char *write3DKernelSourcePattern =
49     "%s"
50     "%s\n"
51     "__kernel void sample_kernel( __global %s4 *input, write_only image3d_t "
52     "output %s )\n"
53     "{\n"
54     "   int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = "
55     "get_global_id(2);\n"
56     "%s"
57     "   write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset "
58     "]);\n"
59     "}";
60 
61 const char *readwrite3DKernelSourcePattern =
62     "%s"
63     "%s\n"
64     "__kernel void sample_kernel( __global %s4 *input, read_write image3d_t "
65     "output %s )\n"
66     "{\n"
67     "   int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = "
68     "get_global_id(2);\n"
69     "%s"
70     "   write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset "
71     "]);\n"
72     "}";
73 
74 const char *khr3DWritesPragma =
75 "#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n";
76 
77 const char *offset3DSource=
78 "   int offset = tidZ*get_image_width(output)*get_image_height(output) + tidY*get_image_width(output) + tidX;\n";
79 
80 const char *offset3DLodSource =
81 "   int width_lod = ( get_image_width(output) >> lod ) ? ( get_image_width(output) >> lod ) : 1;\n"
82 "   int height_lod = ( get_image_height(output) >> lod ) ? ( get_image_height(output) >> lod ) : 1;\n"
83 "   int offset = tidZ*width_lod*height_lod + tidY*width_lod + tidX;\n";
84 
test_write_image_3D(cl_device_id device,cl_context context,cl_command_queue queue,cl_kernel kernel,image_descriptor * imageInfo,ExplicitType inputType,MTdata d)85 int test_write_image_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel,
86                         image_descriptor *imageInfo, ExplicitType inputType, MTdata d )
87 {
88     int                 totalErrors = 0;
89 
90     size_t              num_flags   = 0;
91     const cl_mem_flags  *mem_flag_types = NULL;
92     const char *        *mem_flag_names = NULL;
93     const cl_mem_flags  write_only_mem_flag_types[2] = {  CL_MEM_WRITE_ONLY,   CL_MEM_READ_WRITE };
94     const char *        write_only_mem_flag_names[2] = { "CL_MEM_WRITE_ONLY", "CL_MEM_READ_WRITE" };
95     const cl_mem_flags  read_write_mem_flag_types[1] = {  CL_MEM_READ_WRITE};
96     const char *        read_write_mem_flag_names[1] = { "CL_MEM_READ_WRITE"};
97 
98     if(gtestTypesToRun & kWriteTests)
99     {
100         mem_flag_types = write_only_mem_flag_types;
101         mem_flag_names = write_only_mem_flag_names;
102         num_flags      = sizeof( write_only_mem_flag_types ) / sizeof( write_only_mem_flag_types[0] );
103     }
104     else
105     {
106         mem_flag_types = read_write_mem_flag_types;
107         mem_flag_names = read_write_mem_flag_names;
108         num_flags      = sizeof( read_write_mem_flag_types ) / sizeof( read_write_mem_flag_types[0] );
109     }
110 
111     size_t pixelSize = get_pixel_size( imageInfo->format );
112 
113     for( size_t mem_flag_index = 0; mem_flag_index < num_flags; mem_flag_index++ )
114     {
115         int error;
116         size_t threads[3];
117         bool verifyRounding = false;
118         int forceCorrectlyRoundedWrites = 0;
119 
120 #if defined( __APPLE__ )
121         // Require Apple's CPU implementation to be correctly rounded, not just within 0.6
122         if( GetDeviceType(device) == CL_DEVICE_TYPE_CPU )
123             forceCorrectlyRoundedWrites = 1;
124 #endif
125 
126         if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
127             if( DetectFloatToHalfRoundingMode(queue) )
128                 return 1;
129 
130         BufferOwningPtr<char> maxImageUseHostPtrBackingStore, imageValues;
131 
132         create_random_image_data( inputType, imageInfo, imageValues, d );
133 
134         if(!gTestMipmaps)
135         {
136             if( inputType == kFloat && imageInfo->format->image_channel_data_type != CL_FLOAT )
137             {
138                 /* Pilot data for sRGB images */
139                 if(is_sRGBA_order(imageInfo->format->image_channel_order))
140                 {
141                     // We want to generate ints (mostly) in range of the target format which should be [0,255]
142                     // However the range chosen here is [-test_range_ext, 255 + test_range_ext] so that
143                     // it can test some out-of-range data points
144                     const unsigned int test_range_ext = 16;
145                     int formatMin = 0 - test_range_ext;
146                     int formatMax = 255 + test_range_ext;
147                     int pixel_value = 0;
148 
149                     // First, fill with arbitrary floats
150                     for( size_t z = 0; z < imageInfo->depth; z++ )
151                     {
152                         for( size_t y = 0; y < imageInfo->height; y++ )
153                         {
154                             float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4 + imageInfo->height * imageInfo->width * z * 4;
155                             for( size_t i = 0; i < imageInfo->width * 4; i++ )
156                             {
157                                 pixel_value = random_in_range( formatMin, (int)formatMax, d );
158                                 inputValues[ i ] = (float)(pixel_value/255.0f);
159                             }
160                         }
161                     }
162 
163                     // Throw a few extra test values in there
164                     float *inputValues = (float *)(char*)imageValues;
165                     size_t i = 0;
166 
167                     // Piloting some debug inputs.
168                     inputValues[ i++ ] = -0.5f;
169                     inputValues[ i++ ] = 0.5f;
170                     inputValues[ i++ ] = 2.f;
171                     inputValues[ i++ ] = 0.5f;
172 
173                     // Also fill in the first few vectors with some deliberate tests to determine the rounding mode
174                     // is correct
175                     if( imageInfo->width > 12 )
176                     {
177                         float formatMax = (float)get_format_max_int( imageInfo->format );
178                         inputValues[ i++ ] = 4.0f / formatMax;
179                         inputValues[ i++ ] = 4.3f / formatMax;
180                         inputValues[ i++ ] = 4.5f / formatMax;
181                         inputValues[ i++ ] = 4.7f / formatMax;
182                         inputValues[ i++ ] = 5.0f / formatMax;
183                         inputValues[ i++ ] = 5.3f / formatMax;
184                         inputValues[ i++ ] = 5.5f / formatMax;
185                         inputValues[ i++ ] = 5.7f / formatMax;
186                     }
187                 }
188                 else
189                 {
190                     // First, fill with arbitrary floats
191                     for( size_t z = 0; z < imageInfo->depth; z++ )
192                     {
193                         for( size_t y = 0; y < imageInfo->height; y++ )
194                         {
195                             float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4 + imageInfo->height * imageInfo->width * z * 4;
196                             for( size_t i = 0; i < imageInfo->width * 4; i++ )
197                                 inputValues[ i ] = get_random_float( -0.1f, 1.1f, d );
198                         }
199                     }
200 
201                     // Throw a few extra test values in there
202                     float *inputValues = (float *)(char*)imageValues;
203                     size_t i = 0;
204                     inputValues[ i++ ] = -0.0000000000009f;
205                     inputValues[ i++ ] = 1.f;
206                     inputValues[ i++ ] = -1.f;
207                     inputValues[ i++ ] = 2.f;
208 
209                     // Also fill in the first few vectors with some deliberate tests to determine the rounding mode
210                     // is correct
211                     if( imageInfo->width > 12 )
212                     {
213                         float formatMax = (float)get_format_max_int( imageInfo->format );
214                         inputValues[ i++ ] = 4.0f / formatMax;
215                         inputValues[ i++ ] = 4.3f / formatMax;
216                         inputValues[ i++ ] = 4.5f / formatMax;
217                         inputValues[ i++ ] = 4.7f / formatMax;
218                         inputValues[ i++ ] = 5.0f / formatMax;
219                         inputValues[ i++ ] = 5.3f / formatMax;
220                         inputValues[ i++ ] = 5.5f / formatMax;
221                         inputValues[ i++ ] = 5.7f / formatMax;
222                         verifyRounding = true;
223                     }
224                 }
225             }
226             else if( inputType == kUInt )
227             {
228                 unsigned int *inputValues = (unsigned int*)(char*)imageValues;
229                 size_t i = 0;
230                 inputValues[ i++ ] = 0;
231                 inputValues[ i++ ] = 65535;
232                 inputValues[ i++ ] = 7271820;
233                 inputValues[ i++ ] = 0;
234             }
235         }
236 
237         // Construct testing sources
238         clProtectedImage protImage;
239         clMemWrapper unprotImage;
240         cl_mem image;
241 
242         if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
243         {
244             create_random_image_data( inputType, imageInfo, maxImageUseHostPtrBackingStore, d );
245 
246             unprotImage = create_image_3d( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR, imageInfo->format,
247                                           imageInfo->width, imageInfo->height, imageInfo->depth, 0, 0,
248                                           maxImageUseHostPtrBackingStore, &error );
249 
250             if( error != CL_SUCCESS )
251             {
252                 log_error( "ERROR: Unable to create 3D image of size %ld x %ld x %ld pitch %ld (%s)\n", imageInfo->width, imageInfo->height, imageInfo->depth, imageInfo->rowPitch, IGetErrorString( error ) );
253                 return error;
254             }
255 
256             image = (cl_mem)unprotImage;
257         }
258         else // Either CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR or none
259         {
260             // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
261             // it works just as if no flag is specified, so we just do the same thing either way
262             // Note: if the flags is really CL_MEM_COPY_HOST_PTR, we want to remove it, because we don't want to copy any incoming data
263             if(gTestMipmaps)
264             {
265                 cl_image_desc image_desc = {0};
266                 image_desc.image_type = imageInfo->type;
267                 image_desc.num_mip_levels = imageInfo->num_mip_levels;
268                 image_desc.image_width = imageInfo->width;
269                 image_desc.image_height = imageInfo->height;
270                 image_desc.image_depth = imageInfo->depth;
271 
272                 unprotImage = clCreateImage( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ),
273                                              imageInfo->format, &image_desc, NULL, &error);
274                 if( error != CL_SUCCESS )
275                 {
276                     log_error( "ERROR: Unable to create %d level mipmapped 3D image of size %ld x %ld *%ld (%s, %s)\n", imageInfo->num_mip_levels, imageInfo->width, imageInfo->height, imageInfo->depth,
277                                IGetErrorString( error ), mem_flag_names[mem_flag_index] );
278                     return error;
279                 }
280             }
281             else
282             {
283                 unprotImage = create_image_3d( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), imageInfo->format,
284                                               imageInfo->width, imageInfo->height, imageInfo->depth, 0, 0, imageValues, &error );
285                 if( error != CL_SUCCESS )
286                 {
287                     log_error( "ERROR: Unable to create 3D image of size %ld x %ld x %ld pitch %ld (%s)\n", imageInfo->width, imageInfo->height, imageInfo->depth, imageInfo->rowPitch, IGetErrorString( error ) );
288                     return error;
289                 }
290             }
291             image = unprotImage;
292         }
293 
294         error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &image );
295         test_error( error, "Unable to set kernel arguments" );
296 
297         size_t width_lod = imageInfo->width;
298         size_t height_lod = imageInfo->height;
299         size_t depth_lod = imageInfo->depth;
300         size_t nextLevelOffset = 0;
301         size_t origin[ 4 ] = { 0, 0, 0, 0 };
302         size_t region[ 3 ] = { imageInfo->width, imageInfo->height, imageInfo->depth };
303 
304         int num_lod_loops = (gTestMipmaps)? imageInfo->num_mip_levels : 1;
305         for( int lod = 0; lod < num_lod_loops; lod++)
306         {
307             if(gTestMipmaps)
308             {
309                 error = clSetKernelArg( kernel, 2, sizeof( int ), &lod );
310             }
311             // Run the kernel
312             threads[0] = (size_t)width_lod;
313             threads[1] = (size_t)height_lod;
314             threads[2] = (size_t)depth_lod;
315 
316             clMemWrapper inputStream;
317 
318             char *imagePtrOffset = imageValues + nextLevelOffset;
319             inputStream =
320                 clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
321                                get_explicit_type_size(inputType) * 4 * width_lod
322                                    * height_lod * depth_lod,
323                                imagePtrOffset, &error);
324             test_error( error, "Unable to create input buffer" );
325 
326             // Set arguments
327             error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &inputStream );
328             test_error( error, "Unable to set kernel arguments" );
329 
330             error = clEnqueueNDRangeKernel( queue, kernel, 3, NULL, threads, NULL, 0, NULL, NULL );
331             test_error( error, "Unable to run kernel" );
332 
333             // Get results
334             size_t resultSize;
335             if(gTestMipmaps)
336                 resultSize = width_lod * height_lod * depth_lod * pixelSize;
337             else
338                 resultSize = imageInfo->slicePitch *imageInfo->depth;
339             clProtectedArray PA(resultSize);
340             char *resultValues = (char *)((void *)PA);
341 
342             if( gDebugTrace )
343                 log_info( "    reading results, %ld kbytes\n", (unsigned long)( resultSize / 1024 ) );
344 
345             origin[3] = lod;
346             region[0] = width_lod;
347             region[1] = height_lod;
348             region[2] = depth_lod;
349             error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, gEnablePitch ? imageInfo->rowPitch : 0, gEnablePitch ? imageInfo->slicePitch : 0, resultValues, 0, NULL, NULL );
350             test_error( error, "Unable to read results from kernel" );
351             if( gDebugTrace )
352                 log_info( "    results read\n" );
353 
354             // Validate results element by element
355             char *imagePtr = (char*)imageValues + nextLevelOffset;
356             int numTries = 5;
357             for( size_t z = 0, i = 0; z < depth_lod; z++ )
358             {
359                 for( size_t y = 0; y < height_lod; y++ )
360                 {
361                     char *resultPtr;
362                     if( gTestMipmaps )
363                         resultPtr = (char *)resultValues + y * width_lod * pixelSize + z * width_lod * height_lod * pixelSize;
364                     else
365                         resultPtr = (char *)resultValues + y * imageInfo->rowPitch + z * imageInfo->slicePitch;
366                     for( size_t x = 0; x < width_lod; x++, i++ )
367                     {
368                         char resultBuffer[ 16 ]; // Largest format would be 4 channels * 4 bytes (32 bits) each
369 
370                         // Convert this pixel
371                         if( inputType == kFloat )
372                             pack_image_pixel( (float *)imagePtr, imageInfo->format, resultBuffer );
373                         else if( inputType == kInt )
374                             pack_image_pixel( (int *)imagePtr, imageInfo->format, resultBuffer );
375                         else // if( inputType == kUInt )
376                             pack_image_pixel( (unsigned int *)imagePtr, imageInfo->format, resultBuffer );
377 
378                         // Compare against the results
379                         if(is_sRGBA_order(imageInfo->format->image_channel_order))
380                         {
381                             // Compare sRGB-mapped values
382                             cl_float expected[4]    = {0};
383                             cl_float* input_values  = (float*)imagePtr;
384                             cl_uchar *actual        = (cl_uchar*)resultPtr;
385                             float max_err           = MAX_lRGB_TO_sRGB_CONVERSION_ERROR;
386                             float err[4]            = {0.0f};
387 
388                             for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
389                             {
390                                 if(j < 3)
391                                 {
392                                     expected[j] = sRGBmap(input_values[j]);
393                                 }
394                                 else // there is no sRGB conversion for alpha component if it exists
395                                 {
396                                     expected[j] = NORMALIZE(input_values[j], 255.0f);
397                                 }
398 
399                                 err[j] = fabsf( expected[ j ] - actual[ j ] );
400                             }
401 
402                             if ((err[0] > max_err) ||
403                                 (err[1] > max_err) ||
404                                 (err[2] > max_err) ||
405                                 (err[3] > FLT_EPSILON)) // there is no conversion for alpha
406                             {
407                                 log_error( "       Error:     %g %g %g %g\n", err[0], err[1], err[2], err[3]);
408                                 log_error( "       Input:     %g %g %g %g\n", *((float *)imagePtr), *((float *)imagePtr + 1), *((float *)imagePtr + 2), *((float *)imagePtr + 3));
409                                 log_error( "       Expected: %g %g %g %g\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
410                                 log_error( "       Actual:   %d %d %d %d\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
411                                 return 1;
412                             }
413                         }
414                         else if( imageInfo->format->image_channel_data_type == CL_FLOAT )
415                         {
416                             float *expected = (float *)resultBuffer;
417                             float *actual = (float *)resultPtr;
418 
419                             if( !validate_float_write_results( expected, actual, imageInfo ) )
420                             {
421                                 unsigned int *e = (unsigned int *)resultBuffer;
422                                 unsigned int *a = (unsigned int *)resultPtr;
423                                 log_error( "ERROR: Sample %ld (%ld,%ld,%ld) did not validate! (%s)\n", i, x, y, z, mem_flag_names[ mem_flag_index ] );
424                                 log_error( "       Expected: %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
425                                 log_error( "       Expected: %08x %08x %08x %08x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
426                                 log_error( "       Actual:   %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
427                                 log_error( "       Actual:   %08x %08x %08x %08x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
428                                 totalErrors++;
429                                 if( ( --numTries ) == 0 )
430                                     return 1;
431                             }
432                         }
433                         else if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
434                         {
435                             cl_half *e = (cl_half *)resultBuffer;
436                             cl_half *a = (cl_half *)resultPtr;
437                             if( !validate_half_write_results( e, a, imageInfo ) )
438                             {
439                                 totalErrors++;
440                                 log_error( "ERROR: Sample %ld (%ld,%ld,%ld) did not validate! (%s)\n", i, x, y, z, mem_flag_names[ mem_flag_index ] );
441                                 unsigned short *e = (unsigned short *)resultBuffer;
442                                 unsigned short *a = (unsigned short *)resultPtr;
443                                 log_error( "    Expected: 0x%04x 0x%04x 0x%04x 0x%04x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
444                                 log_error( "    Actual:   0x%04x 0x%04x 0x%04x 0x%04x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
445                                 if( inputType == kFloat )
446                                 {
447                                     float *p = (float *)imagePtr;
448                                     log_error( "    Source: %a %a %a %a\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
449                                     log_error( "          : %12.24f %12.24f %12.24f %12.24f\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
450                                 }
451                                 if( ( --numTries ) == 0 )
452                                     return 1;
453                             }
454                         }
455                         else
456                         {
457 
458                             filter_undefined_bits(imageInfo, resultPtr);
459 
460                             // Exact result passes every time
461                             if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 )
462                             {
463                                 // result is inexact.  Calculate error
464                                 int failure = 1;
465                                 float errors[4] = {NAN, NAN, NAN, NAN};
466                                 pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors );
467 
468                                 failure = filter_rounding_errors(
469                                     forceCorrectlyRoundedWrites, imageInfo,
470                                     errors);
471 
472                                 if( failure )
473                                 {
474                                     totalErrors++;
475                                     // Is it our special rounding test?
476                                     if( verifyRounding && i >= 1 && i <= 2 )
477                                     {
478                                         // Try to guess what the rounding mode of the device really is based on what it returned
479                                         const char *deviceRounding = "unknown";
480                                         unsigned int deviceResults[8];
481                                         read_image_pixel<unsigned int>( resultPtr, imageInfo, 0, 0, 0, deviceResults, lod );
482                                         read_image_pixel<unsigned int>( resultPtr, imageInfo, 1, 0, 0, &deviceResults[ 4 ], lod );
483 
484                                         if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 4 &&
485                                            deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 5 && deviceResults[ 7 ] == 5 )
486                                             deviceRounding = "truncate";
487                                         else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 5 && deviceResults[ 3 ] == 5 &&
488                                                 deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
489                                             deviceRounding = "round to nearest";
490                                         else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 5 &&
491                                                 deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
492                                             deviceRounding = "round to even";
493 
494                                         log_error( "ERROR: Rounding mode sample (%ld) did not validate, probably due to the device's rounding mode being wrong (%s)\n", i, mem_flag_names[mem_flag_index] );
495                                         log_error( "       Actual values rounded by device: %d %d %d %d %d %d %d %d\n", deviceResults[ 0 ], deviceResults[ 1 ], deviceResults[ 2 ], deviceResults[ 3 ],
496                                                   deviceResults[ 4 ], deviceResults[ 5 ], deviceResults[ 6 ], deviceResults[ 7 ] );
497                                         log_error( "       Rounding mode of device appears to be %s\n", deviceRounding );
498                                         return 1;
499                                     }
500                                     log_error( "ERROR: Sample %d (%d,%d) did not validate!\n", (int)i, (int)x, (int)y );
501                                     switch(imageInfo->format->image_channel_data_type)
502                                     {
503                                         case CL_UNORM_INT8:
504                                         case CL_SNORM_INT8:
505                                         case CL_UNSIGNED_INT8:
506                                         case CL_SIGNED_INT8:
507                                             log_error( "    Expected: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultBuffer)[0], ((cl_uchar*)resultBuffer)[1], ((cl_uchar*)resultBuffer)[2], ((cl_uchar*)resultBuffer)[3] );
508                                             log_error( "    Actual:   0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] );
509                                             log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
510                                             break;
511                                         case CL_UNORM_SHORT_565: {
512                                             cl_uint *ref_value =
513                                                 (cl_uint *)resultBuffer;
514                                             cl_uint *test_value =
515                                                 (cl_uint *)resultPtr;
516 
517                                             log_error(" Expected: 0x%2.2x "
518                                                       "Actual: 0x%2.2x \n",
519                                                       ref_value[0],
520                                                       test_value[0]);
521 
522                                             log_error(
523                                                 "    Expected: 0x%2.2x 0x%2.2x "
524                                                 "0x%2.2x \n",
525                                                 ref_value[0] & 0x1F,
526                                                 (ref_value[0] >> 5) & 0x3F,
527                                                 (ref_value[0] >> 11) & 0x1F);
528                                             log_error(
529                                                 "    Actual:   0x%2.2x 0x%2.2x "
530                                                 "0x%2.2x \n",
531                                                 test_value[0] & 0x1F,
532                                                 (test_value[0] >> 5) & 0x3F,
533                                                 (test_value[0] >> 11) & 0x1F);
534                                             log_error(
535                                                 "    Error:    %f %f %f %f\n",
536                                                 errors[0], errors[1],
537                                                 errors[2]);
538                                             break;
539                                         }
540                                         case CL_UNORM_SHORT_555: {
541                                             cl_uint *ref_value =
542                                                 (cl_uint *)resultBuffer;
543                                             cl_uint *test_value =
544                                                 (cl_uint *)resultPtr;
545 
546                                             log_error(" Expected: 0x%2.2x "
547                                                       "Actual: 0x%2.2x \n",
548                                                       ref_value[0],
549                                                       test_value[0]);
550 
551                                             log_error(
552                                                 "    Expected: 0x%2.2x 0x%2.2x "
553                                                 "0x%2.2x \n",
554                                                 ref_value[0] & 0x1F,
555                                                 (ref_value[0] >> 5) & 0x1F,
556                                                 (ref_value[0] >> 10) & 0x1F);
557                                             log_error(
558                                                 "    Actual:   0x%2.2x 0x%2.2x "
559                                                 "0x%2.2x \n",
560                                                 test_value[0] & 0x1F,
561                                                 (test_value[0] >> 5) & 0x1F,
562                                                 (test_value[0] >> 10) & 0x1F);
563                                             log_error(
564                                                 "    Error:    %f %f %f %f\n",
565                                                 errors[0], errors[1],
566                                                 errors[2]);
567                                             break;
568                                         }
569                                         case CL_UNORM_INT16:
570                                         case CL_SNORM_INT16:
571                                         case CL_UNSIGNED_INT16:
572                                         case CL_SIGNED_INT16:
573 #ifdef CL_SFIXED14_APPLE
574                                         case CL_SFIXED14_APPLE:
575 #endif
576                                             log_error( "    Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
577                                             log_error( "    Actual:   0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
578                                             log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
579                                             break;
580                                         case CL_HALF_FLOAT:
581                                             log_error(
582                                                 "    Expected: 0x%4.4x 0x%4.4x "
583                                                 "0x%4.4x 0x%4.4x\n",
584                                                 ((cl_half *)resultBuffer)[0],
585                                                 ((cl_half *)resultBuffer)[1],
586                                                 ((cl_half *)resultBuffer)[2],
587                                                 ((cl_half *)resultBuffer)[3]);
588                                             log_error(
589                                                 "    Actual:   0x%4.4x 0x%4.4x "
590                                                 "0x%4.4x 0x%4.4x\n",
591                                                 ((cl_half *)resultPtr)[0],
592                                                 ((cl_half *)resultPtr)[1],
593                                                 ((cl_half *)resultPtr)[2],
594                                                 ((cl_half *)resultPtr)[3]);
595                                             log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
596                                             break;
597                                         case CL_UNSIGNED_INT32:
598                                         case CL_SIGNED_INT32:
599                                             log_error( "    Expected: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultBuffer)[0], ((cl_uint*)resultBuffer)[1], ((cl_uint*)resultBuffer)[2], ((cl_uint*)resultBuffer)[3] );
600                                             log_error( "    Actual:   0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultPtr)[0], ((cl_uint*)resultPtr)[1], ((cl_uint*)resultPtr)[2], ((cl_uint*)resultPtr)[3] );
601                                             break;
602                                         case CL_FLOAT:
603                                             log_error( "    Expected: %a %a %a %a\n", ((cl_float*)resultBuffer)[0], ((cl_float*)resultBuffer)[1], ((cl_float*)resultBuffer)[2], ((cl_float*)resultBuffer)[3] );
604                                             log_error( "    Actual:   %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] );
605                                             log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
606                                             break;
607                                     }
608 
609                                     float *v = (float *)(char *)imagePtr;
610                                     log_error( "   src: %g %g %g %g\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
611                                     log_error( "      : %a %a %a %a\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
612                                     log_error( "   src: %12.24f %12.24f %12.24f %12.24f\n", v[0 ], v[  1], v[ 2 ], v[ 3 ] );
613 
614                                     if( ( --numTries ) == 0 )
615                                         return 1;
616                                 }
617                             }
618                         }
619                         imagePtr += get_explicit_type_size( inputType ) * 4;
620                         resultPtr += get_pixel_size( imageInfo->format );
621                     }
622                 }
623             }
624             {
625                 nextLevelOffset += width_lod * height_lod * depth_lod * pixelSize;
626                 width_lod = ( width_lod >> 1 ) ? ( width_lod >> 1 ) : 1;
627                 height_lod = ( height_lod >> 1 ) ? ( height_lod >> 1 ) : 1;
628                 depth_lod = ( depth_lod >> 1 ) ? ( depth_lod >> 1 ) : 1;
629             }
630         }
631     }
632     // All done!
633     return totalErrors;
634 }
635 
636 
test_write_image_3D_set(cl_device_id device,cl_context context,cl_command_queue queue,const cl_image_format * format,ExplicitType inputType,MTdata d)637 int test_write_image_3D_set(cl_device_id device, cl_context context,
638                             cl_command_queue queue,
639                             const cl_image_format *format,
640                             ExplicitType inputType, MTdata d)
641 {
642     char programSrc[10240];
643     const char *ptr;
644     const char *readFormat;
645     clProgramWrapper program;
646     clKernelWrapper kernel;
647     const char *KernelSourcePattern = NULL;
648 
649     int error;
650 
651     // Get our operating parameters
652     size_t maxWidth, maxHeight, maxDepth;
653     cl_ulong maxAllocSize, memSize;
654 
655     image_descriptor imageInfo = { 0x0 };
656 
657     imageInfo.format = format;
658     imageInfo.type = CL_MEM_OBJECT_IMAGE3D;
659 
660     error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
661     error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
662     error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( maxDepth ), &maxDepth, NULL );
663     error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
664     error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
665     test_error( error, "Unable to get max image 3D size from device" );
666 
667     if (memSize > (cl_ulong)SIZE_MAX) {
668       memSize = (cl_ulong)SIZE_MAX;
669     }
670 
671     // Determine types
672     if( inputType == kInt )
673         readFormat = "i";
674     else if( inputType == kUInt )
675         readFormat = "ui";
676     else // kFloat
677         readFormat = "f";
678 
679     if(gtestTypesToRun & kWriteTests)
680     {
681         KernelSourcePattern = write3DKernelSourcePattern;
682     }
683     else
684     {
685         KernelSourcePattern = readwrite3DKernelSourcePattern;
686     }
687 
688     // Construct the source
689     sprintf(
690         programSrc, KernelSourcePattern, khr3DWritesPragma,
691         gTestMipmaps
692             ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable\n#pragma "
693               "OPENCL EXTENSION cl_khr_mipmap_image_writes: enable"
694             : "",
695         get_explicit_type_name(inputType), gTestMipmaps ? ", int lod" : "",
696         gTestMipmaps ? offset3DLodSource : offset3DSource, readFormat,
697         gTestMipmaps ? ", lod" : "");
698 
699     ptr = programSrc;
700     error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
701                                         "sample_kernel");
702     test_error( error, "Unable to create testing kernel" );
703 
704     // Run tests
705     if( gTestSmallImages )
706     {
707         for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
708         {
709             imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
710             for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ )
711             {
712                 imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
713                 for( imageInfo.depth = 2; imageInfo.depth < 7; imageInfo.depth++ )
714                 {
715                     if (gTestMipmaps)
716                         imageInfo.num_mip_levels = (size_t) random_in_range(2,(compute_max_mip_levels(imageInfo.width, imageInfo.height, imageInfo.depth) - 1), d);
717 
718                     if( gDebugTrace )
719                         log_info( "   at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth );
720                     int retCode = test_write_image_3D( device, context, queue, kernel, &imageInfo, inputType, d );
721                     if( retCode )
722                         return retCode;
723                 }
724             }
725         }
726     }
727     else if( gTestMaxImages )
728     {
729         // Try a specific set of maximum sizes
730         size_t numbeOfSizes;
731         size_t sizes[100][3];
732 
733         get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, maxDepth, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE3D, imageInfo.format, CL_TRUE);
734 
735         for( size_t idx = 0; idx < numbeOfSizes; idx++ )
736         {
737             imageInfo.width = sizes[ idx ][ 0 ];
738             imageInfo.height = sizes[ idx ][ 1 ];
739             imageInfo.depth = sizes[ idx ][ 2 ];
740             imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
741             imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
742             if (gTestMipmaps)
743                 imageInfo.num_mip_levels = (size_t) random_in_range(2,(compute_max_mip_levels(imageInfo.width, imageInfo.height, imageInfo.depth) - 1), d);
744             log_info("Testing %d x %d x %d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth);
745             int retCode = test_write_image_3D( device, context, queue, kernel, &imageInfo, inputType, d );
746             if( retCode )
747                 return retCode;
748         }
749     }
750     else if( gTestRounding )
751     {
752         size_t typeRange = 1 << ( get_format_type_size( imageInfo.format ) * 8 );
753         imageInfo.height = typeRange / 256;
754         imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height );
755         imageInfo.depth = 1;
756 
757         imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
758         imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
759         int retCode = test_write_image_3D( device, context, queue, kernel, &imageInfo, inputType, d );
760         if( retCode )
761             return retCode;
762     }
763     else
764     {
765         for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
766         {
767             cl_ulong size;
768             // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
769             // image, the result array, plus offset arrays, will fit in the global ram space
770             do
771             {
772                 imageInfo.width = reduceImageSizeRange(maxWidth, d );
773                 imageInfo.height = reduceImageSizeRange(maxHeight, d );
774                 imageInfo.depth = reduceImageDepth(maxDepth, d );
775 
776                 if(gTestMipmaps)
777                 {
778                     imageInfo.num_mip_levels = (size_t) random_in_range(2,(compute_max_mip_levels(imageInfo.width, imageInfo.height, imageInfo.depth) - 1), d);
779                     //Need to take into account the input buffer size, otherwise we will end up with input buffer that is exceeding MaxAlloc
780                     size = 4 * compute_mipmapped_image_size(imageInfo) * get_explicit_type_size( inputType );
781                 }
782                 else
783                 {
784                     imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
785                     imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
786                     if( gEnablePitch )
787                     {
788                         size_t extraWidth = (int)random_log_in_range( 0, 64, d );
789                         imageInfo.rowPitch += extraWidth * get_pixel_size( imageInfo.format );
790 
791                         imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
792                         extraWidth = (int)random_log_in_range( 0, 64, d );
793                         imageInfo.slicePitch += extraWidth * imageInfo.rowPitch;
794                     }
795 
796                     size = (size_t)imageInfo.slicePitch * (size_t)imageInfo.depth * 4;
797                 }
798             } while(  size > maxAllocSize || ( size * 3 ) > memSize );
799 
800             if( gDebugTrace )
801                 log_info( "   at size %ld,%ld,%ld (pitch %ld, slice %ld) out of %ld,%ld,%ld\n", imageInfo.width, imageInfo.height, imageInfo.depth,
802                          imageInfo.rowPitch, imageInfo.slicePitch, maxWidth, maxHeight, maxDepth );
803 
804             int retCode = test_write_image_3D( device, context, queue, kernel, &imageInfo, inputType, d );
805             if( retCode )
806                 return retCode;
807         }
808     }
809 
810     return 0;
811 }
812