1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "../testBase.h"
17 #include "test_common.h"
18 
19 #if !defined(_WIN32)
20 #include <sys/mman.h>
21 #endif
22 
23 extern cl_mem_flags gMemFlagsToUse;
24 extern int gtestTypesToRun;
25 
26 extern bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo );
27 extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo );
28 
29 // Utility function to clamp down image sizes for certain tests to avoid
30 // using too much memory.
reduceImageSizeRange(size_t maxDimSize)31 static size_t reduceImageSizeRange(size_t maxDimSize) {
32   size_t DimSize = maxDimSize/32;
33   if (DimSize < (size_t) 16)
34     return 16;
35   else if (DimSize > (size_t) 128)
36     return 128;
37   else
38     return DimSize;
39 }
40 
reduceImageDepth(size_t maxDepth)41 static size_t reduceImageDepth(size_t maxDepth) {
42   size_t Depth = maxDepth/32;
43   if (Depth < (size_t) 8)
44     return 8;
45   else if (Depth > (size_t) 32)
46     return 32;
47   else
48     return Depth;
49 }
50 
51 const char *write2DArrayKernelSourcePattern =
52     "%s\n"
53     "__kernel void sample_kernel( __global %s%s *input, write_only %s output "
54     "%s)\n"
55     "{\n"
56     "   int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = "
57     "get_global_id(2);\n"
58     "%s"
59     "   write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset "
60     "]);\n"
61     "}";
62 
63 const char *readwrite2DArrayKernelSourcePattern =
64     "%s\n"
65     "__kernel void sample_kernel( __global %s%s *input, read_write %s output "
66     "%s)\n"
67     "{\n"
68     "   int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = "
69     "get_global_id(2);\n"
70     "%s"
71     "   write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset "
72     "] );\n"
73     "}";
74 
75 const char *offset2DArrayKernelSource =
76 "   int offset = tidZ*get_image_width(output)*get_image_height(output) + tidY*get_image_width(output) + tidX;\n";
77 
78 const char *offset2DArrayLodKernelSource =
79 "   int width_lod = ( get_image_width(output) >> lod ) ? ( get_image_width(output) >> lod ) : 1;\n"
80 "   int height_lod = ( get_image_height(output) >> lod ) ? ( get_image_height(output) >> lod ) : 1;\n"
81 "   int offset = tidZ*width_lod*height_lod + tidY*width_lod + tidX;\n";
82 
test_write_image_2D_array(cl_device_id device,cl_context context,cl_command_queue queue,cl_kernel kernel,image_descriptor * imageInfo,ExplicitType inputType,MTdata d)83 int test_write_image_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel,
84                         image_descriptor *imageInfo, ExplicitType inputType, MTdata d )
85 {
86     int                 totalErrors = 0;
87 
88     size_t              num_flags   = 0;
89     const cl_mem_flags  *mem_flag_types = NULL;
90     const char *        *mem_flag_names = NULL;
91     const cl_mem_flags  write_only_mem_flag_types[2] = {  CL_MEM_WRITE_ONLY,   CL_MEM_READ_WRITE };
92     const char *        write_only_mem_flag_names[2] = { "CL_MEM_WRITE_ONLY", "CL_MEM_READ_WRITE" };
93     const cl_mem_flags  read_write_mem_flag_types[1] = {  CL_MEM_READ_WRITE};
94     const char *        read_write_mem_flag_names[1] = { "CL_MEM_READ_WRITE"};
95     if(gtestTypesToRun & kWriteTests)
96     {
97         mem_flag_types = write_only_mem_flag_types;
98         mem_flag_names = write_only_mem_flag_names;
99         num_flags      = sizeof( write_only_mem_flag_types ) / sizeof( write_only_mem_flag_types[0] );
100     }
101     else
102     {
103         mem_flag_types = read_write_mem_flag_types;
104         mem_flag_names = read_write_mem_flag_names;
105         num_flags      = sizeof( read_write_mem_flag_types ) / sizeof( read_write_mem_flag_types[0] );
106     }
107 
108     size_t pixelSize = get_pixel_size( imageInfo->format );
109 
110     for( size_t mem_flag_index = 0; mem_flag_index < num_flags; mem_flag_index++ )
111     {
112         int error;
113         size_t threads[3];
114         bool verifyRounding = false;
115         int forceCorrectlyRoundedWrites = 0;
116 
117 #if defined( __APPLE__ )
118         // Require Apple's CPU implementation to be correctly rounded, not just within 0.6
119         if( GetDeviceType(device) == CL_DEVICE_TYPE_CPU )
120             forceCorrectlyRoundedWrites = 1;
121 #endif
122 
123         if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
124             if( DetectFloatToHalfRoundingMode(queue) )
125                 return 1;
126 
127         BufferOwningPtr<char> maxImageUseHostPtrBackingStore, imageValues;
128 
129         create_random_image_data( inputType, imageInfo, imageValues, d );
130 
131         if(!gTestMipmaps)
132         {
133             if( inputType == kFloat && imageInfo->format->image_channel_data_type != CL_FLOAT )
134             {
135                 /* Pilot data for sRGB images */
136                 if(is_sRGBA_order(imageInfo->format->image_channel_order))
137                 {
138                     // We want to generate ints (mostly) in range of the target format which should be [0,255]
139                     // However the range chosen here is [-test_range_ext, 255 + test_range_ext] so that
140                     // it can test some out-of-range data points
141                     const unsigned int test_range_ext = 16;
142                     int formatMin = 0 - test_range_ext;
143                     int formatMax = 255 + test_range_ext;
144                     int pixel_value = 0;
145 
146                     // First, fill with arbitrary floats
147                     for( size_t z = 0; z < imageInfo->arraySize; z++ )
148                     {
149                         for( size_t y = 0; y < imageInfo->height; y++ )
150                         {
151                             float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4 + imageInfo->height * imageInfo->width * z * 4;
152                             for( size_t i = 0; i < imageInfo->width * 4; i++ )
153                             {
154                                 pixel_value = random_in_range( formatMin, (int)formatMax, d );
155                                 inputValues[ i ] = (float)(pixel_value/255.0f);
156                             }
157                         }
158                     }
159 
160                     // Throw a few extra test values in there
161                     float *inputValues = (float *)(char*)imageValues;
162                     size_t i = 0;
163                     // Piloting some debug inputs.
164                     inputValues[ i++ ] = -0.5f;
165                     inputValues[ i++ ] = 0.5f;
166                     inputValues[ i++ ] = 2.f;
167                     inputValues[ i++ ] = 0.5f;
168 
169                     // Also fill in the first few vectors with some deliberate tests to determine the rounding mode
170                     // is correct
171                     if( imageInfo->width > 12 )
172                     {
173                         float formatMax = (float)get_format_max_int( imageInfo->format );
174                         inputValues[ i++ ] = 4.0f / formatMax;
175                         inputValues[ i++ ] = 4.3f / formatMax;
176                         inputValues[ i++ ] = 4.5f / formatMax;
177                         inputValues[ i++ ] = 4.7f / formatMax;
178                         inputValues[ i++ ] = 5.0f / formatMax;
179                         inputValues[ i++ ] = 5.3f / formatMax;
180                         inputValues[ i++ ] = 5.5f / formatMax;
181                         inputValues[ i++ ] = 5.7f / formatMax;
182                     }
183                 }
184                 else
185                 {
186                     // First, fill with arbitrary floats
187                     for( size_t z = 0; z < imageInfo->arraySize; z++ )
188                     {
189                         for( size_t y = 0; y < imageInfo->height; y++ )
190                         {
191                             float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4 + imageInfo->height * imageInfo->width * z * 4;
192                             for( size_t i = 0; i < imageInfo->width * 4; i++ )
193                                 inputValues[ i ] = get_random_float( -0.1f, 1.1f, d );
194                         }
195                     }
196 
197                     // Throw a few extra test values in there
198                     float *inputValues = (float *)(char*)imageValues;
199                     size_t i = 0;
200                     inputValues[ i++ ] = -0.0000000000009f;
201                     inputValues[ i++ ] = 1.f;
202                     inputValues[ i++ ] = -1.f;
203                     inputValues[ i++ ] = 2.f;
204 
205                     // Also fill in the first few vectors with some deliberate tests to determine the rounding mode
206                     // is correct
207                     if( imageInfo->width > 12 )
208                     {
209                         float formatMax = (float)get_format_max_int( imageInfo->format );
210                         inputValues[ i++ ] = 4.0f / formatMax;
211                         inputValues[ i++ ] = 4.3f / formatMax;
212                         inputValues[ i++ ] = 4.5f / formatMax;
213                         inputValues[ i++ ] = 4.7f / formatMax;
214                         inputValues[ i++ ] = 5.0f / formatMax;
215                         inputValues[ i++ ] = 5.3f / formatMax;
216                         inputValues[ i++ ] = 5.5f / formatMax;
217                         inputValues[ i++ ] = 5.7f / formatMax;
218                         verifyRounding = true;
219                     }
220                 }
221             }
222             else if( inputType == kUInt )
223             {
224                 unsigned int *inputValues = (unsigned int*)(char*)imageValues;
225                 size_t i = 0;
226                 inputValues[ i++ ] = 0;
227                 inputValues[ i++ ] = 65535;
228                 inputValues[ i++ ] = 7271820;
229                 inputValues[ i++ ] = 0;
230             }
231         }
232 
233         // Construct testing sources
234         clProtectedImage protImage;
235         clMemWrapper unprotImage;
236         cl_mem image;
237 
238         if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
239         {
240             create_random_image_data( inputType, imageInfo, maxImageUseHostPtrBackingStore, d );
241 
242             unprotImage = create_image_2d_array( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR, imageInfo->format,
243                                           imageInfo->width, imageInfo->height, imageInfo->arraySize, 0, 0,
244                                           maxImageUseHostPtrBackingStore, &error );
245 
246             if( error != CL_SUCCESS )
247             {
248                 log_error( "ERROR: Unable to create 2D image array of size %ld x %ld x %ld pitch %ld (%s)\n", imageInfo->width, imageInfo->height, imageInfo->arraySize, imageInfo->rowPitch, IGetErrorString( error ) );
249                 return error;
250             }
251 
252             image = (cl_mem)unprotImage;
253         }
254         else // Either CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR or none
255         {
256             // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
257             // it works just as if no flag is specified, so we just do the same thing either way
258             // Note: if the flags is really CL_MEM_COPY_HOST_PTR, we want to remove it, because we don't want to copy any incoming data
259             if( gTestMipmaps )
260             {
261                 cl_image_desc image_desc = {0};
262                 image_desc.image_type = imageInfo->type;
263                 image_desc.num_mip_levels = imageInfo->num_mip_levels;
264                 image_desc.image_width = imageInfo->width;
265                 image_desc.image_height = imageInfo->height;
266                 image_desc.image_array_size = imageInfo->arraySize;
267 
268                 unprotImage = clCreateImage( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ),
269                                              imageInfo->format, &image_desc, NULL, &error);
270                 if( error != CL_SUCCESS )
271                 {
272                     log_error( "ERROR: Unable to create %d level 2D image array of size %ld x %ld x %ld (%s, %s)\n", imageInfo->num_mip_levels, imageInfo->width, imageInfo->height, imageInfo->arraySize,
273                                IGetErrorString( error ), mem_flag_names[mem_flag_index] );
274                     return error;
275                 }
276             }
277             else
278             {
279                 unprotImage = create_image_2d_array( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), imageInfo->format,
280                                               imageInfo->width, imageInfo->height, imageInfo->arraySize, 0, 0, imageValues, &error );
281                 if( error != CL_SUCCESS )
282                 {
283                     log_error( "ERROR: Unable to create 2D image array of size %ld x %ld x %ld pitch %ld (%s)\n", imageInfo->width, imageInfo->height, imageInfo->arraySize, imageInfo->rowPitch, IGetErrorString( error ) );
284                     return error;
285                 }
286             }
287             image = unprotImage;
288         }
289 
290         error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &image );
291         test_error( error, "Unable to set kernel arguments" );
292 
293         size_t width_lod = imageInfo->width, height_lod = imageInfo->height, nextLevelOffset = 0;
294         size_t origin[ 4 ] = { 0, 0, 0, 0 };
295         size_t region[ 3 ] = { imageInfo->width, imageInfo->height, imageInfo->arraySize };
296         size_t resultSize;
297 
298         int num_lod_loops = (gTestMipmaps)? imageInfo->num_mip_levels : 1;
299         for( int lod = 0; lod < num_lod_loops; lod++)
300         {
301             if(gTestMipmaps)
302             {
303                 error = clSetKernelArg( kernel, 2, sizeof( int ), &lod );
304             }
305             // Run the kernel
306             threads[0] = (size_t)width_lod;
307             threads[1] = (size_t)height_lod;
308             threads[2] = (size_t)imageInfo->arraySize;
309 
310             clMemWrapper inputStream;
311 
312             char *imagePtrOffset = imageValues + nextLevelOffset;
313             inputStream =
314                 clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
315                                get_explicit_type_size(inputType) * 4 * width_lod
316                                    * height_lod * imageInfo->arraySize,
317                                imagePtrOffset, &error);
318             test_error( error, "Unable to create input buffer" );
319 
320             // Set arguments
321             error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &inputStream );
322             test_error( error, "Unable to set kernel arguments" );
323 
324             error = clEnqueueNDRangeKernel( queue, kernel, 3, NULL, threads, NULL, 0, NULL, NULL );
325             test_error( error, "Unable to run kernel" );
326 
327             // Get results
328             if( gTestMipmaps )
329                 resultSize = width_lod * height_lod *imageInfo->arraySize * pixelSize;
330             else
331                 resultSize = imageInfo->slicePitch *imageInfo->arraySize;
332             clProtectedArray PA(resultSize);
333             char *resultValues = (char *)((void *)PA);
334 
335             if( gDebugTrace )
336                 log_info( "    reading results, %ld kbytes\n", (unsigned long)( resultSize / 1024 ) );
337 
338             origin[3] = lod;
339             region[0] = width_lod;
340             region[1] = height_lod;
341 
342             error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, gEnablePitch ? imageInfo->rowPitch : 0, gEnablePitch ? imageInfo->slicePitch : 0, resultValues, 0, NULL, NULL );
343             test_error( error, "Unable to read results from kernel" );
344             if( gDebugTrace )
345                 log_info( "    results read\n" );
346 
347             // Validate results element by element
348             char *imagePtr = imageValues + nextLevelOffset;
349             int numTries = 5;
350             for( size_t z = 0, i = 0; z < imageInfo->arraySize; z++ )
351             {
352                 for( size_t y = 0; y < height_lod; y++ )
353                 {
354                     char *resultPtr;
355                     if( gTestMipmaps )
356                         resultPtr = (char *)resultValues + y * width_lod * pixelSize + z * width_lod * height_lod * pixelSize;
357                     else
358                         resultPtr = (char*)resultValues + y * imageInfo->rowPitch + z * imageInfo->slicePitch;
359                     for( size_t x = 0; x < width_lod; x++, i++ )
360                     {
361                         char resultBuffer[ 16 ]; // Largest format would be 4 channels * 4 bytes (32 bits) each
362 
363                         // Convert this pixel
364                         if( inputType == kFloat )
365                             pack_image_pixel( (float *)imagePtr, imageInfo->format, resultBuffer );
366                         else if( inputType == kInt )
367                             pack_image_pixel( (int *)imagePtr, imageInfo->format, resultBuffer );
368                         else // if( inputType == kUInt )
369                             pack_image_pixel( (unsigned int *)imagePtr, imageInfo->format, resultBuffer );
370 
371                         // Compare against the results
372                         if(is_sRGBA_order(imageInfo->format->image_channel_order))
373                         {
374                             // Compare sRGB-mapped values
375                             cl_float expected[4]    = {0};
376                             cl_float* input_values  = (float*)imagePtr;
377                             cl_uchar *actual        = (cl_uchar*)resultPtr;
378                             float max_err           = MAX_lRGB_TO_sRGB_CONVERSION_ERROR;
379                             float err[4]            = {0.0f};
380 
381                             for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
382                             {
383                                 if(j < 3)
384                                 {
385                                     expected[j] = sRGBmap(input_values[j]);
386                                 }
387                                 else // there is no sRGB conversion for alpha component if it exists
388                                 {
389                                     expected[j] = NORMALIZE(input_values[j], 255.0f);
390                                 }
391 
392                                 err[j] = fabsf( expected[ j ] - actual[ j ] );
393                             }
394 
395                             if ((err[0] > max_err) ||
396                                 (err[1] > max_err) ||
397                                 (err[2] > max_err) ||
398                                 (err[3] > 0)) // there is no conversion for alpha so the error should be zero
399                             {
400                                 log_error( "       Error:     %g %g %g %g\n", err[0], err[1], err[2], err[3]);
401                                 log_error( "       Input:     %g %g %g %g\n", *((float *)imagePtr), *((float *)imagePtr + 1), *((float *)imagePtr + 2), *((float *)imagePtr + 3));
402                                 log_error( "       Expected: %g %g %g %g\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
403                                 log_error( "       Actual:   %d %d %d %d\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
404                                 return 1;
405                             }
406                         }
407                         else if( imageInfo->format->image_channel_data_type == CL_FLOAT )
408                         {
409                             float *expected = (float *)resultBuffer;
410                             float *actual = (float *)resultPtr;
411 
412                             if( !validate_float_write_results( expected, actual, imageInfo ) )
413                             {
414                                 unsigned int *e = (unsigned int *)resultBuffer;
415                                 unsigned int *a = (unsigned int *)resultPtr;
416                                 log_error( "ERROR: Sample %ld (%ld,%ld,%ld) did not validate! (%s)\n", i, x, y, z, mem_flag_names[ mem_flag_index ] );
417                                 log_error( "       Expected: %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
418                                 log_error( "       Expected: %08x %08x %08x %08x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
419                                 log_error( "       Actual:   %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
420                                 log_error( "       Actual:   %08x %08x %08x %08x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
421                                 totalErrors++;
422                                 if( ( --numTries ) == 0 )
423                                     return 1;
424                             }
425                         }
426                         else if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
427                         {
428                             cl_half *e = (cl_half *)resultBuffer;
429                             cl_half *a = (cl_half *)resultPtr;
430                             if( !validate_half_write_results( e, a, imageInfo ) )
431                             {
432                                 totalErrors++;
433                                 log_error( "ERROR: Sample %ld (%ld,%ld,%ld) did not validate! (%s)\n", i, x, y, z, mem_flag_names[ mem_flag_index ] );
434                                 unsigned short *e = (unsigned short *)resultBuffer;
435                                 unsigned short *a = (unsigned short *)resultPtr;
436                                 log_error( "    Expected: 0x%04x 0x%04x 0x%04x 0x%04x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
437                                 log_error( "    Actual:   0x%04x 0x%04x 0x%04x 0x%04x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
438                                 if( inputType == kFloat )
439                                 {
440                                     float *p = (float *)imagePtr;
441                                     log_error( "    Source: %a %a %a %a\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
442                                     log_error( "          : %12.24f %12.24f %12.24f %12.24f\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
443                                 }
444                                 if( ( --numTries ) == 0 )
445                                     return 1;
446                             }
447                         }
448                         else
449                         {
450 
451                             filter_undefined_bits(imageInfo, resultPtr);
452 
453                             // Exact result passes every time
454                             if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 )
455                             {
456                                 // result is inexact.  Calculate error
457                                 int failure = 1;
458                                 float errors[4] = {NAN, NAN, NAN, NAN};
459                                 pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors );
460 
461                                 failure = filter_rounding_errors(
462                                     forceCorrectlyRoundedWrites, imageInfo,
463                                     errors);
464 
465                                 if( failure )
466                                 {
467                                     totalErrors++;
468                                     // Is it our special rounding test?
469                                     if( verifyRounding && i >= 1 && i <= 2 )
470                                     {
471                                         // Try to guess what the rounding mode of the device really is based on what it returned
472                                         const char *deviceRounding = "unknown";
473                                         unsigned int deviceResults[8];
474                                         read_image_pixel<unsigned int>( resultPtr, imageInfo, 0, 0, 0, deviceResults, lod);
475                                         read_image_pixel<unsigned int>( resultPtr, imageInfo, 1, 0, 0, &deviceResults[ 4 ], lod );
476 
477                                         if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 4 &&
478                                            deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 5 && deviceResults[ 7 ] == 5 )
479                                             deviceRounding = "truncate";
480                                         else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 5 && deviceResults[ 3 ] == 5 &&
481                                                 deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
482                                             deviceRounding = "round to nearest";
483                                         else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 5 &&
484                                                 deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
485                                             deviceRounding = "round to even";
486 
487                                         log_error( "ERROR: Rounding mode sample (%ld) did not validate, probably due to the device's rounding mode being wrong (%s)\n", i, mem_flag_names[mem_flag_index] );
488                                         log_error( "       Actual values rounded by device: %d %d %d %d %d %d %d %d\n", deviceResults[ 0 ], deviceResults[ 1 ], deviceResults[ 2 ], deviceResults[ 3 ],
489                                                   deviceResults[ 4 ], deviceResults[ 5 ], deviceResults[ 6 ], deviceResults[ 7 ] );
490                                         log_error( "       Rounding mode of device appears to be %s\n", deviceRounding );
491                                         return 1;
492                                     }
493                                     log_error( "ERROR: Sample %d (%d,%d) did not validate!\n", (int)i, (int)x, (int)y );
494                                     switch(imageInfo->format->image_channel_data_type)
495                                     {
496                                         case CL_UNORM_INT8:
497                                         case CL_SNORM_INT8:
498                                         case CL_UNSIGNED_INT8:
499                                         case CL_SIGNED_INT8:
500                                             log_error( "    Expected: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultBuffer)[0], ((cl_uchar*)resultBuffer)[1], ((cl_uchar*)resultBuffer)[2], ((cl_uchar*)resultBuffer)[3] );
501                                             log_error( "    Actual:   0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] );
502                                             log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
503                                             break;
504                                         case CL_UNORM_SHORT_565: {
505                                             cl_uint *ref_value =
506                                                 (cl_uint *)resultBuffer;
507                                             cl_uint *test_value =
508                                                 (cl_uint *)resultPtr;
509 
510                                             log_error(" Expected: 0x%2.2x "
511                                                       "Actual: 0x%2.2x \n",
512                                                       ref_value[0],
513                                                       test_value[0]);
514 
515                                             log_error(
516                                                 "    Expected: 0x%2.2x 0x%2.2x "
517                                                 "0x%2.2x \n",
518                                                 ref_value[0] & 0x1F,
519                                                 (ref_value[0] >> 5) & 0x3F,
520                                                 (ref_value[0] >> 11) & 0x1F);
521                                             log_error(
522                                                 "    Actual:   0x%2.2x 0x%2.2x "
523                                                 "0x%2.2x \n",
524                                                 test_value[0] & 0x1F,
525                                                 (test_value[0] >> 5) & 0x3F,
526                                                 (test_value[0] >> 11) & 0x1F);
527                                             log_error(
528                                                 "    Error:    %f %f %f %f\n",
529                                                 errors[0], errors[1],
530                                                 errors[2]);
531                                             break;
532                                         }
533                                         case CL_UNORM_SHORT_555: {
534                                             cl_uint *ref_value =
535                                                 (cl_uint *)resultBuffer;
536                                             cl_uint *test_value =
537                                                 (cl_uint *)resultPtr;
538 
539                                             log_error(" Expected: 0x%2.2x "
540                                                       "Actual: 0x%2.2x \n",
541                                                       ref_value[0],
542                                                       test_value[0]);
543 
544                                             log_error(
545                                                 "    Expected: 0x%2.2x 0x%2.2x "
546                                                 "0x%2.2x \n",
547                                                 ref_value[0] & 0x1F,
548                                                 (ref_value[0] >> 5) & 0x1F,
549                                                 (ref_value[0] >> 10) & 0x1F);
550                                             log_error(
551                                                 "    Actual:   0x%2.2x 0x%2.2x "
552                                                 "0x%2.2x \n",
553                                                 test_value[0] & 0x1F,
554                                                 (test_value[0] >> 5) & 0x1F,
555                                                 (test_value[0] >> 10) & 0x1F);
556                                             log_error(
557                                                 "    Error:    %f %f %f %f\n",
558                                                 errors[0], errors[1],
559                                                 errors[2]);
560                                             break;
561                                         }
562                                         case CL_UNORM_INT16:
563                                         case CL_SNORM_INT16:
564                                         case CL_UNSIGNED_INT16:
565                                         case CL_SIGNED_INT16:
566 #ifdef CL_SFIXED14_APPLE
567                                         case CL_SFIXED14_APPLE:
568 #endif
569                                             log_error( "    Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
570                                             log_error( "    Actual:   0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
571                                             log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
572                                             break;
573                                         case CL_HALF_FLOAT:
574                                             log_error(
575                                                 "    Expected: 0x%4.4x 0x%4.4x "
576                                                 "0x%4.4x 0x%4.4x\n",
577                                                 ((cl_half *)resultBuffer)[0],
578                                                 ((cl_half *)resultBuffer)[1],
579                                                 ((cl_half *)resultBuffer)[2],
580                                                 ((cl_half *)resultBuffer)[3]);
581                                             log_error(
582                                                 "    Actual:   0x%4.4x 0x%4.4x "
583                                                 "0x%4.4x 0x%4.4x\n",
584                                                 ((cl_half *)resultPtr)[0],
585                                                 ((cl_half *)resultPtr)[1],
586                                                 ((cl_half *)resultPtr)[2],
587                                                 ((cl_half *)resultPtr)[3]);
588                                             log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
589                                             break;
590                                         case CL_UNSIGNED_INT32:
591                                         case CL_SIGNED_INT32:
592                                             log_error( "    Expected: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultBuffer)[0], ((cl_uint*)resultBuffer)[1], ((cl_uint*)resultBuffer)[2], ((cl_uint*)resultBuffer)[3] );
593                                             log_error( "    Actual:   0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultPtr)[0], ((cl_uint*)resultPtr)[1], ((cl_uint*)resultPtr)[2], ((cl_uint*)resultPtr)[3] );
594                                             break;
595                                         case CL_FLOAT:
596                                             log_error( "    Expected: %a %a %a %a\n", ((cl_float*)resultBuffer)[0], ((cl_float*)resultBuffer)[1], ((cl_float*)resultBuffer)[2], ((cl_float*)resultBuffer)[3] );
597                                             log_error( "    Actual:   %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] );
598                                             log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
599                                             break;
600                                     }
601 
602                                     float *v = (float *)(char *)imagePtr;
603                                     log_error( "   src: %g %g %g %g\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
604                                     log_error( "      : %a %a %a %a\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
605                                     log_error( "   src: %12.24f %12.24f %12.24f %12.24f\n", v[0 ], v[  1], v[ 2 ], v[ 3 ] );
606 
607                                     if( ( --numTries ) == 0 )
608                                         return 1;
609                                 }
610                             }
611                         }
612                         imagePtr += get_explicit_type_size( inputType ) * (( imageInfo->format->image_channel_order == CL_DEPTH ) ? 1 : 4);
613                         resultPtr += get_pixel_size( imageInfo->format );
614                     }
615                 }
616             }
617             {
618                 nextLevelOffset += width_lod*height_lod*imageInfo->arraySize*pixelSize;
619                 width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1;
620                 height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1;
621             }
622         }
623     }
624     // All done!
625     return totalErrors;
626 }
627 
628 
test_write_image_2D_array_set(cl_device_id device,cl_context context,cl_command_queue queue,const cl_image_format * format,ExplicitType inputType,MTdata d)629 int test_write_image_2D_array_set(cl_device_id device, cl_context context,
630                                   cl_command_queue queue,
631                                   const cl_image_format *format,
632                                   ExplicitType inputType, MTdata d)
633 {
634     char programSrc[10240];
635     const char *ptr;
636     const char *readFormat;
637     clProgramWrapper program;
638     clKernelWrapper kernel;
639     const char *KernelSourcePattern = NULL;
640     int error;
641 
642     // Get our operating parameters
643     size_t maxWidth, maxHeight, maxArraySize;
644     cl_ulong maxAllocSize, memSize;
645 
646     image_descriptor imageInfo = { 0x0 };
647 
648     imageInfo.format = format;
649     imageInfo.type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
650     imageInfo.depth = 1;
651     imageInfo.slicePitch = 0;
652 
653     error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
654     error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
655     error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL );
656     error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
657     error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
658     test_error( error, "Unable to get max image 3D size from device" );
659 
660     if (memSize > (cl_ulong)SIZE_MAX) {
661       memSize = (cl_ulong)SIZE_MAX;
662     }
663 
664     // Determine types
665     if( inputType == kInt )
666         readFormat = "i";
667     else if( inputType == kUInt )
668         readFormat = "ui";
669     else // kFloat
670         readFormat = "f";
671 
672     if(gtestTypesToRun & kWriteTests)
673     {
674         KernelSourcePattern = write2DArrayKernelSourcePattern;
675     }
676     else
677     {
678         KernelSourcePattern = readwrite2DArrayKernelSourcePattern;
679     }
680     // Construct the source
681     // Construct the source
682     sprintf(
683         programSrc, KernelSourcePattern,
684         gTestMipmaps
685             ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable\n#pragma "
686               "OPENCL EXTENSION cl_khr_mipmap_image_writes: enable"
687             : "",
688         get_explicit_type_name(inputType),
689         (format->image_channel_order == CL_DEPTH) ? "" : "4",
690         (format->image_channel_order == CL_DEPTH) ? "image2d_array_depth_t"
691                                                   : "image2d_array_t",
692         gTestMipmaps ? " , int lod" : "",
693         gTestMipmaps ? offset2DArrayLodKernelSource : offset2DArrayKernelSource,
694         readFormat, gTestMipmaps ? ", lod" : "");
695 
696     ptr = programSrc;
697     error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
698                                         "sample_kernel");
699     test_error( error, "Unable to create testing kernel" );
700 
701     // Run tests
702     if( gTestSmallImages )
703     {
704         for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
705         {
706             imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
707             for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ )
708             {
709                 imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
710                 for( imageInfo.arraySize = 2; imageInfo.arraySize < 7; imageInfo.arraySize++ )
711                 {
712                     if( gTestMipmaps )
713                         imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, d);
714 
715                     if( gDebugTrace )
716                         log_info( "   at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize );
717                     int retCode = test_write_image_2D_array( device, context, queue, kernel, &imageInfo, inputType, d );
718                     if( retCode )
719                         return retCode;
720                 }
721             }
722         }
723     }
724     else if( gTestMaxImages )
725     {
726         // Try a specific set of maximum sizes
727         size_t numbeOfSizes;
728         size_t sizes[100][3];
729 
730         get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, maxArraySize, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D_ARRAY, imageInfo.format, CL_TRUE);
731 
732         for( size_t idx = 0; idx < numbeOfSizes; idx++ )
733         {
734             imageInfo.width = sizes[ idx ][ 0 ];
735             imageInfo.height = sizes[ idx ][ 1 ];
736             imageInfo.arraySize = sizes[ idx ][ 2 ];
737             imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
738             imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
739             if( gTestMipmaps )
740                 imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, d);
741             log_info("Testing %d x %d x %d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize);
742             int retCode = test_write_image_2D_array( device, context, queue, kernel, &imageInfo, inputType, d );
743             if( retCode )
744                 return retCode;
745         }
746     }
747     else if( gTestRounding )
748     {
749         size_t typeRange = 1 << ( get_format_type_size( imageInfo.format ) * 8 );
750         imageInfo.height = typeRange / 256;
751         imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height );
752         imageInfo.arraySize = 2;
753 
754         imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
755         imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
756         int retCode = test_write_image_2D_array( device, context, queue, kernel, &imageInfo, inputType, d );
757         if( retCode )
758             return retCode;
759     }
760     else
761     {
762         for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
763         {
764             int maxWidthRange = (int) reduceImageSizeRange(maxWidth);
765             int maxHeighthRange = (int) reduceImageSizeRange(maxHeight);
766             int maxArraySizeRange = (int) reduceImageDepth(maxArraySize);
767 
768             cl_ulong size, buffSize;
769             // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
770             // image, the result array, plus offset arrays, will fit in the global ram space
771             do
772             {
773                 imageInfo.width = (size_t)random_log_in_range( 16, maxWidthRange, d );
774                 imageInfo.height = (size_t)random_log_in_range( 16, maxHeighthRange, d );
775                 imageInfo.arraySize = (size_t)random_log_in_range( 8, maxArraySizeRange, d );
776 
777                 if(gTestMipmaps)
778                 {
779                     imageInfo.num_mip_levels = (size_t) random_in_range(2,(compute_max_mip_levels(imageInfo.width, imageInfo.height, 0) - 1), d);
780                     //Need to take into account the input buffer size, otherwise we will end up with input buffer that is exceeding MaxAlloc
781                     size = 4 * compute_mipmapped_image_size(imageInfo);
782                     buffSize = size * get_explicit_type_size( inputType );
783                 }
784                 else
785                 {
786                     imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
787                     imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
788                     if( gEnablePitch )
789                     {
790                         size_t extraWidth = (int)random_log_in_range( 0, 64, d );
791                         imageInfo.rowPitch += extraWidth * get_pixel_size( imageInfo.format );
792 
793                         imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
794                         extraWidth = (int)random_log_in_range( 0, 64, d );
795                         imageInfo.slicePitch += extraWidth * imageInfo.rowPitch;
796                     }
797 
798 
799                     // Image size and buffer size may differ due to different pixel size.
800                     // See creation of buffer at line ~153.
801                     size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4;
802                     buffSize = (cl_ulong)imageInfo.width * (cl_ulong)imageInfo.height * imageInfo.arraySize * get_explicit_type_size(inputType) * 4;
803                 }
804             } while(  size > maxAllocSize || buffSize > maxAllocSize || ( size * 3 ) > memSize );
805 
806             if( gDebugTrace )
807                 log_info( "   at size %ld,%ld,%ld (pitch %ld, slice %ld) out of %ld,%ld,%ld\n", imageInfo.width, imageInfo.height, imageInfo.arraySize,
808                          imageInfo.rowPitch, imageInfo.slicePitch, maxWidth, maxHeight, maxArraySize );
809 
810             int retCode = test_write_image_2D_array( device, context, queue, kernel, &imageInfo, inputType, d );
811             if( retCode )
812                 return retCode;
813         }
814     }
815 
816     return 0;
817 }
818