xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/images/kernel_read_write/test_write_image.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "../testBase.h"
17 #include "test_common.h"
18 
19 #if !defined(_WIN32)
20 #include <sys/mman.h>
21 #endif
22 
23 extern bool gTestImage2DFromBuffer;
24 extern cl_mem_flags gMemFlagsToUse;
25 extern int gtestTypesToRun;
26 
27 extern int test_write_image_1D_set(cl_device_id device, cl_context context,
28                                    cl_command_queue queue,
29                                    const cl_image_format *format,
30                                    ExplicitType inputType, MTdata d);
31 extern int test_write_image_3D_set(cl_device_id device, cl_context context,
32                                    cl_command_queue queue,
33                                    const cl_image_format *format,
34                                    ExplicitType inputType, MTdata d);
35 extern int test_write_image_1D_array_set(cl_device_id device,
36                                          cl_context context,
37                                          cl_command_queue queue,
38                                          const cl_image_format *format,
39                                          ExplicitType inputType, MTdata d);
40 extern int test_write_image_2D_array_set(cl_device_id device,
41                                          cl_context context,
42                                          cl_command_queue queue,
43                                          const cl_image_format *format,
44                                          ExplicitType inputType, MTdata d);
45 
46 extern bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo );
47 extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo );
48 
49 const char *writeKernelSourcePattern =
50     "%s\n"
51     "__kernel void sample_kernel( __global %s%s *input, write_only %s output "
52     "%s)\n"
53     "{\n"
54     "   int tidX = get_global_id(0), tidY = get_global_id(1);\n"
55     "%s"
56     "   write_image%s( output, (int2)( tidX, tidY ) %s, input[ offset ]);\n"
57     "}";
58 
59 const char *read_writeKernelSourcePattern =
60     "%s\n"
61     "__kernel void sample_kernel( __global %s%s *input, read_write %s output "
62     "%s)\n"
63     "{\n"
64     "   int tidX = get_global_id(0), tidY = get_global_id(1);\n"
65     "%s"
66     "   write_image%s( output, (int2)( tidX, tidY )%s, input[ offset ] );\n"
67     "}";
68 
69 const char *offset2DKernelSource =
70 "   int offset = tidY*get_image_width(output) + tidX;\n";
71 
72 const char *offset2DLodKernelSource =
73 "   int width_lod = ( get_image_width(output) >> lod ) ? ( get_image_width(output) >> lod ) : 1;\n"
74 "   int offset = tidY * width_lod + tidX;\n";
75 
test_write_image(cl_device_id device,cl_context context,cl_command_queue queue,cl_kernel kernel,image_descriptor * imageInfo,ExplicitType inputType,MTdata d)76 int test_write_image( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel,
77                      image_descriptor *imageInfo, ExplicitType inputType, MTdata d )
78 {
79     int                 totalErrors = 0;
80     size_t              num_flags   = 0;
81     const cl_mem_flags  *mem_flag_types = NULL;
82     const char *        *mem_flag_names = NULL;
83     const cl_mem_flags  write_only_mem_flag_types[2] = {  CL_MEM_WRITE_ONLY,   CL_MEM_READ_WRITE };
84     const char *        write_only_mem_flag_names[2] = { "CL_MEM_WRITE_ONLY", "CL_MEM_READ_WRITE" };
85     const cl_mem_flags  read_write_mem_flag_types[1] = {  CL_MEM_READ_WRITE};
86     const char *        read_write_mem_flag_names[1] = { "CL_MEM_READ_WRITE"};
87 
88     if(gtestTypesToRun & kWriteTests)
89     {
90         mem_flag_types = write_only_mem_flag_types;
91         mem_flag_names = write_only_mem_flag_names;
92         num_flags      = sizeof( write_only_mem_flag_types ) / sizeof( write_only_mem_flag_types[0] );
93     }
94     else
95     {
96         mem_flag_types = read_write_mem_flag_types;
97         mem_flag_names = read_write_mem_flag_names;
98         num_flags      = sizeof( read_write_mem_flag_types ) / sizeof( read_write_mem_flag_types[0] );
99     }
100 
101     size_t  pixelSize       = get_pixel_size( imageInfo->format );
102     int     channel_scale   = (imageInfo->format->image_channel_order == CL_DEPTH) ? 1 : 4;
103 
104     for( size_t mem_flag_index = 0; mem_flag_index < num_flags; mem_flag_index++ )
105     {
106         int error;
107         size_t threads[2];
108         bool verifyRounding = false;
109         int forceCorrectlyRoundedWrites = 0;
110 
111 #if defined( __APPLE__ )
112         // Require Apple's CPU implementation to be correctly rounded, not just within 0.6
113         if( GetDeviceType(device) == CL_DEVICE_TYPE_CPU )
114             forceCorrectlyRoundedWrites = 1;
115 #endif
116 
117         if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
118             if( DetectFloatToHalfRoundingMode(queue) )
119                 return 1;
120 
121         BufferOwningPtr<char> maxImageUseHostPtrBackingStore, imageValues, imageBufferValues;
122 
123         create_random_image_data( inputType, imageInfo, imageValues, d, gTestImage2DFromBuffer );
124 
125         if(!gTestMipmaps)
126         {
127             if( inputType == kFloat && imageInfo->format->image_channel_data_type != CL_FLOAT && imageInfo->format->image_channel_data_type != CL_HALF_FLOAT )
128             {
129                 /* Pilot data for sRGB images */
130                 if(is_sRGBA_order(imageInfo->format->image_channel_order))
131                 {
132                     // We want to generate ints (mostly) in range of the target format which should be [0,255]
133                     // However the range chosen here is [-test_range_ext, 255 + test_range_ext] so that
134                     // it can test some out-of-range data points
135                     const unsigned int test_range_ext = 16;
136                     int formatMin = 0 - test_range_ext;
137                     int formatMax = 255 + test_range_ext;
138                     int pixel_value = 0;
139 
140                     // First, fill with arbitrary floats
141                     for( size_t y = 0; y < imageInfo->height; y++ )
142                     {
143                         float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4;
144                         for( size_t i = 0; i < imageInfo->width * 4; i++ )
145                         {
146                             pixel_value = random_in_range( formatMin, (int)formatMax, d );
147                             inputValues[ i ] = (float)(pixel_value/255.0f);
148                         }
149                     }
150 
151                     // Throw a few extra test values in there
152                     float *inputValues = (float *)(char*)imageValues;
153                     size_t i = 0;
154 
155                     // Piloting some debug inputs.
156                     inputValues[ i++ ] = -0.5f;
157                     inputValues[ i++ ] = 0.5f;
158                     inputValues[ i++ ] = 2.0f;
159                     inputValues[ i++ ] = 0.5f;
160 
161                     // Also fill in the first few vectors with some deliberate tests to determine the rounding mode
162                     // is correct
163                     if( imageInfo->width > 12 )
164                     {
165                         float formatMax = (float)get_format_max_int( imageInfo->format );
166                         inputValues[ i++ ] = 4.0f / formatMax;
167                         inputValues[ i++ ] = 4.3f / formatMax;
168                         inputValues[ i++ ] = 4.5f / formatMax;
169                         inputValues[ i++ ] = 4.7f / formatMax;
170                         inputValues[ i++ ] = 5.0f / formatMax;
171                         inputValues[ i++ ] = 5.3f / formatMax;
172                         inputValues[ i++ ] = 5.5f / formatMax;
173                         inputValues[ i++ ] = 5.7f / formatMax;
174                     }
175                 }
176                 else
177                 {
178                     // First, fill with arbitrary floats
179                     for( size_t y = 0; y < imageInfo->height; y++ )
180                     {
181                         float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * channel_scale;
182                         for( size_t i = 0; i < imageInfo->width * channel_scale; i++ )
183                             inputValues[ i ] = get_random_float( -0.1f, 1.1f, d );
184                     }
185 
186                     // Throw a few extra test values in there
187                     float *inputValues = (float *)(char*)imageValues;
188                     size_t i = 0;
189                     inputValues[ i++ ] = -0.0000000000009f;
190                     inputValues[ i++ ] = 1.f;
191                     inputValues[ i++ ] = -1.f;
192                     inputValues[ i++ ] = 2.f;
193 
194                     // Also fill in the first few vectors with some deliberate tests to determine the rounding mode
195                     // is correct
196                     if( imageInfo->width > 12 )
197                     {
198                         float formatMax = (float)get_format_max_int( imageInfo->format );
199                         inputValues[ i++ ] = 4.0f / formatMax;
200                         inputValues[ i++ ] = 4.3f / formatMax;
201                         inputValues[ i++ ] = 4.5f / formatMax;
202                         inputValues[ i++ ] = 4.7f / formatMax;
203                         inputValues[ i++ ] = 5.0f / formatMax;
204                         inputValues[ i++ ] = 5.3f / formatMax;
205                         inputValues[ i++ ] = 5.5f / formatMax;
206                         inputValues[ i++ ] = 5.7f / formatMax;
207                         verifyRounding = true;
208                     }
209                 }
210             }
211             else if( inputType == kUInt )
212             {
213                 unsigned int *inputValues = (unsigned int*)(char*)imageValues;
214                 size_t i = 0;
215                 inputValues[ i++ ] = 0;
216                 inputValues[ i++ ] = 65535;
217                 inputValues[ i++ ] = 7271820;
218                 inputValues[ i++ ] = 0;
219             }
220         }
221 
222         // Construct testing sources
223         clProtectedImage protImage;
224         clMemWrapper unprotImage;
225         cl_mem image;
226         cl_mem imageBuffer;
227 
228         if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
229         {
230             if (gTestImage2DFromBuffer)
231             {
232                 imageBuffer = clCreateBuffer( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR,
233                                              imageInfo->rowPitch * imageInfo->height, maxImageUseHostPtrBackingStore, &error);
234                 test_error( error, "Unable to create buffer" );
235                 unprotImage = create_image_2d_buffer( context, mem_flag_types[mem_flag_index], imageInfo->format,
236                                                      imageInfo->width, imageInfo->height, imageInfo->rowPitch,
237                                                      imageBuffer, &error );
238 
239             }
240             else
241             {
242                 // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian)
243                 // Do not use protected images for max image size test since it rounds the row size to a page size
244                 if (gTestMaxImages) {
245                     create_random_image_data( inputType, imageInfo, maxImageUseHostPtrBackingStore, d );
246 
247                     unprotImage = create_image_2d( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR, imageInfo->format,
248                                               imageInfo->width, imageInfo->height, 0,
249                                               maxImageUseHostPtrBackingStore, &error );
250                 } else {
251                     error = protImage.Create( context, mem_flag_types[mem_flag_index], imageInfo->format, imageInfo->width, imageInfo->height );
252                 }
253             }
254             if( error != CL_SUCCESS )
255             {
256                 if (gTestImage2DFromBuffer) {
257                     clReleaseMemObject(imageBuffer);
258                     if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
259                         log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
260                         return 0;
261                     }
262                 }
263 
264                 log_error( "ERROR: Unable to create 2D image of size %ld x %ld pitch %ld (%s, %s)\n", imageInfo->width, imageInfo->height,
265                           imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] );
266                 return error;
267             }
268 
269             if (gTestMaxImages || gTestImage2DFromBuffer)
270                 image = (cl_mem)unprotImage;
271             else
272                 image = (cl_mem)protImage;
273         }
274         else // Either CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR or none
275         {
276             if( gTestMipmaps )
277             {
278                 cl_image_desc image_desc = {0};
279                 image_desc.image_type = imageInfo->type;
280                 image_desc.num_mip_levels = imageInfo->num_mip_levels;
281                 image_desc.image_width = imageInfo->width;
282                 image_desc.image_height = imageInfo->height;
283 
284                 unprotImage = clCreateImage( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ),
285                                              imageInfo->format, &image_desc, NULL, &error);
286                 if( error != CL_SUCCESS )
287                 {
288                     log_error( "ERROR: Unable to create %d level 2D image of size %ld x %ld (%s, %s)\n", imageInfo->num_mip_levels, imageInfo->width, imageInfo->height,
289                                IGetErrorString( error ), mem_flag_names[mem_flag_index] );
290                     return error;
291                 }
292             }
293             else if (gTestImage2DFromBuffer)
294             {
295                 generate_random_image_data( imageInfo, imageBufferValues, d );
296                 imageBuffer = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR,
297                                              imageInfo->rowPitch * imageInfo->height, imageBufferValues, &error);
298                 test_error( error, "Unable to create buffer" );
299                 unprotImage = create_image_2d_buffer( context, mem_flag_types[mem_flag_index], imageInfo->format,
300                                                      imageInfo->width, imageInfo->height, imageInfo->rowPitch,
301                                                      imageBuffer, &error );
302 
303             }
304             else
305             {
306                 // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
307                 // it works just as if no flag is specified, so we just do the same thing either way
308                 // Note: if the flags is really CL_MEM_COPY_HOST_PTR, we want to remove it, because we don't want to copy any incoming data
309                 unprotImage = create_image_2d( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), imageInfo->format,
310                                           imageInfo->width, imageInfo->height, 0,
311                                           imageValues, &error );
312             }
313             if( error != CL_SUCCESS )
314             {
315                 if (gTestImage2DFromBuffer) {
316                     clReleaseMemObject(imageBuffer);
317                     if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
318                         log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
319                         return 0;
320                     }
321                 }
322 
323                 log_error( "ERROR: Unable to create 2D image of size %ld x %ld pitch %ld (%s, %s)\n", imageInfo->width, imageInfo->height,
324                           imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] );
325                 return error;
326             }
327             image = unprotImage;
328         }
329 
330         error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &image );
331         test_error( error, "Unable to set kernel arguments" );
332 
333         size_t width_lod = imageInfo->width, height_lod = imageInfo->height, nextLevelOffset = 0;
334         size_t origin[ 3 ] = { 0, 0, 0 };
335         size_t region[ 3 ] = { imageInfo->width, imageInfo->height, 1 };
336         size_t resultSize;
337 
338         int num_lod_loops = (gTestMipmaps)? imageInfo->num_mip_levels : 1;
339         for( int lod = 0; lod < num_lod_loops; lod++)
340         {
341             if(gTestMipmaps)
342             {
343                 error = clSetKernelArg( kernel, 2, sizeof( int ), &lod );
344             }
345             // Run the kernel
346             threads[0] = (size_t)width_lod;
347             threads[1] = (size_t)height_lod;
348 
349             clMemWrapper inputStream;
350 
351             char *imagePtrOffset = imageValues + nextLevelOffset;
352 
353             inputStream =
354                 clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
355                                get_explicit_type_size(inputType) * channel_scale
356                                    * width_lod * height_lod,
357                                imagePtrOffset, &error);
358             test_error( error, "Unable to create input buffer" );
359 
360             // Set arguments
361             error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &inputStream );
362             test_error( error, "Unable to set kernel arguments" );
363 
364             error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
365             test_error( error, "Unable to run kernel" );
366 
367             // Get results
368             if( gTestMipmaps )
369                 resultSize = width_lod * height_lod * get_pixel_size(imageInfo->format);
370             else
371                 resultSize = imageInfo->rowPitch * imageInfo->height;
372             clProtectedArray PA(resultSize);
373             char *resultValues = (char *)((void *)PA);
374 
375             if( gDebugTrace )
376                 log_info( "    reading results, %ld kbytes\n", (unsigned long)( resultSize / 1024 ) );
377 
378             origin[2] = lod;
379             region[0] = width_lod;
380             region[1] = height_lod;
381             error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, gEnablePitch ? imageInfo->rowPitch : 0, 0, resultValues, 0, NULL, NULL );
382             test_error( error, "Unable to read results from kernel" );
383             if( gDebugTrace )
384                 log_info( "    results read\n" );
385 
386             // Validate results element by element
387             char *imagePtr = (char*)imageValues + nextLevelOffset;
388             int numTries = 5;
389             for( size_t y = 0, i = 0; y < height_lod; y++ )
390             {
391                 char *resultPtr;
392                 if( gTestMipmaps )
393                     resultPtr = (char *)resultValues + y * width_lod * pixelSize;
394                 else
395                     resultPtr = (char*)resultValues + y * imageInfo->rowPitch;
396                 for( size_t x = 0; x < width_lod; x++, i++ )
397                 {
398                     char resultBuffer[ 16 ]; // Largest format would be 4 channels * 4 bytes (32 bits) each
399 
400                     // Convert this pixel
401                     if( inputType == kFloat )
402                         pack_image_pixel( (float *)imagePtr, imageInfo->format, resultBuffer );
403                     else if( inputType == kInt )
404                         pack_image_pixel( (int *)imagePtr, imageInfo->format, resultBuffer );
405                     else // if( inputType == kUInt )
406                         pack_image_pixel( (unsigned int *)imagePtr, imageInfo->format, resultBuffer );
407 
408                     // Compare against the results
409                     if(is_sRGBA_order(imageInfo->format->image_channel_order))
410                     {
411                         // Compare sRGB-mapped values
412                         cl_float expected[4]    = {0};
413                         cl_float* input_values  = (float*)imagePtr;
414                         cl_uchar *actual        = (cl_uchar*)resultPtr;
415                         float max_err           = MAX_lRGB_TO_sRGB_CONVERSION_ERROR;
416                         float err[4]            = {0.0f};
417 
418                         for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
419                         {
420                             if(j < 3)
421                             {
422                                 expected[j] = sRGBmap(input_values[j]);
423                             }
424                             else // there is no sRGB conversion for alpha component if it exists
425                             {
426                                 expected[j] = NORMALIZE(input_values[j], 255.0f);
427                             }
428 
429                             err[j] = fabsf( expected[ j ] - actual[ j ] );
430                         }
431 
432                         if ((err[0] > max_err) ||
433                             (err[1] > max_err) ||
434                             (err[2] > max_err) ||
435                             (err[3] > 0)) // there is no conversion for alpha so the error should be zero
436                         {
437                             log_error( "       Error:     %g %g %g %g\n", err[0], err[1], err[2], err[3]);
438                             log_error( "       Input:     %g %g %g %g\n", *((float *)imagePtr), *((float *)imagePtr + 1), *((float *)imagePtr + 2), *((float *)imagePtr + 3));
439                             log_error( "       Expected:  %g %g %g %g\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
440                             log_error( "       Actual:    %d %d %d %d\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
441                             return 1;
442                         }
443                     }
444                     else if( imageInfo->format->image_channel_data_type == CL_FLOAT )
445                     {
446                         float *expected = (float *)resultBuffer;
447                         float *actual = (float *)resultPtr;
448 
449                         if( !validate_float_write_results( expected, actual, imageInfo ) )
450                         {
451                             unsigned int *e = (unsigned int *)resultBuffer;
452                             unsigned int *a = (unsigned int *)resultPtr;
453                             log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[ mem_flag_index ] );
454                             log_error( "       Expected: %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
455                             log_error( "       Expected: %08x %08x %08x %08x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
456                             log_error( "       Actual:   %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
457                             log_error( "       Actual:   %08x %08x %08x %08x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
458                             totalErrors++;
459                             if( ( --numTries ) == 0 )
460                                 return 1;
461                         }
462                     }
463                     else if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
464                     {
465                         cl_half *e = (cl_half *)resultBuffer;
466                         cl_half *a = (cl_half *)resultPtr;
467                         if( !validate_half_write_results( e, a, imageInfo ) )
468                         {
469                             totalErrors++;
470                             log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[ mem_flag_index ] );
471                             log_error( "    Expected: 0x%04x 0x%04x 0x%04x 0x%04x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
472                             log_error( "    Actual:   0x%04x 0x%04x 0x%04x 0x%04x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
473                             if( inputType == kFloat )
474                             {
475                                 float *p = (float *)(char *)imagePtr;
476                                 log_error( "    Source: %a %a %a %a\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
477                                 log_error( "          : %12.24f %12.24f %12.24f %12.24f\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
478                             }
479                             if( ( --numTries ) == 0 )
480                                 return 1;
481                         }
482                     }
483                     else
484                     {
485 
486                         filter_undefined_bits(imageInfo, resultPtr);
487 
488                         // Exact result passes every time
489                         if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 )
490                         {
491                             // result is inexact.  Calculate error
492                             int failure = 1;
493                             float errors[4] = {NAN, NAN, NAN, NAN};
494                             pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors );
495 
496                             failure = filter_rounding_errors(
497                                 forceCorrectlyRoundedWrites, imageInfo, errors);
498 
499                             if( failure )
500                             {
501                                 totalErrors++;
502                                 // Is it our special rounding test?
503                                 if( verifyRounding && i >= 1 && i <= 2 )
504                                 {
505                                     // Try to guess what the rounding mode of the device really is based on what it returned
506                                     const char *deviceRounding = "unknown";
507                                     unsigned int deviceResults[8];
508                                     read_image_pixel<unsigned int>( resultPtr, imageInfo, 0, 0, 0, deviceResults, lod );
509                                     read_image_pixel<unsigned int>( resultPtr, imageInfo, 1, 0, 0, &deviceResults[ 4 ], lod );
510 
511                                     if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 4 &&
512                                        deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 5 && deviceResults[ 7 ] == 5 )
513                                         deviceRounding = "truncate";
514                                     else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 5 && deviceResults[ 3 ] == 5 &&
515                                             deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
516                                         deviceRounding = "round to nearest";
517                                     else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 5 &&
518                                             deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
519                                         deviceRounding = "round to even";
520 
521                                     log_error( "ERROR: Rounding mode sample (%ld) did not validate, probably due to the device's rounding mode being wrong (%s)\n", i, mem_flag_names[mem_flag_index] );
522                                     log_error( "       Actual values rounded by device: %x %x %x %x %x %x %x %x\n", deviceResults[ 0 ], deviceResults[ 1 ], deviceResults[ 2 ], deviceResults[ 3 ],
523                                               deviceResults[ 4 ], deviceResults[ 5 ], deviceResults[ 6 ], deviceResults[ 7 ] );
524                                     log_error( "       Rounding mode of device appears to be %s\n", deviceRounding );
525                                     return 1;
526                                 }
527                                 log_error( "ERROR: Sample %d (%d,%d) did not validate!\n", (int)i, (int)x, (int)y );
528                                 switch(imageInfo->format->image_channel_data_type)
529                                 {
530                                     case CL_UNORM_INT8:
531                                     case CL_SNORM_INT8:
532                                     case CL_UNSIGNED_INT8:
533                                     case CL_SIGNED_INT8:
534                                     case CL_UNORM_INT_101010:
535                                         log_error( "    Expected: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultBuffer)[0], ((cl_uchar*)resultBuffer)[1], ((cl_uchar*)resultBuffer)[2], ((cl_uchar*)resultBuffer)[3] );
536                                         log_error( "    Actual:   0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] );
537                                         log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
538                                         break;
539                                     case CL_UNORM_INT16:
540                                     case CL_SNORM_INT16:
541                                     case CL_UNSIGNED_INT16:
542                                     case CL_SIGNED_INT16:
543 #ifdef CL_SFIXED14_APPLE
544                                     case CL_SFIXED14_APPLE:
545 #endif
546                                         log_error( "    Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
547                                         log_error( "    Actual:   0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
548                                         log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
549                                         break;
550                                     case CL_HALF_FLOAT:
551                                         log_error("    Expected: 0x%4.4x "
552                                                   "0x%4.4x 0x%4.4x 0x%4.4x\n",
553                                                   ((cl_half *)resultBuffer)[0],
554                                                   ((cl_half *)resultBuffer)[1],
555                                                   ((cl_half *)resultBuffer)[2],
556                                                   ((cl_half *)resultBuffer)[3]);
557                                         log_error("    Actual:   0x%4.4x "
558                                                   "0x%4.4x 0x%4.4x 0x%4.4x\n",
559                                                   ((cl_half *)resultPtr)[0],
560                                                   ((cl_half *)resultPtr)[1],
561                                                   ((cl_half *)resultPtr)[2],
562                                                   ((cl_half *)resultPtr)[3]);
563                                         log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
564                                         break;
565                                     case CL_UNSIGNED_INT32:
566                                     case CL_SIGNED_INT32:
567                                         log_error( "    Expected: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultBuffer)[0], ((cl_uint*)resultBuffer)[1], ((cl_uint*)resultBuffer)[2], ((cl_uint*)resultBuffer)[3] );
568                                         log_error( "    Actual:   0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultPtr)[0], ((cl_uint*)resultPtr)[1], ((cl_uint*)resultPtr)[2], ((cl_uint*)resultPtr)[3] );
569                                         break;
570                                     case CL_FLOAT:
571                                         log_error( "    Expected: %a %a %a %a\n", ((cl_float*)resultBuffer)[0], ((cl_float*)resultBuffer)[1], ((cl_float*)resultBuffer)[2], ((cl_float*)resultBuffer)[3] );
572                                         log_error( "    Actual:   %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] );
573                                         log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
574                                         break;
575                                     case CL_UNORM_SHORT_565: {
576                                         cl_uint *ref_value =
577                                             (cl_uint *)resultBuffer;
578                                         cl_uint *test_value =
579                                             (cl_uint *)resultPtr;
580 
581                                         log_error(" Expected: 0x%2.2x Actual: "
582                                                   "0x%2.2x \n",
583                                                   ref_value[0], test_value[0]);
584 
585                                         log_error("    Expected: 0x%2.2x "
586                                                   "0x%2.2x 0x%2.2x \n",
587                                                   ref_value[0] & 0x1F,
588                                                   (ref_value[0] >> 5) & 0x3F,
589                                                   (ref_value[0] >> 11) & 0x1F);
590                                         log_error("    Actual:   0x%2.2x "
591                                                   "0x%2.2x 0x%2.2x \n",
592                                                   test_value[0] & 0x1F,
593                                                   (test_value[0] >> 5) & 0x3F,
594                                                   (test_value[0] >> 11) & 0x1F);
595                                         log_error("    Error:    %f %f %f %f\n",
596                                                   errors[0], errors[1],
597                                                   errors[2]);
598                                         break;
599                                     }
600 
601                                     case CL_UNORM_SHORT_555: {
602                                         cl_uint *ref_value =
603                                             (cl_uint *)resultBuffer;
604                                         cl_uint *test_value =
605                                             (cl_uint *)resultPtr;
606 
607                                         log_error(" Expected: 0x%2.2x Actual: "
608                                                   "0x%2.2x \n",
609                                                   ref_value[0], test_value[0]);
610 
611                                         log_error("    Expected: 0x%2.2x "
612                                                   "0x%2.2x 0x%2.2x \n",
613                                                   ref_value[0] & 0x1F,
614                                                   (ref_value[0] >> 5) & 0x1F,
615                                                   (ref_value[0] >> 10) & 0x1F);
616                                         log_error("    Actual:   0x%2.2x "
617                                                   "0x%2.2x 0x%2.2x \n",
618                                                   test_value[0] & 0x1F,
619                                                   (test_value[0] >> 5) & 0x1F,
620                                                   (test_value[0] >> 10) & 0x1F);
621                                         log_error("    Error:    %f %f %f %f\n",
622                                                   errors[0], errors[1],
623                                                   errors[2]);
624                                         break;
625                                     }
626                                 }
627 
628                                 float *v = (float *)(char *)imagePtr;
629                                 log_error( "   src: %g %g %g %g\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
630                                 log_error( "      : %a %a %a %a\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
631                                 log_error( "   src: %12.24f %12.24f %12.24f %12.24f\n", v[0 ], v[  1], v[ 2 ], v[ 3 ] );
632 
633                                 if( ( --numTries ) == 0 )
634                                     return 1;
635                             }
636                         }
637                     }
638                     imagePtr += get_explicit_type_size( inputType ) * channel_scale;
639                     resultPtr += get_pixel_size( imageInfo->format );
640                 }
641             }
642             {
643                 nextLevelOffset += width_lod * height_lod * get_pixel_size( imageInfo->format);
644                 width_lod = (width_lod >> 1) ?(width_lod >> 1) : 1;
645                 height_lod = (height_lod >> 1) ?(height_lod >> 1) : 1;
646             }
647         }
648 
649         if (gTestImage2DFromBuffer) clReleaseMemObject(imageBuffer);
650     }
651 
652 
653     // All done!
654     return totalErrors;
655 }
656 
657 
test_write_image_set(cl_device_id device,cl_context context,cl_command_queue queue,const cl_image_format * format,ExplicitType inputType,MTdata d)658 int test_write_image_set(cl_device_id device, cl_context context,
659                          cl_command_queue queue, const cl_image_format *format,
660                          ExplicitType inputType, MTdata d)
661 {
662     char programSrc[10240];
663     const char *ptr;
664     const char *readFormat;
665     clProgramWrapper program;
666     clKernelWrapper kernel;
667     const char *KernelSourcePattern = NULL;
668     int error;
669 
670     if (gTestImage2DFromBuffer)
671     {
672       if (format->image_channel_order == CL_RGB || format->image_channel_order == CL_RGBx)
673       {
674         switch (format->image_channel_data_type)
675         {
676           case CL_UNORM_INT8:
677           case CL_UNORM_INT16:
678           case CL_SNORM_INT8:
679           case CL_SNORM_INT16:
680           case CL_HALF_FLOAT:
681           case CL_FLOAT:
682           case CL_SIGNED_INT8:
683           case CL_SIGNED_INT16:
684           case CL_SIGNED_INT32:
685           case CL_UNSIGNED_INT8:
686           case CL_UNSIGNED_INT16:
687           case CL_UNSIGNED_INT32:
688             log_info( "Skipping image format: %s %s\n", GetChannelOrderName( format->image_channel_order ),
689                      GetChannelTypeName( format->image_channel_data_type ));
690             return 0;
691           default:
692             break;
693         }
694       }
695     }
696 
697     // Get our operating parameters
698     size_t maxWidth, maxHeight;
699     cl_ulong maxAllocSize, memSize;
700 
701     image_descriptor imageInfo = { 0x0 };
702 
703     imageInfo.format = format;
704     imageInfo.slicePitch = imageInfo.arraySize = imageInfo.depth = 0;
705     imageInfo.type = CL_MEM_OBJECT_IMAGE2D;
706 
707     error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
708     error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
709     error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
710     error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
711     test_error( error, "Unable to get max image 2D size from device" );
712 
713     if (memSize > (cl_ulong)SIZE_MAX) {
714       memSize = (cl_ulong)SIZE_MAX;
715     }
716 
717     // Determine types
718     if( inputType == kInt )
719         readFormat = "i";
720     else if( inputType == kUInt )
721         readFormat = "ui";
722     else // kFloat
723         readFormat = "f";
724 
725     if(gtestTypesToRun & kWriteTests)
726     {
727         KernelSourcePattern = writeKernelSourcePattern;
728     }
729     else
730     {
731         KernelSourcePattern = read_writeKernelSourcePattern;
732     }
733 
734     // Construct the source
735     sprintf(
736         programSrc, KernelSourcePattern,
737         gTestMipmaps
738             ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable\n#pragma "
739               "OPENCL EXTENSION cl_khr_mipmap_image_writes: enable"
740             : "",
741         get_explicit_type_name(inputType),
742         (format->image_channel_order == CL_DEPTH) ? "" : "4",
743         (format->image_channel_order == CL_DEPTH) ? "image2d_depth_t"
744                                                   : "image2d_t",
745         gTestMipmaps ? ", int lod" : "",
746         gTestMipmaps ? offset2DLodKernelSource : offset2DKernelSource,
747         readFormat, gTestMipmaps ? ", lod" : "");
748 
749     ptr = programSrc;
750     error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
751                                         "sample_kernel");
752     test_error( error, "Unable to create testing kernel" );
753 
754     // Run tests
755     if( gTestSmallImages )
756     {
757         for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
758         {
759             imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
760             for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ )
761             {
762                 if( gTestMipmaps )
763                     imageInfo.num_mip_levels = (size_t) random_in_range(1, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, d);
764 
765                 if( gDebugTrace )
766                     log_info( "   at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.height );
767                 int retCode = test_write_image( device, context, queue, kernel, &imageInfo, inputType, d );
768                 if( retCode )
769                     return retCode;
770             }
771         }
772     }
773     else if( gTestMaxImages )
774     {
775         // Try a specific set of maximum sizes
776         size_t numbeOfSizes;
777         size_t sizes[100][3];
778 
779         get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D, imageInfo.format, CL_TRUE);
780 
781         for( size_t idx = 0; idx < numbeOfSizes; idx++ )
782         {
783             imageInfo.width = sizes[ idx ][ 0 ];
784             imageInfo.height = sizes[ idx ][ 1 ];
785             imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
786             if( gTestMipmaps )
787                 imageInfo.num_mip_levels = (size_t) random_in_range(1, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, d);
788             log_info("Testing %d x %d\n", (int)imageInfo.width, (int)imageInfo.height);
789             int retCode = test_write_image( device, context, queue, kernel, &imageInfo, inputType, d );
790             if( retCode )
791                 return retCode;
792         }
793     }
794     else if( gTestRounding )
795     {
796         size_t typeRange = 1 << ( get_format_type_size( imageInfo.format ) * 8 );
797         imageInfo.height = typeRange / 256;
798         imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height );
799 
800         imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
801         int retCode = test_write_image( device, context, queue, kernel, &imageInfo, inputType, d );
802         if( retCode )
803             return retCode;
804     }
805     else
806     {
807 
808         cl_uint imagePitchAlign = 0;
809         if (gTestImage2DFromBuffer)
810         {
811 #if defined(CL_DEVICE_IMAGE_PITCH_ALIGNMENT)
812             error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof( cl_uint ), &imagePitchAlign, NULL );
813             if (!imagePitchAlign)
814               imagePitchAlign = 1;
815 #endif
816             test_error( error, "Unable to get CL_DEVICE_IMAGE_PITCH_ALIGNMENT from device" );
817         }
818 
819         for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
820         {
821             cl_ulong size;
822             // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
823             // image, the result array, plus offset arrays, will fit in the global ram space
824             do
825             {
826                 imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, d );
827                 imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, d );
828 
829                 if(gTestMipmaps)
830                 {
831                     imageInfo.num_mip_levels = (size_t) random_in_range(1, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0) - 1, d);
832                     size = 4 * compute_mipmapped_image_size(imageInfo);
833                 }
834                 else
835                 {
836                     imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
837                     if( gEnablePitch )
838                     {
839                         size_t extraWidth = (int)random_log_in_range( 0, 64, d );
840                         imageInfo.rowPitch += extraWidth * get_pixel_size( imageInfo.format );
841                     }
842 
843                     // if we are creating a 2D image from a buffer, make sure that the rowpitch is aligned to CL_DEVICE_IMAGE_PITCH_ALIGNMENT_APPLE
844                     if (gTestImage2DFromBuffer)
845                     {
846                         size_t pitch = imagePitchAlign * get_pixel_size( imageInfo.format );
847                         imageInfo.rowPitch = ((imageInfo.rowPitch + pitch - 1) / pitch ) * pitch;
848                     }
849 
850                     size = (size_t)imageInfo.rowPitch * (size_t)imageInfo.height * 4;
851                 }
852             } while(  size > maxAllocSize || ( size * 3 ) > memSize );
853 
854             if( gDebugTrace )
855                 log_info( "   at size %d,%d (pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxHeight );
856 
857             int retCode = test_write_image( device, context, queue, kernel, &imageInfo, inputType, d );
858             if( retCode )
859                 return retCode;
860         }
861     }
862 
863     return 0;
864 }
865 
test_write_image_formats(cl_device_id device,cl_context context,cl_command_queue queue,const std::vector<cl_image_format> & formatList,const std::vector<bool> & filterFlags,image_sampler_data * imageSampler,ExplicitType inputType,cl_mem_object_type imageType)866 int test_write_image_formats(cl_device_id device, cl_context context,
867                              cl_command_queue queue,
868                              const std::vector<cl_image_format> &formatList,
869                              const std::vector<bool> &filterFlags,
870                              image_sampler_data *imageSampler,
871                              ExplicitType inputType,
872                              cl_mem_object_type imageType)
873 {
874     if( imageSampler->filter_mode == CL_FILTER_LINEAR )
875         // No need to run for linear filters
876         return 0;
877 
878     int ret = 0;
879 
880     log_info( "write_image (%s input) *****************************\n", get_explicit_type_name( inputType ) );
881 
882 
883     RandomSeed seed( gRandomSeed );
884 
885     for (unsigned int i = 0; i < formatList.size(); i++)
886     {
887         const cl_image_format &imageFormat = formatList[i];
888 
889         if( filterFlags[ i ] )
890             continue;
891 
892         gTestCount++;
893 
894         print_write_header( &imageFormat, false );
895         int retCode;
896         switch (imageType)
897         {
898             case CL_MEM_OBJECT_IMAGE1D:
899                 retCode = test_write_image_1D_set( device, context, queue, &imageFormat, inputType, seed );
900                 break;
901             case CL_MEM_OBJECT_IMAGE2D:
902                 retCode = test_write_image_set( device, context, queue, &imageFormat, inputType, seed );
903                 break;
904             case CL_MEM_OBJECT_IMAGE3D:
905                 retCode = test_write_image_3D_set( device, context, queue, &imageFormat, inputType, seed );
906                 break;
907             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
908                 retCode = test_write_image_1D_array_set( device, context, queue, &imageFormat, inputType, seed );
909                 break;
910             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
911                 retCode = test_write_image_2D_array_set( device, context, queue, &imageFormat, inputType, seed );
912                 break;
913         }
914 
915         if( retCode != 0 )
916         {
917             gFailCount++;
918             log_error( "FAILED: " );
919             print_write_header( &imageFormat, true );
920             log_info( "\n" );
921         }
922         ret += retCode;
923     }
924     return ret;
925 }
926 
927 
928