1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "../testBase.h"
17 #include "test_common.h"
18
19 #if !defined(_WIN32)
20 #include <sys/mman.h>
21 #endif
22
23 extern cl_mem_flags gMemFlagsToUse;
24 extern int gtestTypesToRun;
25
26 extern bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo );
27 extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo );
28
29 // Utility function to clamp down image sizes for certain tests to avoid
30 // using too much memory.
reduceImageSizeRange(size_t maxDimSize,MTdata & seed)31 static size_t reduceImageSizeRange(size_t maxDimSize, MTdata& seed) {
32 size_t DimSize = random_log_in_range(8, (int) maxDimSize/32, seed);
33 if (DimSize > (size_t) 128)
34 return 128;
35 else
36 return DimSize;
37 }
38
reduceImageDepth(size_t maxDimSize,MTdata & seed)39 static size_t reduceImageDepth(size_t maxDimSize, MTdata& seed) {
40 size_t DimSize = random_log_in_range(8, (int) maxDimSize/32, seed);
41 if (DimSize > (size_t) 32)
42 return 32;
43 else
44 return DimSize;
45 }
46
47
48 const char *write3DKernelSourcePattern =
49 "%s"
50 "%s\n"
51 "__kernel void sample_kernel( __global %s4 *input, write_only image3d_t "
52 "output %s )\n"
53 "{\n"
54 " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = "
55 "get_global_id(2);\n"
56 "%s"
57 " write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset "
58 "]);\n"
59 "}";
60
61 const char *readwrite3DKernelSourcePattern =
62 "%s"
63 "%s\n"
64 "__kernel void sample_kernel( __global %s4 *input, read_write image3d_t "
65 "output %s )\n"
66 "{\n"
67 " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = "
68 "get_global_id(2);\n"
69 "%s"
70 " write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset "
71 "]);\n"
72 "}";
73
74 const char *khr3DWritesPragma =
75 "#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n";
76
77 const char *offset3DSource=
78 " int offset = tidZ*get_image_width(output)*get_image_height(output) + tidY*get_image_width(output) + tidX;\n";
79
80 const char *offset3DLodSource =
81 " int width_lod = ( get_image_width(output) >> lod ) ? ( get_image_width(output) >> lod ) : 1;\n"
82 " int height_lod = ( get_image_height(output) >> lod ) ? ( get_image_height(output) >> lod ) : 1;\n"
83 " int offset = tidZ*width_lod*height_lod + tidY*width_lod + tidX;\n";
84
test_write_image_3D(cl_device_id device,cl_context context,cl_command_queue queue,cl_kernel kernel,image_descriptor * imageInfo,ExplicitType inputType,MTdata d)85 int test_write_image_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel,
86 image_descriptor *imageInfo, ExplicitType inputType, MTdata d )
87 {
88 int totalErrors = 0;
89
90 size_t num_flags = 0;
91 const cl_mem_flags *mem_flag_types = NULL;
92 const char * *mem_flag_names = NULL;
93 const cl_mem_flags write_only_mem_flag_types[2] = { CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE };
94 const char * write_only_mem_flag_names[2] = { "CL_MEM_WRITE_ONLY", "CL_MEM_READ_WRITE" };
95 const cl_mem_flags read_write_mem_flag_types[1] = { CL_MEM_READ_WRITE};
96 const char * read_write_mem_flag_names[1] = { "CL_MEM_READ_WRITE"};
97
98 if(gtestTypesToRun & kWriteTests)
99 {
100 mem_flag_types = write_only_mem_flag_types;
101 mem_flag_names = write_only_mem_flag_names;
102 num_flags = sizeof( write_only_mem_flag_types ) / sizeof( write_only_mem_flag_types[0] );
103 }
104 else
105 {
106 mem_flag_types = read_write_mem_flag_types;
107 mem_flag_names = read_write_mem_flag_names;
108 num_flags = sizeof( read_write_mem_flag_types ) / sizeof( read_write_mem_flag_types[0] );
109 }
110
111 size_t pixelSize = get_pixel_size( imageInfo->format );
112
113 for( size_t mem_flag_index = 0; mem_flag_index < num_flags; mem_flag_index++ )
114 {
115 int error;
116 size_t threads[3];
117 bool verifyRounding = false;
118 int forceCorrectlyRoundedWrites = 0;
119
120 #if defined( __APPLE__ )
121 // Require Apple's CPU implementation to be correctly rounded, not just within 0.6
122 if( GetDeviceType(device) == CL_DEVICE_TYPE_CPU )
123 forceCorrectlyRoundedWrites = 1;
124 #endif
125
126 if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
127 if( DetectFloatToHalfRoundingMode(queue) )
128 return 1;
129
130 BufferOwningPtr<char> maxImageUseHostPtrBackingStore, imageValues;
131
132 create_random_image_data( inputType, imageInfo, imageValues, d );
133
134 if(!gTestMipmaps)
135 {
136 if( inputType == kFloat && imageInfo->format->image_channel_data_type != CL_FLOAT )
137 {
138 /* Pilot data for sRGB images */
139 if(is_sRGBA_order(imageInfo->format->image_channel_order))
140 {
141 // We want to generate ints (mostly) in range of the target format which should be [0,255]
142 // However the range chosen here is [-test_range_ext, 255 + test_range_ext] so that
143 // it can test some out-of-range data points
144 const unsigned int test_range_ext = 16;
145 int formatMin = 0 - test_range_ext;
146 int formatMax = 255 + test_range_ext;
147 int pixel_value = 0;
148
149 // First, fill with arbitrary floats
150 for( size_t z = 0; z < imageInfo->depth; z++ )
151 {
152 for( size_t y = 0; y < imageInfo->height; y++ )
153 {
154 float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4 + imageInfo->height * imageInfo->width * z * 4;
155 for( size_t i = 0; i < imageInfo->width * 4; i++ )
156 {
157 pixel_value = random_in_range( formatMin, (int)formatMax, d );
158 inputValues[ i ] = (float)(pixel_value/255.0f);
159 }
160 }
161 }
162
163 // Throw a few extra test values in there
164 float *inputValues = (float *)(char*)imageValues;
165 size_t i = 0;
166
167 // Piloting some debug inputs.
168 inputValues[ i++ ] = -0.5f;
169 inputValues[ i++ ] = 0.5f;
170 inputValues[ i++ ] = 2.f;
171 inputValues[ i++ ] = 0.5f;
172
173 // Also fill in the first few vectors with some deliberate tests to determine the rounding mode
174 // is correct
175 if( imageInfo->width > 12 )
176 {
177 float formatMax = (float)get_format_max_int( imageInfo->format );
178 inputValues[ i++ ] = 4.0f / formatMax;
179 inputValues[ i++ ] = 4.3f / formatMax;
180 inputValues[ i++ ] = 4.5f / formatMax;
181 inputValues[ i++ ] = 4.7f / formatMax;
182 inputValues[ i++ ] = 5.0f / formatMax;
183 inputValues[ i++ ] = 5.3f / formatMax;
184 inputValues[ i++ ] = 5.5f / formatMax;
185 inputValues[ i++ ] = 5.7f / formatMax;
186 }
187 }
188 else
189 {
190 // First, fill with arbitrary floats
191 for( size_t z = 0; z < imageInfo->depth; z++ )
192 {
193 for( size_t y = 0; y < imageInfo->height; y++ )
194 {
195 float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4 + imageInfo->height * imageInfo->width * z * 4;
196 for( size_t i = 0; i < imageInfo->width * 4; i++ )
197 inputValues[ i ] = get_random_float( -0.1f, 1.1f, d );
198 }
199 }
200
201 // Throw a few extra test values in there
202 float *inputValues = (float *)(char*)imageValues;
203 size_t i = 0;
204 inputValues[ i++ ] = -0.0000000000009f;
205 inputValues[ i++ ] = 1.f;
206 inputValues[ i++ ] = -1.f;
207 inputValues[ i++ ] = 2.f;
208
209 // Also fill in the first few vectors with some deliberate tests to determine the rounding mode
210 // is correct
211 if( imageInfo->width > 12 )
212 {
213 float formatMax = (float)get_format_max_int( imageInfo->format );
214 inputValues[ i++ ] = 4.0f / formatMax;
215 inputValues[ i++ ] = 4.3f / formatMax;
216 inputValues[ i++ ] = 4.5f / formatMax;
217 inputValues[ i++ ] = 4.7f / formatMax;
218 inputValues[ i++ ] = 5.0f / formatMax;
219 inputValues[ i++ ] = 5.3f / formatMax;
220 inputValues[ i++ ] = 5.5f / formatMax;
221 inputValues[ i++ ] = 5.7f / formatMax;
222 verifyRounding = true;
223 }
224 }
225 }
226 else if( inputType == kUInt )
227 {
228 unsigned int *inputValues = (unsigned int*)(char*)imageValues;
229 size_t i = 0;
230 inputValues[ i++ ] = 0;
231 inputValues[ i++ ] = 65535;
232 inputValues[ i++ ] = 7271820;
233 inputValues[ i++ ] = 0;
234 }
235 }
236
237 // Construct testing sources
238 clProtectedImage protImage;
239 clMemWrapper unprotImage;
240 cl_mem image;
241
242 if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
243 {
244 create_random_image_data( inputType, imageInfo, maxImageUseHostPtrBackingStore, d );
245
246 unprotImage = create_image_3d( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR, imageInfo->format,
247 imageInfo->width, imageInfo->height, imageInfo->depth, 0, 0,
248 maxImageUseHostPtrBackingStore, &error );
249
250 if( error != CL_SUCCESS )
251 {
252 log_error( "ERROR: Unable to create 3D image of size %ld x %ld x %ld pitch %ld (%s)\n", imageInfo->width, imageInfo->height, imageInfo->depth, imageInfo->rowPitch, IGetErrorString( error ) );
253 return error;
254 }
255
256 image = (cl_mem)unprotImage;
257 }
258 else // Either CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR or none
259 {
260 // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
261 // it works just as if no flag is specified, so we just do the same thing either way
262 // Note: if the flags is really CL_MEM_COPY_HOST_PTR, we want to remove it, because we don't want to copy any incoming data
263 if(gTestMipmaps)
264 {
265 cl_image_desc image_desc = {0};
266 image_desc.image_type = imageInfo->type;
267 image_desc.num_mip_levels = imageInfo->num_mip_levels;
268 image_desc.image_width = imageInfo->width;
269 image_desc.image_height = imageInfo->height;
270 image_desc.image_depth = imageInfo->depth;
271
272 unprotImage = clCreateImage( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ),
273 imageInfo->format, &image_desc, NULL, &error);
274 if( error != CL_SUCCESS )
275 {
276 log_error( "ERROR: Unable to create %d level mipmapped 3D image of size %ld x %ld *%ld (%s, %s)\n", imageInfo->num_mip_levels, imageInfo->width, imageInfo->height, imageInfo->depth,
277 IGetErrorString( error ), mem_flag_names[mem_flag_index] );
278 return error;
279 }
280 }
281 else
282 {
283 unprotImage = create_image_3d( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), imageInfo->format,
284 imageInfo->width, imageInfo->height, imageInfo->depth, 0, 0, imageValues, &error );
285 if( error != CL_SUCCESS )
286 {
287 log_error( "ERROR: Unable to create 3D image of size %ld x %ld x %ld pitch %ld (%s)\n", imageInfo->width, imageInfo->height, imageInfo->depth, imageInfo->rowPitch, IGetErrorString( error ) );
288 return error;
289 }
290 }
291 image = unprotImage;
292 }
293
294 error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &image );
295 test_error( error, "Unable to set kernel arguments" );
296
297 size_t width_lod = imageInfo->width;
298 size_t height_lod = imageInfo->height;
299 size_t depth_lod = imageInfo->depth;
300 size_t nextLevelOffset = 0;
301 size_t origin[ 4 ] = { 0, 0, 0, 0 };
302 size_t region[ 3 ] = { imageInfo->width, imageInfo->height, imageInfo->depth };
303
304 int num_lod_loops = (gTestMipmaps)? imageInfo->num_mip_levels : 1;
305 for( int lod = 0; lod < num_lod_loops; lod++)
306 {
307 if(gTestMipmaps)
308 {
309 error = clSetKernelArg( kernel, 2, sizeof( int ), &lod );
310 }
311 // Run the kernel
312 threads[0] = (size_t)width_lod;
313 threads[1] = (size_t)height_lod;
314 threads[2] = (size_t)depth_lod;
315
316 clMemWrapper inputStream;
317
318 char *imagePtrOffset = imageValues + nextLevelOffset;
319 inputStream =
320 clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
321 get_explicit_type_size(inputType) * 4 * width_lod
322 * height_lod * depth_lod,
323 imagePtrOffset, &error);
324 test_error( error, "Unable to create input buffer" );
325
326 // Set arguments
327 error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &inputStream );
328 test_error( error, "Unable to set kernel arguments" );
329
330 error = clEnqueueNDRangeKernel( queue, kernel, 3, NULL, threads, NULL, 0, NULL, NULL );
331 test_error( error, "Unable to run kernel" );
332
333 // Get results
334 size_t resultSize;
335 if(gTestMipmaps)
336 resultSize = width_lod * height_lod * depth_lod * pixelSize;
337 else
338 resultSize = imageInfo->slicePitch *imageInfo->depth;
339 clProtectedArray PA(resultSize);
340 char *resultValues = (char *)((void *)PA);
341
342 if( gDebugTrace )
343 log_info( " reading results, %ld kbytes\n", (unsigned long)( resultSize / 1024 ) );
344
345 origin[3] = lod;
346 region[0] = width_lod;
347 region[1] = height_lod;
348 region[2] = depth_lod;
349 error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, gEnablePitch ? imageInfo->rowPitch : 0, gEnablePitch ? imageInfo->slicePitch : 0, resultValues, 0, NULL, NULL );
350 test_error( error, "Unable to read results from kernel" );
351 if( gDebugTrace )
352 log_info( " results read\n" );
353
354 // Validate results element by element
355 char *imagePtr = (char*)imageValues + nextLevelOffset;
356 int numTries = 5;
357 for( size_t z = 0, i = 0; z < depth_lod; z++ )
358 {
359 for( size_t y = 0; y < height_lod; y++ )
360 {
361 char *resultPtr;
362 if( gTestMipmaps )
363 resultPtr = (char *)resultValues + y * width_lod * pixelSize + z * width_lod * height_lod * pixelSize;
364 else
365 resultPtr = (char *)resultValues + y * imageInfo->rowPitch + z * imageInfo->slicePitch;
366 for( size_t x = 0; x < width_lod; x++, i++ )
367 {
368 char resultBuffer[ 16 ]; // Largest format would be 4 channels * 4 bytes (32 bits) each
369
370 // Convert this pixel
371 if( inputType == kFloat )
372 pack_image_pixel( (float *)imagePtr, imageInfo->format, resultBuffer );
373 else if( inputType == kInt )
374 pack_image_pixel( (int *)imagePtr, imageInfo->format, resultBuffer );
375 else // if( inputType == kUInt )
376 pack_image_pixel( (unsigned int *)imagePtr, imageInfo->format, resultBuffer );
377
378 // Compare against the results
379 if(is_sRGBA_order(imageInfo->format->image_channel_order))
380 {
381 // Compare sRGB-mapped values
382 cl_float expected[4] = {0};
383 cl_float* input_values = (float*)imagePtr;
384 cl_uchar *actual = (cl_uchar*)resultPtr;
385 float max_err = MAX_lRGB_TO_sRGB_CONVERSION_ERROR;
386 float err[4] = {0.0f};
387
388 for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
389 {
390 if(j < 3)
391 {
392 expected[j] = sRGBmap(input_values[j]);
393 }
394 else // there is no sRGB conversion for alpha component if it exists
395 {
396 expected[j] = NORMALIZE(input_values[j], 255.0f);
397 }
398
399 err[j] = fabsf( expected[ j ] - actual[ j ] );
400 }
401
402 if ((err[0] > max_err) ||
403 (err[1] > max_err) ||
404 (err[2] > max_err) ||
405 (err[3] > FLT_EPSILON)) // there is no conversion for alpha
406 {
407 log_error( " Error: %g %g %g %g\n", err[0], err[1], err[2], err[3]);
408 log_error( " Input: %g %g %g %g\n", *((float *)imagePtr), *((float *)imagePtr + 1), *((float *)imagePtr + 2), *((float *)imagePtr + 3));
409 log_error( " Expected: %g %g %g %g\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
410 log_error( " Actual: %d %d %d %d\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
411 return 1;
412 }
413 }
414 else if( imageInfo->format->image_channel_data_type == CL_FLOAT )
415 {
416 float *expected = (float *)resultBuffer;
417 float *actual = (float *)resultPtr;
418
419 if( !validate_float_write_results( expected, actual, imageInfo ) )
420 {
421 unsigned int *e = (unsigned int *)resultBuffer;
422 unsigned int *a = (unsigned int *)resultPtr;
423 log_error( "ERROR: Sample %ld (%ld,%ld,%ld) did not validate! (%s)\n", i, x, y, z, mem_flag_names[ mem_flag_index ] );
424 log_error( " Expected: %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
425 log_error( " Expected: %08x %08x %08x %08x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
426 log_error( " Actual: %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
427 log_error( " Actual: %08x %08x %08x %08x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
428 totalErrors++;
429 if( ( --numTries ) == 0 )
430 return 1;
431 }
432 }
433 else if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
434 {
435 cl_half *e = (cl_half *)resultBuffer;
436 cl_half *a = (cl_half *)resultPtr;
437 if( !validate_half_write_results( e, a, imageInfo ) )
438 {
439 totalErrors++;
440 log_error( "ERROR: Sample %ld (%ld,%ld,%ld) did not validate! (%s)\n", i, x, y, z, mem_flag_names[ mem_flag_index ] );
441 unsigned short *e = (unsigned short *)resultBuffer;
442 unsigned short *a = (unsigned short *)resultPtr;
443 log_error( " Expected: 0x%04x 0x%04x 0x%04x 0x%04x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
444 log_error( " Actual: 0x%04x 0x%04x 0x%04x 0x%04x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
445 if( inputType == kFloat )
446 {
447 float *p = (float *)imagePtr;
448 log_error( " Source: %a %a %a %a\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
449 log_error( " : %12.24f %12.24f %12.24f %12.24f\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
450 }
451 if( ( --numTries ) == 0 )
452 return 1;
453 }
454 }
455 else
456 {
457
458 filter_undefined_bits(imageInfo, resultPtr);
459
460 // Exact result passes every time
461 if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 )
462 {
463 // result is inexact. Calculate error
464 int failure = 1;
465 float errors[4] = {NAN, NAN, NAN, NAN};
466 pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors );
467
468 failure = filter_rounding_errors(
469 forceCorrectlyRoundedWrites, imageInfo,
470 errors);
471
472 if( failure )
473 {
474 totalErrors++;
475 // Is it our special rounding test?
476 if( verifyRounding && i >= 1 && i <= 2 )
477 {
478 // Try to guess what the rounding mode of the device really is based on what it returned
479 const char *deviceRounding = "unknown";
480 unsigned int deviceResults[8];
481 read_image_pixel<unsigned int>( resultPtr, imageInfo, 0, 0, 0, deviceResults, lod );
482 read_image_pixel<unsigned int>( resultPtr, imageInfo, 1, 0, 0, &deviceResults[ 4 ], lod );
483
484 if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 4 &&
485 deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 5 && deviceResults[ 7 ] == 5 )
486 deviceRounding = "truncate";
487 else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 5 && deviceResults[ 3 ] == 5 &&
488 deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
489 deviceRounding = "round to nearest";
490 else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 5 &&
491 deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
492 deviceRounding = "round to even";
493
494 log_error( "ERROR: Rounding mode sample (%ld) did not validate, probably due to the device's rounding mode being wrong (%s)\n", i, mem_flag_names[mem_flag_index] );
495 log_error( " Actual values rounded by device: %d %d %d %d %d %d %d %d\n", deviceResults[ 0 ], deviceResults[ 1 ], deviceResults[ 2 ], deviceResults[ 3 ],
496 deviceResults[ 4 ], deviceResults[ 5 ], deviceResults[ 6 ], deviceResults[ 7 ] );
497 log_error( " Rounding mode of device appears to be %s\n", deviceRounding );
498 return 1;
499 }
500 log_error( "ERROR: Sample %d (%d,%d) did not validate!\n", (int)i, (int)x, (int)y );
501 switch(imageInfo->format->image_channel_data_type)
502 {
503 case CL_UNORM_INT8:
504 case CL_SNORM_INT8:
505 case CL_UNSIGNED_INT8:
506 case CL_SIGNED_INT8:
507 log_error( " Expected: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultBuffer)[0], ((cl_uchar*)resultBuffer)[1], ((cl_uchar*)resultBuffer)[2], ((cl_uchar*)resultBuffer)[3] );
508 log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] );
509 log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
510 break;
511 case CL_UNORM_SHORT_565: {
512 cl_uint *ref_value =
513 (cl_uint *)resultBuffer;
514 cl_uint *test_value =
515 (cl_uint *)resultPtr;
516
517 log_error(" Expected: 0x%2.2x "
518 "Actual: 0x%2.2x \n",
519 ref_value[0],
520 test_value[0]);
521
522 log_error(
523 " Expected: 0x%2.2x 0x%2.2x "
524 "0x%2.2x \n",
525 ref_value[0] & 0x1F,
526 (ref_value[0] >> 5) & 0x3F,
527 (ref_value[0] >> 11) & 0x1F);
528 log_error(
529 " Actual: 0x%2.2x 0x%2.2x "
530 "0x%2.2x \n",
531 test_value[0] & 0x1F,
532 (test_value[0] >> 5) & 0x3F,
533 (test_value[0] >> 11) & 0x1F);
534 log_error(
535 " Error: %f %f %f %f\n",
536 errors[0], errors[1],
537 errors[2]);
538 break;
539 }
540 case CL_UNORM_SHORT_555: {
541 cl_uint *ref_value =
542 (cl_uint *)resultBuffer;
543 cl_uint *test_value =
544 (cl_uint *)resultPtr;
545
546 log_error(" Expected: 0x%2.2x "
547 "Actual: 0x%2.2x \n",
548 ref_value[0],
549 test_value[0]);
550
551 log_error(
552 " Expected: 0x%2.2x 0x%2.2x "
553 "0x%2.2x \n",
554 ref_value[0] & 0x1F,
555 (ref_value[0] >> 5) & 0x1F,
556 (ref_value[0] >> 10) & 0x1F);
557 log_error(
558 " Actual: 0x%2.2x 0x%2.2x "
559 "0x%2.2x \n",
560 test_value[0] & 0x1F,
561 (test_value[0] >> 5) & 0x1F,
562 (test_value[0] >> 10) & 0x1F);
563 log_error(
564 " Error: %f %f %f %f\n",
565 errors[0], errors[1],
566 errors[2]);
567 break;
568 }
569 case CL_UNORM_INT16:
570 case CL_SNORM_INT16:
571 case CL_UNSIGNED_INT16:
572 case CL_SIGNED_INT16:
573 #ifdef CL_SFIXED14_APPLE
574 case CL_SFIXED14_APPLE:
575 #endif
576 log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
577 log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
578 log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
579 break;
580 case CL_HALF_FLOAT:
581 log_error(
582 " Expected: 0x%4.4x 0x%4.4x "
583 "0x%4.4x 0x%4.4x\n",
584 ((cl_half *)resultBuffer)[0],
585 ((cl_half *)resultBuffer)[1],
586 ((cl_half *)resultBuffer)[2],
587 ((cl_half *)resultBuffer)[3]);
588 log_error(
589 " Actual: 0x%4.4x 0x%4.4x "
590 "0x%4.4x 0x%4.4x\n",
591 ((cl_half *)resultPtr)[0],
592 ((cl_half *)resultPtr)[1],
593 ((cl_half *)resultPtr)[2],
594 ((cl_half *)resultPtr)[3]);
595 log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
596 break;
597 case CL_UNSIGNED_INT32:
598 case CL_SIGNED_INT32:
599 log_error( " Expected: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultBuffer)[0], ((cl_uint*)resultBuffer)[1], ((cl_uint*)resultBuffer)[2], ((cl_uint*)resultBuffer)[3] );
600 log_error( " Actual: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultPtr)[0], ((cl_uint*)resultPtr)[1], ((cl_uint*)resultPtr)[2], ((cl_uint*)resultPtr)[3] );
601 break;
602 case CL_FLOAT:
603 log_error( " Expected: %a %a %a %a\n", ((cl_float*)resultBuffer)[0], ((cl_float*)resultBuffer)[1], ((cl_float*)resultBuffer)[2], ((cl_float*)resultBuffer)[3] );
604 log_error( " Actual: %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] );
605 log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
606 break;
607 }
608
609 float *v = (float *)(char *)imagePtr;
610 log_error( " src: %g %g %g %g\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
611 log_error( " : %a %a %a %a\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
612 log_error( " src: %12.24f %12.24f %12.24f %12.24f\n", v[0 ], v[ 1], v[ 2 ], v[ 3 ] );
613
614 if( ( --numTries ) == 0 )
615 return 1;
616 }
617 }
618 }
619 imagePtr += get_explicit_type_size( inputType ) * 4;
620 resultPtr += get_pixel_size( imageInfo->format );
621 }
622 }
623 }
624 {
625 nextLevelOffset += width_lod * height_lod * depth_lod * pixelSize;
626 width_lod = ( width_lod >> 1 ) ? ( width_lod >> 1 ) : 1;
627 height_lod = ( height_lod >> 1 ) ? ( height_lod >> 1 ) : 1;
628 depth_lod = ( depth_lod >> 1 ) ? ( depth_lod >> 1 ) : 1;
629 }
630 }
631 }
632 // All done!
633 return totalErrors;
634 }
635
636
test_write_image_3D_set(cl_device_id device,cl_context context,cl_command_queue queue,const cl_image_format * format,ExplicitType inputType,MTdata d)637 int test_write_image_3D_set(cl_device_id device, cl_context context,
638 cl_command_queue queue,
639 const cl_image_format *format,
640 ExplicitType inputType, MTdata d)
641 {
642 char programSrc[10240];
643 const char *ptr;
644 const char *readFormat;
645 clProgramWrapper program;
646 clKernelWrapper kernel;
647 const char *KernelSourcePattern = NULL;
648
649 int error;
650
651 // Get our operating parameters
652 size_t maxWidth, maxHeight, maxDepth;
653 cl_ulong maxAllocSize, memSize;
654
655 image_descriptor imageInfo = { 0x0 };
656
657 imageInfo.format = format;
658 imageInfo.type = CL_MEM_OBJECT_IMAGE3D;
659
660 error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
661 error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
662 error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( maxDepth ), &maxDepth, NULL );
663 error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
664 error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
665 test_error( error, "Unable to get max image 3D size from device" );
666
667 if (memSize > (cl_ulong)SIZE_MAX) {
668 memSize = (cl_ulong)SIZE_MAX;
669 }
670
671 // Determine types
672 if( inputType == kInt )
673 readFormat = "i";
674 else if( inputType == kUInt )
675 readFormat = "ui";
676 else // kFloat
677 readFormat = "f";
678
679 if(gtestTypesToRun & kWriteTests)
680 {
681 KernelSourcePattern = write3DKernelSourcePattern;
682 }
683 else
684 {
685 KernelSourcePattern = readwrite3DKernelSourcePattern;
686 }
687
688 // Construct the source
689 sprintf(
690 programSrc, KernelSourcePattern, khr3DWritesPragma,
691 gTestMipmaps
692 ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable\n#pragma "
693 "OPENCL EXTENSION cl_khr_mipmap_image_writes: enable"
694 : "",
695 get_explicit_type_name(inputType), gTestMipmaps ? ", int lod" : "",
696 gTestMipmaps ? offset3DLodSource : offset3DSource, readFormat,
697 gTestMipmaps ? ", lod" : "");
698
699 ptr = programSrc;
700 error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
701 "sample_kernel");
702 test_error( error, "Unable to create testing kernel" );
703
704 // Run tests
705 if( gTestSmallImages )
706 {
707 for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
708 {
709 imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
710 for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ )
711 {
712 imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
713 for( imageInfo.depth = 2; imageInfo.depth < 7; imageInfo.depth++ )
714 {
715 if (gTestMipmaps)
716 imageInfo.num_mip_levels = (size_t) random_in_range(2,(compute_max_mip_levels(imageInfo.width, imageInfo.height, imageInfo.depth) - 1), d);
717
718 if( gDebugTrace )
719 log_info( " at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth );
720 int retCode = test_write_image_3D( device, context, queue, kernel, &imageInfo, inputType, d );
721 if( retCode )
722 return retCode;
723 }
724 }
725 }
726 }
727 else if( gTestMaxImages )
728 {
729 // Try a specific set of maximum sizes
730 size_t numbeOfSizes;
731 size_t sizes[100][3];
732
733 get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, maxDepth, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE3D, imageInfo.format, CL_TRUE);
734
735 for( size_t idx = 0; idx < numbeOfSizes; idx++ )
736 {
737 imageInfo.width = sizes[ idx ][ 0 ];
738 imageInfo.height = sizes[ idx ][ 1 ];
739 imageInfo.depth = sizes[ idx ][ 2 ];
740 imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
741 imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
742 if (gTestMipmaps)
743 imageInfo.num_mip_levels = (size_t) random_in_range(2,(compute_max_mip_levels(imageInfo.width, imageInfo.height, imageInfo.depth) - 1), d);
744 log_info("Testing %d x %d x %d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth);
745 int retCode = test_write_image_3D( device, context, queue, kernel, &imageInfo, inputType, d );
746 if( retCode )
747 return retCode;
748 }
749 }
750 else if( gTestRounding )
751 {
752 size_t typeRange = 1 << ( get_format_type_size( imageInfo.format ) * 8 );
753 imageInfo.height = typeRange / 256;
754 imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height );
755 imageInfo.depth = 1;
756
757 imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
758 imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
759 int retCode = test_write_image_3D( device, context, queue, kernel, &imageInfo, inputType, d );
760 if( retCode )
761 return retCode;
762 }
763 else
764 {
765 for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
766 {
767 cl_ulong size;
768 // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
769 // image, the result array, plus offset arrays, will fit in the global ram space
770 do
771 {
772 imageInfo.width = reduceImageSizeRange(maxWidth, d );
773 imageInfo.height = reduceImageSizeRange(maxHeight, d );
774 imageInfo.depth = reduceImageDepth(maxDepth, d );
775
776 if(gTestMipmaps)
777 {
778 imageInfo.num_mip_levels = (size_t) random_in_range(2,(compute_max_mip_levels(imageInfo.width, imageInfo.height, imageInfo.depth) - 1), d);
779 //Need to take into account the input buffer size, otherwise we will end up with input buffer that is exceeding MaxAlloc
780 size = 4 * compute_mipmapped_image_size(imageInfo) * get_explicit_type_size( inputType );
781 }
782 else
783 {
784 imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
785 imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
786 if( gEnablePitch )
787 {
788 size_t extraWidth = (int)random_log_in_range( 0, 64, d );
789 imageInfo.rowPitch += extraWidth * get_pixel_size( imageInfo.format );
790
791 imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
792 extraWidth = (int)random_log_in_range( 0, 64, d );
793 imageInfo.slicePitch += extraWidth * imageInfo.rowPitch;
794 }
795
796 size = (size_t)imageInfo.slicePitch * (size_t)imageInfo.depth * 4;
797 }
798 } while( size > maxAllocSize || ( size * 3 ) > memSize );
799
800 if( gDebugTrace )
801 log_info( " at size %ld,%ld,%ld (pitch %ld, slice %ld) out of %ld,%ld,%ld\n", imageInfo.width, imageInfo.height, imageInfo.depth,
802 imageInfo.rowPitch, imageInfo.slicePitch, maxWidth, maxHeight, maxDepth );
803
804 int retCode = test_write_image_3D( device, context, queue, kernel, &imageInfo, inputType, d );
805 if( retCode )
806 return retCode;
807 }
808 }
809
810 return 0;
811 }
812