//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include <stdio.h>
#include <stdlib.h>

#include "procs.h"
#include "harness/errorHelpers.h"

#define MAX_SUB_DEVICES   16      // Limit the number of sub-devices to avoid out-of-resource errors.
#define MEM_OBJ_SIZE      1024
#define IMAGE_DIM         16
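// Note: MEM_OBJ_SIZE == IMAGE_DIM * IMAGE_DIM * 4; each CL_RGBA /
// CL_UNSIGNED_INT32 test image holds exactly MEM_OBJ_SIZE cl_uint channel values.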

// Kernel source code: each work-item XORs one texel of the two input images
// with the scalar argument and writes the result to the output image.
static const char *image_migrate_kernel_code =
"__kernel void test_image_migrate(write_only image2d_t dst, read_only image2d_t src1,\n"
"                                 read_only image2d_t src2, sampler_t sampler, uint x)\n"
"{\n"
"  int tidX = get_global_id(0), tidY = get_global_id(1);\n"
"  int2 coords = (int2) {tidX, tidY};\n"
"  uint4 val = read_imageui(src1, sampler, coords) ^\n"
"              read_imageui(src2, sampler, coords) ^\n"
"              x;\n"
"  write_imageui(dst, coords, val);\n"
"}\n";

enum migrations { MIGRATE_PREFERRED,     // migrate to the preferred sub-device
                  MIGRATE_NON_PREFERRED, // migrate to a randomly chosen non-preferred sub-device
                  MIGRATE_RANDOM,        // migrate to a randomly chosen sub-device with randomly chosen flags
                  NUMBER_OF_MIGRATIONS };

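// Helper: synchronously upload `data` into `image` (when the image was created
// successfully) and pass the handle through, so creation and initialization
// can be chained in a single expression.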
static cl_mem init_image(cl_command_queue cmd_q, cl_mem image, cl_uint *data)
{
  cl_int err;

  size_t origin[3] = {0, 0, 0};
  size_t region[3] = {IMAGE_DIM, IMAGE_DIM, 1};

  if (image) {
    if ((err = clEnqueueWriteImage(cmd_q, image, CL_TRUE,
                                   origin, region, 0, 0, data, 0, NULL, NULL)) != CL_SUCCESS) {
      print_error(err, "Failed on enqueue write of image data.");
    }
  }

  return image;
}

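// Enqueue one migration per sub-device for the given array of memory objects,
// choosing the destination queue and migration flags according to `migrate`.
// The flags actually used are recorded in `flags` so the caller can tell
// which objects may have had their contents invalidated.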
static cl_int migrateMemObject(enum migrations migrate, cl_command_queue *queues, cl_mem *mem_objects,
                               cl_uint num_devices, cl_mem_migration_flags *flags, MTdata d)
{
  cl_uint i, j;
  cl_int  err = CL_SUCCESS;

  for (i=0; i<num_devices; i++) {
    j = genrand_int32(d) % num_devices;
    flags[i] = 0;
    switch (migrate) {
      case MIGRATE_PREFERRED:
        // Force the device to be preferred
        j = i;
        break;
      case MIGRATE_NON_PREFERRED:
        // Coerce the device to be non-preferred
        if ((j == i) && (num_devices > 1)) j = (j+1) % num_devices;
        break;
      case MIGRATE_RANDOM:
        // Choose a random set of flags
        flags[i] = (cl_mem_migration_flags)(genrand_int32(d) & (CL_MIGRATE_MEM_OBJECT_HOST | CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED));
        break;
      default: log_error("Unhandled migration type: %d\n", migrate); return -1;
    }
    if ((err = clEnqueueMigrateMemObjects(queues[j], 1, (const cl_mem *)(&mem_objects[i]),
                                          flags[i], 0, NULL, NULL)) != CL_SUCCESS) {
      print_error(err, "Failed migrating memory object.");
    }
  }
  return err;
}

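// Rewrite the original pixel data into any image whose last migration used
// CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED, since that flag allows the
// runtime to discard the image's contents during the move.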
static cl_int restoreImage(cl_command_queue *queues, cl_mem *mem_objects, cl_uint num_devices,
                           cl_mem_migration_flags *flags, cl_uint *buffer)
{
  cl_uint i;
  cl_int  err;

  const size_t origin[3] = {0, 0, 0};
  const size_t region[3] = {IMAGE_DIM, IMAGE_DIM, 1};

  // If the image was previously migrated with undefined content, reload the content.

  for (i=0; i<num_devices; i++) {
    if (flags[i] & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED) {
      if ((err = clEnqueueWriteImage(queues[i], mem_objects[i], CL_TRUE,
                                     origin, region, 0, 0, buffer, 0, NULL, NULL)) != CL_SUCCESS) {
        print_error(err, "Failed on restoration enqueue write of image data.");
        return err;
      }
    }
  }
  return CL_SUCCESS;
}

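// Test entry point: partition the device along each supported affinity domain,
// create one set of images and one command queue per sub-device, then run the
// XOR kernel under every combination of migration policies and verify the output.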
int test_image_migrate(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  int failed = 0;
  cl_uint i, j;
  cl_int err;
  cl_uint max_sub_devices = 0;
  cl_uint num_devices, num_devices_limited;
  cl_uint A[MEM_OBJ_SIZE], B[MEM_OBJ_SIZE], C[MEM_OBJ_SIZE];
  cl_uint test_number = 1;
  cl_device_affinity_domain domain, domains;
  cl_device_id *devices;
  cl_command_queue *queues;
  cl_mem_migration_flags *flagsA, *flagsB, *flagsC;
  cl_device_partition_property property[] = {CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, 0, 0};
  cl_mem *imageA, *imageB, *imageC;
  cl_image_format format;
  cl_sampler sampler = NULL;
  cl_program program = NULL;
  cl_kernel kernel = NULL;
  cl_context ctx = NULL;
  enum migrations migrateA, migrateB, migrateC;
  MTdata d = init_genrand(gRandomSeed);
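  // Launch one work-item per texel, in trivial 1x1 work-groups.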
  const size_t wgs[2] = {IMAGE_DIM, IMAGE_DIM};
  const size_t wls[2] = {1, 1};

  // Check for image support.
  if (checkForImageSupport(deviceID) == CL_IMAGE_FORMAT_NOT_SUPPORTED) {
    log_info("Device does not support images. Skipping test.\n");
    return 0;
  }

  // Use the compute-unit count as an upper bound on the number of sub-devices
  // and size the bookkeeping arrays accordingly.
  if ((err = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(max_sub_devices), &max_sub_devices, NULL)) != CL_SUCCESS) {
    print_error(err, "clGetDeviceInfo(CL_DEVICE_MAX_COMPUTE_UNITS) failed");
    return -1;
  }
  if (max_sub_devices < 1) {
    log_error("ERROR: Invalid number of compute units returned.\n");
    return -1;
  }

  devices = (cl_device_id *)malloc(max_sub_devices * sizeof(cl_device_id));
  queues  = (cl_command_queue *)malloc(max_sub_devices * sizeof(cl_command_queue));
  flagsA  = (cl_mem_migration_flags *)malloc(max_sub_devices * sizeof(cl_mem_migration_flags));
  flagsB  = (cl_mem_migration_flags *)malloc(max_sub_devices * sizeof(cl_mem_migration_flags));
  flagsC  = (cl_mem_migration_flags *)malloc(max_sub_devices * sizeof(cl_mem_migration_flags));
  imageA  = (cl_mem *)malloc(max_sub_devices * sizeof(cl_mem));
  imageB  = (cl_mem *)malloc(max_sub_devices * sizeof(cl_mem));
  imageC  = (cl_mem *)malloc(max_sub_devices * sizeof(cl_mem));

  if ((devices == NULL) || (queues == NULL)  ||
      (flagsA == NULL)  || (flagsB == NULL)  || (flagsC == NULL) ||
      (imageA == NULL)  || (imageB == NULL)  || (imageC == NULL)) {
    log_error("ERROR: Failed to successfully allocate required local buffers.\n");
    failed = -1;
    goto cleanup_allocations;
  }

  for (i=0; i<max_sub_devices; i++) {
    devices[i] = NULL;
    queues[i] = NULL;
    imageA[i] = imageB[i] = imageC[i] = NULL;
  }

  for (i=0; i<MEM_OBJ_SIZE; i++) {
    A[i] = genrand_int32(d);
    B[i] = genrand_int32(d);
  }

  // Set the image format: CL_RGBA / CL_UNSIGNED_INT32 to match the kernel's
  // read_imageui/write_imageui accesses.
  format.image_channel_order = CL_RGBA;
  format.image_channel_data_type = CL_UNSIGNED_INT32;

  // Attempt to partition the device along each of the allowed affinity domains.
  if ((err = clGetDeviceInfo(deviceID, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, sizeof(domains), &domains, NULL)) != CL_SUCCESS) {
    print_error(err, "clGetDeviceInfo(CL_DEVICE_PARTITION_AFFINITY_DOMAIN) failed");
    return -1;
  }

  domains &= (CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE | CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE |
              CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE | CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE | CL_DEVICE_AFFINITY_DOMAIN_NUMA);

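  // Each pass of this loop partitions the device by one supported affinity
  // domain (lowest set bit first). If no domain is supported, a single pass
  // runs with domain == 0 on the unpartitioned device.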
  do {
    if (domains) {
      for (domain = 1; (domain & domains) == 0; domain <<= 1) {};
      domains &= ~domain;
    } else {
      domain = 0;
    }

    // Determine the number of partitions for the device given the specific domain.
    if (domain) {
      property[1] = domain;
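      // With a NULL device list, clCreateSubDevices ignores the device count
      // argument and only reports, via num_devices, how many sub-devices this
      // partitioning would produce.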
      err = clCreateSubDevices(deviceID, (const cl_device_partition_property *)property, -1, NULL, &num_devices);
      if ((err != CL_SUCCESS) || (num_devices == 0)) {
        print_error(err, "Obtaining the number of partitions by affinity failed.");
        failed = 1;
        goto cleanup;
      }
    } else {
      num_devices = 1;
    }

    if (num_devices > 1) {
      // Create each of the sub-devices and a corresponding context.
      if ((err = clCreateSubDevices(deviceID, (const cl_device_partition_property *)property, num_devices, devices, &num_devices)) != CL_SUCCESS) {
        print_error(err, "Failed creating sub devices.");
        failed = 1;
        goto cleanup;
      }

      // Create a context containing all the sub-devices.
      ctx = clCreateContext(NULL, num_devices, devices, notify_callback, NULL, &err);
      if (ctx == NULL) {
        print_error(err, "Failed creating context containing the sub-devices.");
        failed = 1;
        goto cleanup;
      }

      // Create a command queue for each sub-device.
      for (i=0; i<num_devices; i++) {
        if (devices[i]) {
          if ((queues[i] = clCreateCommandQueue(ctx, devices[i], 0, &err)) == NULL) {
            print_error(err, "Failed creating command queues.");
            failed = 1;
            goto cleanup;
          }
        }
      }
    } else {
      // No partitioning available. Just exercise the APIs on a single device.
      devices[0] = deviceID;
      queues[0] = queue;
      ctx = context;
    }

    // Build the kernel program.
    if ((err = create_single_kernel_helper(ctx, &program, &kernel, 1,
                                           &image_migrate_kernel_code,
                                           "test_image_migrate")))
    {
      print_error(err, "Failed creating kernel.");
      failed = 1;
      goto cleanup;
    }

    // Create the sampler used by the kernel's image reads.
    sampler = clCreateSampler(ctx, CL_FALSE, CL_ADDRESS_CLAMP, CL_FILTER_NEAREST, &err);
    if ((err != CL_SUCCESS) || !sampler) {
      print_error(err, "Failed to create a sampler.");
      failed = 1;
      goto cleanup;
    }

    num_devices_limited = num_devices;

    // Allocate the test images: three (two inputs, one output) per sub-device.
    // If we run out of memory, restrict the number of sub-devices to be tested.
    for (i=0; i<num_devices; i++) {
      imageA[i] = init_image(queues[i], create_image_2d(ctx, (CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR),
                                                        &format, IMAGE_DIM, IMAGE_DIM, 0, NULL, &err), A);
      imageB[i] = init_image(queues[i], create_image_2d(ctx, (CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR),
                                                        &format, IMAGE_DIM, IMAGE_DIM, 0, NULL, &err), B);
      imageC[i] = create_image_2d(ctx, (CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR),
                                  &format, IMAGE_DIM, IMAGE_DIM, 0, NULL, &err);

      if ((imageA[i] == NULL) || (imageB[i] == NULL) || (imageC[i] == NULL)) {
        if (i == 0) {
          log_error("Failed to allocate even one set of images.\n");
          failed = 1;
          goto cleanup;
        }
        num_devices_limited = i;
        break;
      }
    }

    // Execute the test kernel with every combination of migration options
    // applied to the three images.
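    // The three nested loops below cover {A, B, C} x {PREFERRED, NON_PREFERRED,
    // RANDOM}: 27 migration combinations per partitioning. The per-image flag
    // arrays record whether a RANDOM migration left an image's contents undefined.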
    for (migrateA=(enum migrations)(0); migrateA<NUMBER_OF_MIGRATIONS; migrateA = (enum migrations)((int)migrateA + 1)) {
      if (migrateMemObject(migrateA, queues, imageA, num_devices_limited, flagsA, d) != CL_SUCCESS) {
        failed = 1;
        goto cleanup;
      }
      for (migrateC=(enum migrations)(0); migrateC<NUMBER_OF_MIGRATIONS; migrateC = (enum migrations)((int)migrateC + 1)) {
        if (migrateMemObject(migrateC, queues, imageC, num_devices_limited, flagsC, d) != CL_SUCCESS) {
          failed = 1;
          goto cleanup;
        }
        for (migrateB=(enum migrations)(0); migrateB<NUMBER_OF_MIGRATIONS; migrateB = (enum migrations)((int)migrateB + 1)) {
          if (migrateMemObject(migrateB, queues, imageB, num_devices_limited, flagsB, d) != CL_SUCCESS) {
            failed = 1;
            goto cleanup;
          }
          // Run the test on each of the partitions.
          for (i=0; i<num_devices_limited; i++) {
            cl_uint x;

            x = i + test_number;

            if ((err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (const void *)&imageC[i])) != CL_SUCCESS) {
              print_error(err, "Failed to set kernel argument 0.");
              failed = 1;
              goto cleanup;
            }

            if ((err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (const void *)&imageA[i])) != CL_SUCCESS) {
              print_error(err, "Failed to set kernel argument 1.");
              failed = 1;
              goto cleanup;
            }

            if ((err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (const void *)&imageB[i])) != CL_SUCCESS) {
              print_error(err, "Failed to set kernel argument 2.");
              failed = 1;
              goto cleanup;
            }

            if ((err = clSetKernelArg(kernel, 3, sizeof(cl_sampler), (const void *)&sampler)) != CL_SUCCESS) {
              print_error(err, "Failed to set kernel argument 3.");
              failed = 1;
              goto cleanup;
            }

            if ((err = clSetKernelArg(kernel, 4, sizeof(cl_uint), (const void *)&x)) != CL_SUCCESS) {
              print_error(err, "Failed to set kernel argument 4.");
              failed = 1;
              goto cleanup;
            }

            if ((err = clEnqueueNDRangeKernel(queues[i], kernel, 2, NULL, wgs, wls, 0, NULL, NULL)) != CL_SUCCESS) {
              print_error(err, "Failed enqueuing the NDRange kernel.");
              failed = 1;
              goto cleanup;
            }
          }
          // Verify the results, as long as neither input was migrated with its
          // content left undefined.
          const size_t origin[3] = {0, 0, 0};
          const size_t region[3] = {IMAGE_DIM, IMAGE_DIM, 1};

          for (i=0; i<num_devices_limited; i++, test_number++) {
            if (((flagsA[i] | flagsB[i]) & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED) == 0) {
              if ((err = clEnqueueReadImage(queues[i], imageC[i], CL_TRUE,
                                            origin, region, 0, 0, C, 0, NULL, NULL)) != CL_SUCCESS) {
                print_error(err, "Failed reading output image.");
                failed = 1;
                goto cleanup;
              }
              for (j=0; j<MEM_OBJ_SIZE; j++) {
                cl_uint expected;

                expected = A[j] ^ B[j] ^ test_number;
                if (C[j] != expected) {
                  log_error("Failed on device %d, work item %4d, expected 0x%08x got 0x%08x (0x%08x ^ 0x%08x ^ 0x%08x)\n", i, j, expected, C[j], A[j], B[j], test_number);
                  failed = 1;
                }
              }
              if (failed) goto cleanup;
            }
          }

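          // Put B back wherever the last migration may have discarded its
          // contents, so the next migrateB iteration starts from known data.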
          if (restoreImage(queues, imageB, num_devices_limited, flagsB, B) != CL_SUCCESS) {
            failed = 1;
            goto cleanup;
          }
        }
      }
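      // Likewise restore A before the next migrateA iteration.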
      if (restoreImage(queues, imageA, num_devices_limited, flagsA, A) != CL_SUCCESS) {
        failed = 1;
        goto cleanup;
      }
    }

  cleanup:
    // Clean up all the allocated resources created by the test. This includes
    // sub-devices, command queues, and memory objects.

    for (i=0; i<max_sub_devices; i++) {
      // Image cleanup. Reset each entry to NULL after release so a later
      // partitioning pass cannot re-release a stale handle.
      if (imageA[i]) {
        if ((err = clReleaseMemObject(imageA[i])) != CL_SUCCESS) {
          print_error(err, "Failed releasing memory object.");
          failed = 1;
        }
        imageA[i] = NULL;
      }
      if (imageB[i]) {
        if ((err = clReleaseMemObject(imageB[i])) != CL_SUCCESS) {
          print_error(err, "Failed releasing memory object.");
          failed = 1;
        }
        imageB[i] = NULL;
      }
      if (imageC[i]) {
        if ((err = clReleaseMemObject(imageC[i])) != CL_SUCCESS) {
          print_error(err, "Failed releasing memory object.");
          failed = 1;
        }
        imageC[i] = NULL;
      }

      if (num_devices > 1) {
        // Command queue cleanup
        if (queues[i]) {
          if ((err = clReleaseCommandQueue(queues[i])) != CL_SUCCESS) {
            print_error(err, "Failed releasing command queue.");
            failed = 1;
          }
          queues[i] = NULL;
        }

        // Sub-device cleanup
        if (devices[i]) {
          if ((err = clReleaseDevice(devices[i])) != CL_SUCCESS) {
            print_error(err, "Failed releasing sub device.");
            failed = 1;
          }
        }
        devices[i] = NULL;
      }
    }

    // Sampler cleanup
    if (sampler) {
      if ((err = clReleaseSampler(sampler)) != CL_SUCCESS) {
        print_error(err, "Failed releasing sampler.");
        failed = 1;
      }
      sampler = NULL;
    }

    // Program, kernel, and context cleanup
    if (program) {
      if ((err = clReleaseProgram(program)) != CL_SUCCESS) {
        print_error(err, "Failed releasing program.");
        failed = 1;
      }
      program = NULL;
    }

    if (kernel) {
      if ((err = clReleaseKernel(kernel)) != CL_SUCCESS) {
        print_error(err, "Failed releasing kernel.");
        failed = 1;
      }
      kernel = NULL;
    }

    if (ctx && (ctx != context)) {
      if ((err = clReleaseContext(ctx)) != CL_SUCCESS) {
        print_error(err, "Failed releasing context.");
        failed = 1;
      }
    }
    ctx = NULL;

    if (failed) goto cleanup_allocations;
  } while (domains);

cleanup_allocations:
  if (devices) free(devices);
  if (queues)  free(queues);
  if (flagsA)  free(flagsA);
  if (flagsB)  free(flagsB);
  if (flagsC)  free(flagsC);
  if (imageA)  free(imageA);
  if (imageB)  free(imageB);
  if (imageC)  free(imageC);

  return ((failed) ? -1 : 0);
}