//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include <stdio.h>
#include <stdlib.h>

#include "procs.h"
#include "harness/errorHelpers.h"

#define MAX_SUB_DEVICES 16 // Limit the sub-devices to ensure no out of resource errors.
#define MEM_OBJ_SIZE 1024
#define IMAGE_DIM 16

// Kernel source code: XOR the two source images with the scalar 'x' and write
// the result to the destination image. The host verifies the same XOR on its
// copies of the data.
static const char *image_migrate_kernel_code =
"__kernel void test_image_migrate(write_only image2d_t dst, read_only image2d_t src1,\n"
"                                 read_only image2d_t src2, sampler_t sampler, uint x)\n"
"{\n"
"  int tidX = get_global_id(0), tidY = get_global_id(1);\n"
"  int2 coords = (int2) {tidX, tidY};\n"
"  uint4 val = read_imageui(src1, sampler, coords) ^\n"
"              read_imageui(src2, sampler, coords) ^\n"
"              x;\n"
"  write_imageui(dst, coords, val);\n"
"}\n";

enum migrations { MIGRATE_PREFERRED,     // migrate to the preferred sub-device
                  MIGRATE_NON_PREFERRED, // migrate to a randomly chosen non-preferred sub-device
                  MIGRATE_RANDOM,        // migrate to a randomly chosen sub-device with randomly chosen flags
                  NUMBER_OF_MIGRATIONS };

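// Write the given host data into the image (when it was created successfully)
// and return the image, so creation and initialization can be chained in a
// single expression.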
static cl_mem init_image(cl_command_queue cmd_q, cl_mem image, cl_uint *data)
{
    cl_int err;

    size_t origin[3] = {0, 0, 0};
    size_t region[3] = {IMAGE_DIM, IMAGE_DIM, 1};

    if (image) {
        if ((err = clEnqueueWriteImage(cmd_q, image, CL_TRUE,
                                       origin, region, 0, 0, data, 0, NULL, NULL)) != CL_SUCCESS) {
            print_error(err, "Failed on enqueue write of image data.");
        }
    }

    return image;
}

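// For each of the num_devices memory objects, pick a destination command queue
// according to the requested migration mode and enqueue a migration to it.
// The flags used for each object are recorded in 'flags' so later stages know
// whether its content was migrated as undefined.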
static cl_int migrateMemObject(enum migrations migrate, cl_command_queue *queues, cl_mem *mem_objects,
                               cl_uint num_devices, cl_mem_migration_flags *flags, MTdata d)
{
    cl_uint i, j;
    cl_int err = CL_SUCCESS;

    for (i=0; i<num_devices; i++) {
        j = genrand_int32(d) % num_devices;
        flags[i] = 0;
        switch (migrate) {
            case MIGRATE_PREFERRED:
                // Force the device to be preferred
                j = i;
                break;
            case MIGRATE_NON_PREFERRED:
                // Coerce the device to be non-preferred
                if ((j == i) && (num_devices > 1)) j = (j+1) % num_devices;
                break;
            case MIGRATE_RANDOM:
                // Choose a random set of flags
                flags[i] = (cl_mem_migration_flags)(genrand_int32(d) & (CL_MIGRATE_MEM_OBJECT_HOST | CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED));
                break;
            default: log_error("Unhandled migration type: %d\n", migrate); return -1;
        }
        if ((err = clEnqueueMigrateMemObjects(queues[j], 1, (const cl_mem *)(&mem_objects[i]),
                                              flags[i], 0, NULL, NULL)) != CL_SUCCESS) {
            print_error(err, "Failed migrating memory object.");
        }
    }
    return err;
}

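// Using the flags recorded by migrateMemObject(), rewrite the original host
// data into any image whose contents were migrated as undefined and are
// therefore no longer guaranteed.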
static cl_int restoreImage(cl_command_queue *queues, cl_mem *mem_objects, cl_uint num_devices,
                           cl_mem_migration_flags *flags, cl_uint *buffer)
{
    cl_uint i;
    cl_int err;

    const size_t origin[3] = {0, 0, 0};
    const size_t region[3] = {IMAGE_DIM, IMAGE_DIM, 1};

    // If the image was previously migrated with undefined content, reload the content.

    for (i=0; i<num_devices; i++) {
        if (flags[i] & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED) {
            if ((err = clEnqueueWriteImage(queues[i], mem_objects[i], CL_TRUE,
                                           origin, region, 0, 0, buffer, 0, NULL, NULL)) != CL_SUCCESS) {
                print_error(err, "Failed on restoration enqueue write of image data.");
                return err;
            }
        }
    }
    return CL_SUCCESS;
}

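// Test entry point. Partition the device by each supported affinity domain in
// turn, migrate the source and destination images among the resulting
// sub-devices under every combination of migration modes, run the XOR kernel,
// and check the output against the host-side computation.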
int test_image_migrate(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int failed = 0;
    cl_uint i, j;
    cl_int err;
    cl_uint max_sub_devices = 0;
    cl_uint num_devices, num_devices_limited;
    cl_uint A[MEM_OBJ_SIZE], B[MEM_OBJ_SIZE], C[MEM_OBJ_SIZE];
    cl_uint test_number = 1;
    cl_device_affinity_domain domain, domains;
    cl_device_id *devices;
    cl_command_queue *queues;
    cl_mem_migration_flags *flagsA, *flagsB, *flagsC;
    cl_device_partition_property property[] = {CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, 0, 0};
    cl_mem *imageA, *imageB, *imageC;
    cl_image_format format;
    cl_sampler sampler = NULL;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_context ctx = NULL;
    enum migrations migrateA, migrateB, migrateC;
    MTdata d = init_genrand(gRandomSeed);
    const size_t wgs[2] = {IMAGE_DIM, IMAGE_DIM};
    const size_t wls[2] = {1, 1};

    // Check for image support.
    if (checkForImageSupport(deviceID) == CL_IMAGE_FORMAT_NOT_SUPPORTED) {
        log_info("Device does not support images. Skipping test.\n");
        return 0;
    }

    // Allocate arrays whose size varies according to the maximum number of sub-devices.
    if ((err = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(max_sub_devices), &max_sub_devices, NULL)) != CL_SUCCESS) {
        print_error(err, "clGetDeviceInfo(CL_DEVICE_MAX_COMPUTE_UNITS) failed");
        return -1;
    }
    if (max_sub_devices < 1) {
        log_error("ERROR: Invalid number of compute units returned.\n");
        return -1;
    }

    devices = (cl_device_id *)malloc(max_sub_devices * sizeof(cl_device_id));
    queues = (cl_command_queue *)malloc(max_sub_devices * sizeof(cl_command_queue));
    flagsA = (cl_mem_migration_flags *)malloc(max_sub_devices * sizeof(cl_mem_migration_flags));
    flagsB = (cl_mem_migration_flags *)malloc(max_sub_devices * sizeof(cl_mem_migration_flags));
    flagsC = (cl_mem_migration_flags *)malloc(max_sub_devices * sizeof(cl_mem_migration_flags));
    imageA = (cl_mem *)malloc(max_sub_devices * sizeof(cl_mem));
    imageB = (cl_mem *)malloc(max_sub_devices * sizeof(cl_mem));
    imageC = (cl_mem *)malloc(max_sub_devices * sizeof(cl_mem));

    if ((devices == NULL) || (queues == NULL) ||
        (flagsA == NULL) || (flagsB == NULL) || (flagsC == NULL) ||
        (imageA == NULL) || (imageB == NULL) || (imageC == NULL)) {
        log_error("ERROR: Failed to successfully allocate required local buffers.\n");
        failed = -1;
        goto cleanup_allocations;
    }

    for (i=0; i<max_sub_devices; i++) {
        devices[i] = NULL;
        queues[i] = NULL;
        imageA[i] = imageB[i] = imageC[i] = NULL;
    }

    for (i=0; i<MEM_OBJ_SIZE; i++) {
        A[i] = genrand_int32(d);
        B[i] = genrand_int32(d);
    }

    // Set image format.
    format.image_channel_order = CL_RGBA;
    format.image_channel_data_type = CL_UNSIGNED_INT32;

    // Attempt to partition the device along each of the allowed affinity domains.
    if ((err = clGetDeviceInfo(deviceID, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, sizeof(domains), &domains, NULL)) != CL_SUCCESS) {
        print_error(err, "clGetDeviceInfo(CL_DEVICE_PARTITION_AFFINITY_DOMAIN) failed");
        failed = -1;
        goto cleanup_allocations;
    }

    domains &= (CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE | CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE |
                CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE | CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE | CL_DEVICE_AFFINITY_DOMAIN_NUMA);

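    // Each pass of this loop exercises one affinity domain: partition the device,
    // create per-sub-device command queues and images, and run the migration
    // combinations below. If no affinity domain is supported, a single pass runs
    // on the unpartitioned device instead.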
    do {
        if (domains) {
            for (domain = 1; (domain & domains) == 0; domain <<= 1) {};
            domains &= ~domain;
        } else {
            domain = 0;
        }

        // Determine the number of partitions for the device given the specific domain.
        if (domain) {
            property[1] = domain;
            err = clCreateSubDevices(deviceID, (const cl_device_partition_property *)property, -1, NULL, &num_devices);
            if ((err != CL_SUCCESS) || (num_devices == 0)) {
                print_error(err, "Obtaining the number of partitions by affinity failed.");
                failed = 1;
                goto cleanup;
            }
        } else {
            num_devices = 1;
        }

        if (num_devices > 1) {
            // Create each of the sub-devices and a corresponding context.
            if ((err = clCreateSubDevices(deviceID, (const cl_device_partition_property *)property, num_devices, devices, &num_devices)) != CL_SUCCESS) {
                print_error(err, "Failed creating sub devices.");
                failed = 1;
                goto cleanup;
            }

            // Create a context containing all the sub-devices
            ctx = clCreateContext(NULL, num_devices, devices, notify_callback, NULL, &err);
            if (ctx == NULL) {
                print_error(err, "Failed creating context containing the sub-devices.");
                failed = 1;
                goto cleanup;
            }

            // Create a command queue for each sub-device
            for (i=0; i<num_devices; i++) {
                if (devices[i]) {
                    if ((queues[i] = clCreateCommandQueue(ctx, devices[i], 0, &err)) == NULL) {
                        print_error(err, "Failed creating command queues.");
                        failed = 1;
                        goto cleanup;
                    }
                }
            }
        } else {
            // No partitioning available. Just exercise the APIs on a single device.
            devices[0] = deviceID;
            queues[0] = queue;
            ctx = context;
        }

        // Build the kernel program.
        if ((err = create_single_kernel_helper(ctx, &program, &kernel, 1,
                                               &image_migrate_kernel_code,
                                               "test_image_migrate")))
        {
            print_error(err, "Failed creating kernel.");
            failed = 1;
            goto cleanup;
        }

        // Create sampler.
        sampler = clCreateSampler(ctx, CL_FALSE, CL_ADDRESS_CLAMP, CL_FILTER_NEAREST, &err);
        if ((err != CL_SUCCESS) || !sampler) {
            print_error(err, "Failed to create a sampler.");
            failed = 1;
            goto cleanup;
        }

        num_devices_limited = num_devices;

        // Allocate memory buffers. 3 buffers (2 input, 1 output) for each sub-device.
        // If we run out of memory, then restrict the number of sub-devices to be tested.
        for (i=0; i<num_devices; i++) {
            imageA[i] = init_image(queues[i], create_image_2d(ctx, (CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR),
                                                              &format, IMAGE_DIM, IMAGE_DIM, 0, NULL, &err), A);
            imageB[i] = init_image(queues[i], create_image_2d(ctx, (CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR),
                                                              &format, IMAGE_DIM, IMAGE_DIM, 0, NULL, &err), B);
            imageC[i] = create_image_2d(ctx, (CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR),
                                        &format, IMAGE_DIM, IMAGE_DIM, 0, NULL, &err);

            if ((imageA[i] == NULL) || (imageB[i] == NULL) || (imageC[i] == NULL)) {
                if (i == 0) {
                    log_error("Failed to allocate even 1 set of buffers.\n");
                    failed = 1;
                    goto cleanup;
                }
                num_devices_limited = i;
                break;
            }
        }

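        // Exhaustively combine the migration modes: imageA, imageB and imageC are
        // each migrated under every mode in 'enum migrations'. The flags actually
        // used are recorded in flagsA/flagsB/flagsC so verification can skip
        // combinations whose inputs were migrated with undefined content and
        // restoreImage() can rewrite that content afterwards.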
        // For each partition, execute the test kernel with each of the three
        // images migrated according to one of the migration options.
        for (migrateA=(enum migrations)(0); migrateA<NUMBER_OF_MIGRATIONS; migrateA = (enum migrations)((int)migrateA + 1)) {
            if (migrateMemObject(migrateA, queues, imageA, num_devices_limited, flagsA, d) != CL_SUCCESS) {
                failed = 1;
                goto cleanup;
            }
            for (migrateC=(enum migrations)(0); migrateC<NUMBER_OF_MIGRATIONS; migrateC = (enum migrations)((int)migrateC + 1)) {
                if (migrateMemObject(migrateC, queues, imageC, num_devices_limited, flagsC, d) != CL_SUCCESS) {
                    failed = 1;
                    goto cleanup;
                }
                for (migrateB=(enum migrations)(0); migrateB<NUMBER_OF_MIGRATIONS; migrateB = (enum migrations)((int)migrateB + 1)) {
                    if (migrateMemObject(migrateB, queues, imageB, num_devices_limited, flagsB, d) != CL_SUCCESS) {
                        failed = 1;
                        goto cleanup;
                    }
                    // Run the test on each of the partitions.
                    for (i=0; i<num_devices_limited; i++) {
                        cl_uint x;

                        x = i + test_number;

                        if ((err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (const void *)&imageC[i])) != CL_SUCCESS) {
                            print_error(err, "Failed to set kernel argument 0.");
                            failed = 1;
                            goto cleanup;
                        }

                        if ((err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (const void *)&imageA[i])) != CL_SUCCESS) {
                            print_error(err, "Failed to set kernel argument 1.");
                            failed = 1;
                            goto cleanup;
                        }

                        if ((err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (const void *)&imageB[i])) != CL_SUCCESS) {
                            print_error(err, "Failed to set kernel argument 2.");
                            failed = 1;
                            goto cleanup;
                        }

                        if ((err = clSetKernelArg(kernel, 3, sizeof(cl_sampler), (const void *)&sampler)) != CL_SUCCESS) {
                            print_error(err, "Failed to set kernel argument 3.");
                            failed = 1;
                            goto cleanup;
                        }

                        if ((err = clSetKernelArg(kernel, 4, sizeof(cl_uint), (const void *)&x)) != CL_SUCCESS) {
                            print_error(err, "Failed to set kernel argument 4.");
                            failed = 1;
                            goto cleanup;
                        }

                        if ((err = clEnqueueNDRangeKernel(queues[i], kernel, 2, NULL, wgs, wls, 0, NULL, NULL)) != CL_SUCCESS) {
                            print_error(err, "Failed enqueuing the NDRange kernel.");
                            failed = 1;
                            goto cleanup;
                        }
                    }
                    // Verify the results as long as neither input was migrated with undefined content.
                    const size_t origin[3] = {0, 0, 0};
                    const size_t region[3] = {IMAGE_DIM, IMAGE_DIM, 1};

                    for (i=0; i<num_devices_limited; i++, test_number++) {
                        if (((flagsA[i] | flagsB[i]) & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED) == 0) {
                            if ((err = clEnqueueReadImage(queues[i], imageC[i], CL_TRUE,
                                                          origin, region, 0, 0, C, 0, NULL, NULL)) != CL_SUCCESS) {
                                print_error(err, "Failed reading output image.");
                                failed = 1;
                                goto cleanup;
                            }
                            for (j=0; j<MEM_OBJ_SIZE; j++) {
                                cl_uint expected;

                                expected = A[j] ^ B[j] ^ test_number;
                                if (C[j] != expected) {
                                    log_error("Failed on device %d, work item %4d, expected 0x%08x got 0x%08x (0x%08x ^ 0x%08x ^ 0x%08x)\n", i, j, expected, C[j], A[j], B[j], test_number);
                                    failed = 1;
                                }
                            }
                            if (failed) goto cleanup;
                        }
                    }

                    if (restoreImage(queues, imageB, num_devices_limited, flagsB, B) != CL_SUCCESS) {
                        failed = 1;
                        goto cleanup;
                    }
                }
            }
            if (restoreImage(queues, imageA, num_devices_limited, flagsA, A) != CL_SUCCESS) {
                failed = 1;
                goto cleanup;
            }
        }

    cleanup:
        // Clean up all of the allocated resources created by the test. This
        // includes sub-devices, command queues, and memory buffers.

        for (i=0; i<max_sub_devices; i++) {
            // Memory buffer cleanup. Released handles are reset to NULL so a
            // later affinity-domain iteration cannot re-release them.
            if (imageA[i]) {
                if ((err = clReleaseMemObject(imageA[i])) != CL_SUCCESS) {
                    print_error(err, "Failed releasing memory object.");
                    failed = 1;
                }
                imageA[i] = NULL;
            }
            if (imageB[i]) {
                if ((err = clReleaseMemObject(imageB[i])) != CL_SUCCESS) {
                    print_error(err, "Failed releasing memory object.");
                    failed = 1;
                }
                imageB[i] = NULL;
            }
            if (imageC[i]) {
                if ((err = clReleaseMemObject(imageC[i])) != CL_SUCCESS) {
                    print_error(err, "Failed releasing memory object.");
                    failed = 1;
                }
                imageC[i] = NULL;
            }

            if (num_devices > 1) {
                // Command queue cleanup
                if (queues[i]) {
                    if ((err = clReleaseCommandQueue(queues[i])) != CL_SUCCESS) {
                        print_error(err, "Failed releasing command queue.");
                        failed = 1;
                    }
                    queues[i] = NULL;
                }

                // Sub-device cleanup
                if (devices[i]) {
                    if ((err = clReleaseDevice(devices[i])) != CL_SUCCESS) {
                        print_error(err, "Failed releasing sub device.");
                        failed = 1;
                    }
                }
                devices[i] = 0;
            }
        }

        // Sampler cleanup
        if (sampler) {
            if ((err = clReleaseSampler(sampler)) != CL_SUCCESS) {
                print_error(err, "Failed releasing sampler.");
                failed = 1;
            }
            sampler = NULL;
        }

        // Context, program, and kernel cleanup
        if (program) {
            if ((err = clReleaseProgram(program)) != CL_SUCCESS) {
                print_error(err, "Failed releasing program.");
                failed = 1;
            }
            program = NULL;
        }

        if (kernel) {
            if ((err = clReleaseKernel(kernel)) != CL_SUCCESS) {
                print_error(err, "Failed releasing kernel.");
                failed = 1;
            }
            kernel = NULL;
        }

        if (ctx && (ctx != context)) {
            if ((err = clReleaseContext(ctx)) != CL_SUCCESS) {
                print_error(err, "Failed releasing context.");
                failed = 1;
            }
        }
        ctx = NULL;

        if (failed) goto cleanup_allocations;
    } while (domains);

cleanup_allocations:
    if (devices) free(devices);
    if (queues) free(queues);
    if (flagsA) free(flagsA);
    if (flagsB) free(flagsB);
    if (flagsC) free(flagsC);
    if (imageA) free(imageA);
    if (imageB) free(imageB);
    if (imageC) free(imageC);

    // Release the random number generator state allocated by init_genrand().
    free_mtdata(d);

    return ((failed) ? -1 : 0);
}