1 //
2 // Copyright (c) 2022 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include <vulkan_interop_common.hpp>
18 #include <string>
19 #include "harness/errorHelpers.h"
20 #include <algorithm>
21
#define MAX_2D_IMAGES 5
#define MAX_2D_IMAGE_WIDTH 1024
#define MAX_2D_IMAGE_HEIGHT 1024
#define MAX_2D_IMAGE_ELEMENT_SIZE 16
#define MAX_2D_IMAGE_MIP_LEVELS 11
// Parenthesized so the product stays a single term inside larger expressions
// (the unparenthesized form expanded to `5 *11`, which breaks uses such as
// `x / MAX_2D_IMAGE_DESCRIPTORS`).
#define MAX_2D_IMAGE_DESCRIPTORS (MAX_2D_IMAGES * MAX_2D_IMAGE_MIP_LEVELS)
#define NUM_THREADS_PER_GROUP_X 32
#define NUM_THREADS_PER_GROUP_Y 32
// Number of workgroups needed to cover `size` with groups of `blockSize`.
#define NUM_BLOCKS(size, blockSize)                                            \
    (ROUND_UP((size), (blockSize)) / (blockSize))

// Abort the process with a diagnostic when the condition does not hold.
// Wrapped in do { } while (0) so the macro behaves as one statement and is
// safe inside unbraced if/else.
#define ASSERT(x)                                                              \
    do                                                                         \
    {                                                                          \
        if (!(x))                                                              \
        {                                                                      \
            fprintf(stderr, "Assertion \"%s\" failed at %s:%d\n", #x,          \
                    __FILE__, __LINE__);                                       \
            exit(1);                                                           \
        }                                                                      \
    } while (0)

// Assert x <= y; arguments parenthesized to avoid precedence surprises when
// callers pass compound expressions.
#define ASSERT_LEQ(x, y)                                                       \
    do                                                                         \
    {                                                                          \
        if ((x) > (y))                                                         \
        {                                                                      \
            ASSERT(0);                                                         \
        }                                                                      \
    } while (0)
46
namespace {
// Parameter block mapped host-visible and read by the Vulkan compute shader
// (bound at descriptor binding 0).
struct Params
{
    // Number of image descriptors the shader should index (images * mips).
    uint32_t numImage2DDescriptors;
};
}
// NOTE(review): appears to hold the device UUID used to pair the OpenCL
// device with the Vulkan physical device — populated elsewhere in this file.
static cl_uchar uuid[CL_UUID_SIZE_KHR];
static cl_device_id deviceId = NULL;
// Upper bounds on tested image dimensions; may be lowered at runtime by the
// device's actual limits (set outside this chunk).
size_t max_width = MAX_2D_IMAGE_WIDTH;
size_t max_height = MAX_2D_IMAGE_HEIGHT;
57
// OpenCL C source template for the 1-image-pair kernel: swaps each row y with
// its mirror row (baseHeight-1-y), i.e. vertically flips InputImage into
// OutImage. The %s placeholders are substituted at build time with the pixel
// vector type and the read/write_image suffix (f / i / ui) matching the
// format under test.
const char *kernel_text_numImage_1 = " \
__constant sampler_t smpImg = CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST;\n\
__kernel void image2DKernel(read_only image2d_t InputImage, write_only image2d_t OutImage, int num2DImages, int baseWidth, int baseHeight, int numMipLevels)\n\
{\n\
    int threadIdxX = get_global_id(0);\n\
    int threadIdxY = get_global_id(1);\n\
    int numThreadsX = get_global_size(0); \n\
    int numThreadsY = get_global_size(1);\n\
    if (threadIdxX >= baseWidth || threadIdxY >= baseHeight)\n\
    {\n\
        return;\n\
    }\n\
    %s dataA = read_image%s(InputImage, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
    %s dataB = read_image%s(InputImage, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
    write_image%s(OutImage, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataA);\n\
    write_image%s(OutImage, (int2)( threadIdxX, threadIdxY), dataB);\n\
\n\
}";
76
// Same vertical-flip kernel as kernel_text_numImage_1, but operating on two
// input/output image pairs in one launch. %s placeholders are substituted at
// build time with the pixel vector type and read/write_image suffix.
const char *kernel_text_numImage_2 = " \
__constant sampler_t smpImg = CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST;\n\
__kernel void image2DKernel(read_only image2d_t InputImage_1, write_only image2d_t OutImage_1, read_only image2d_t InputImage_2,write_only image2d_t OutImage_2,int num2DImages, int baseWidth, int baseHeight, int numMipLevels) \n\
{\n\
    int threadIdxX = get_global_id(0);\n\
    int threadIdxY = get_global_id(1);\n\
    int numThreadsX = get_global_size(0);\n\
    int numThreadsY = get_global_size(1);\n\
    if (threadIdxX >= baseWidth || threadIdxY >= baseHeight) \n\
    {\n\
        return;\n\
    }\n\
    %s dataA = read_image%s(InputImage_1, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
    %s dataB = read_image%s(InputImage_1, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
    %s dataC = read_image%s(InputImage_2, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
    %s dataD = read_image%s(InputImage_2, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
    write_image%s(OutImage_1, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataA);\n\
    write_image%s(OutImage_1, (int2)(threadIdxX, threadIdxY), dataB);\n\
    write_image%s(OutImage_2, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataC);\n\
    write_image%s(OutImage_2, (int2)(threadIdxX, threadIdxY), dataD);\n\
\n\
}";
99
// Same vertical-flip kernel as kernel_text_numImage_1, but operating on four
// input/output image pairs in one launch. %s placeholders are substituted at
// build time with the pixel vector type and read/write_image suffix.
const char *kernel_text_numImage_4 = " \
__constant sampler_t smpImg = CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST;\n\
__kernel void image2DKernel(read_only image2d_t InputImage_1, write_only image2d_t OutImage_1, read_only image2d_t InputImage_2, write_only image2d_t OutImage_2, read_only image2d_t InputImage_3, write_only image2d_t OutImage_3, read_only image2d_t InputImage_4, write_only image2d_t OutImage_4, int num2DImages, int baseWidth, int baseHeight, int numMipLevels) \n\
{\n\
    int threadIdxX = get_global_id(0);\n\
    int threadIdxY = get_global_id(1);\n\
    int numThreadsX = get_global_size(0);\n\
    int numThreadsY = get_global_size(1);\n\
    if (threadIdxX >= baseWidth || threadIdxY >= baseHeight) \n\
    {\n\
        return;\n\
    }\n\
    %s dataA = read_image%s(InputImage_1, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
    %s dataB = read_image%s(InputImage_1, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
    %s dataC = read_image%s(InputImage_2, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
    %s dataD = read_image%s(InputImage_2, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
    %s dataE = read_image%s(InputImage_3, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
    %s dataF = read_image%s(InputImage_3, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
    %s dataG = read_image%s(InputImage_4, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
    %s dataH = read_image%s(InputImage_4, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
    write_image%s(OutImage_1, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataA);\n\
    write_image%s(OutImage_1, (int2)(threadIdxX, threadIdxY), dataB);\n\
    write_image%s(OutImage_2, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataC);\n\
    write_image%s(OutImage_2, (int2)(threadIdxX, threadIdxY), dataD);\n\
    write_image%s(OutImage_3, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataE);\n\
    write_image%s(OutImage_3, (int2)(threadIdxX, threadIdxY), dataF);\n\
    write_image%s(OutImage_4, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataG);\n\
    write_image%s(OutImage_4, (int2)(threadIdxX, threadIdxY), dataH);\n\
\n\
}";
130
// Test sweep parameters: number of image pairs and base image dimensions
// exercised by the run_test_* loops below.
const uint32_t num2DImagesList[] = { 1, 2, 4 };
const uint32_t widthList[] = { 4, 64, 183, 1024 };
const uint32_t heightList[] = { 4, 64, 365 };
134
getKernelType(VulkanFormat format,cl_kernel kernel_float,cl_kernel kernel_signed,cl_kernel kernel_unsigned)135 const cl_kernel getKernelType(VulkanFormat format, cl_kernel kernel_float,
136 cl_kernel kernel_signed,
137 cl_kernel kernel_unsigned)
138 {
139 cl_kernel kernel;
140 switch (format)
141 {
142 case VULKAN_FORMAT_R32G32B32A32_SFLOAT: kernel = kernel_float; break;
143
144 case VULKAN_FORMAT_R32G32B32A32_UINT: kernel = kernel_unsigned; break;
145
146 case VULKAN_FORMAT_R32G32B32A32_SINT: kernel = kernel_signed; break;
147
148 case VULKAN_FORMAT_R16G16B16A16_UINT: kernel = kernel_unsigned; break;
149
150 case VULKAN_FORMAT_R16G16B16A16_SINT: kernel = kernel_signed; break;
151
152 case VULKAN_FORMAT_R8G8B8A8_UINT: kernel = kernel_unsigned; break;
153
154 case VULKAN_FORMAT_R8G8B8A8_SINT: kernel = kernel_signed; break;
155
156 case VULKAN_FORMAT_R32G32_SFLOAT: kernel = kernel_float; break;
157
158 case VULKAN_FORMAT_R32G32_UINT: kernel = kernel_unsigned; break;
159
160 case VULKAN_FORMAT_R32G32_SINT: kernel = kernel_signed; break;
161
162 case VULKAN_FORMAT_R16G16_UINT: kernel = kernel_unsigned; break;
163
164 case VULKAN_FORMAT_R16G16_SINT: kernel = kernel_signed; break;
165
166 case VULKAN_FORMAT_R8G8_UINT: kernel = kernel_unsigned; break;
167
168 case VULKAN_FORMAT_R8G8_SINT: kernel = kernel_signed; break;
169
170 case VULKAN_FORMAT_R32_SFLOAT: kernel = kernel_float; break;
171
172 case VULKAN_FORMAT_R32_UINT: kernel = kernel_unsigned; break;
173
174 case VULKAN_FORMAT_R32_SINT: kernel = kernel_signed; break;
175
176 case VULKAN_FORMAT_R16_UINT: kernel = kernel_unsigned; break;
177
178 case VULKAN_FORMAT_R16_SINT: kernel = kernel_signed; break;
179
180 case VULKAN_FORMAT_R8_UINT: kernel = kernel_unsigned; break;
181
182 case VULKAN_FORMAT_R8_SINT: kernel = kernel_signed; break;
183
184 default:
185 log_error(" Unsupported format");
186 ASSERT(0);
187 break;
188 }
189 return kernel;
190 }
191
// Cross-command-queue variant of the Vulkan/OpenCL 2D-image interop test.
//
// For every combination of supported Vulkan format, width, height, image
// count, external-memory handle type and usable memory type:
//   1. Fill a host-visible Vulkan staging buffer with a deterministic pattern
//      (mirrored in srcBufferPtr as the host reference).
//   2. Copy it into Vulkan-allocated images shared with OpenCL, run a Vulkan
//      compute shader, and hand off to OpenCL via external semaphores.
//   3. Launch updateKernelCQ1 on cmd_queue1 and updateKernelCQ2 on
//      cmd_queue2 (ordered by an event), each flipping images vertically so
//      the final images match the source pattern again.
//   4. Read the output images back and memcmp against the reference.
//
// kernel_unsigned / kernel_signed / kernel_float are arrays of pre-built
// kernels indexed by image-count variant; getKernelType() picks the one
// matching the format's component class. Returns CL_SUCCESS on success, an
// OpenCL error code on API failure, or -1 on a data mismatch.
int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1,
                            cl_command_queue &cmd_queue2,
                            cl_kernel *kernel_unsigned,
                            cl_kernel *kernel_signed, cl_kernel *kernel_float,
                            VulkanDevice &vkDevice)
{
    cl_int err = CL_SUCCESS;
    size_t origin[3] = { 0, 0, 0 };
    size_t region[3] = { 1, 1, 1 };

    cl_kernel updateKernelCQ1, updateKernelCQ2;
    std::vector<VulkanFormat> vkFormatList = getSupportedVulkanFormatList();
    const std::vector<VulkanExternalMemoryHandleType>
        vkExternalMemoryHandleTypeList =
            getSupportedVulkanExternalMemoryHandleTypeList();
    char magicValue = 0;

    // Host-visible parameter buffer consumed by the Vulkan compute shader
    // (descriptor binding 0).
    VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params));
    VulkanDeviceMemory vkParamsDeviceMemory(
        vkDevice, vkParamsBuffer.getSize(),
        getVulkanMemoryType(vkDevice,
                            VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
    vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);

    // Staging buffer sized for the largest tested image (x2 headroom).
    uint64_t maxImage2DSize =
        max_width * max_height * MAX_2D_IMAGE_ELEMENT_SIZE * 2;
    VulkanBuffer vkSrcBuffer(vkDevice, maxImage2DSize);
    VulkanDeviceMemory vkSrcBufferDeviceMemory(
        vkDevice, vkSrcBuffer.getSize(),
        getVulkanMemoryType(vkDevice,
                            VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
    vkSrcBufferDeviceMemory.bindBuffer(vkSrcBuffer);

    // Host-side reference (src) and readback (dst) copies of the image data.
    char *srcBufferPtr, *dstBufferPtr;
    srcBufferPtr = (char *)malloc(maxImage2DSize);
    dstBufferPtr = (char *)malloc(maxImage2DSize);

    // Binding 0: Params buffer; binding 1: array of storage image views.
    VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList;
    vkDescriptorSetLayoutBindingList.addBinding(
        0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
    vkDescriptorSetLayoutBindingList.addBinding(
        1, VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_2D_IMAGE_DESCRIPTORS);
    VulkanDescriptorSetLayout vkDescriptorSetLayout(
        vkDevice, vkDescriptorSetLayoutBindingList);
    VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);

    VulkanDescriptorPool vkDescriptorPool(vkDevice,
                                          vkDescriptorSetLayoutBindingList);
    VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
                                        vkDescriptorSetLayout);

    VulkanCommandPool vkCommandPool(vkDevice);
    VulkanCommandBuffer vkCopyCommandBuffer(vkDevice, vkCommandPool);
    VulkanCommandBuffer vkShaderCommandBuffer(vkDevice, vkCommandPool);
    VulkanQueue &vkQueue = vkDevice.getQueue();

    // External semaphores for the Vulkan->CL and CL->Vulkan handoffs.
    VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
        getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
    VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
    VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
    clExternalSemaphore *clVk2CLExternalSemaphore = NULL;
    clExternalSemaphore *clCl2VkExternalSemaphore = NULL;

    clVk2CLExternalSemaphore = new clExternalSemaphore(
        vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
    clCl2VkExternalSemaphore = new clExternalSemaphore(
        vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);

    // Per-iteration allocations; drained after each memory-type iteration.
    std::vector<VulkanDeviceMemory *> vkImage2DListDeviceMemory1;
    std::vector<VulkanDeviceMemory *> vkImage2DListDeviceMemory2;
    std::vector<clExternalMemoryImage *> externalMemory1;
    std::vector<clExternalMemoryImage *> externalMemory2;
    std::vector<char> vkImage2DShader;

    for (size_t fIdx = 0; fIdx < vkFormatList.size(); fIdx++)
    {
        VulkanFormat vkFormat = vkFormatList[fIdx];
        log_info("Format: %d\n", vkFormat);
        uint32_t elementSize = getVulkanFormatElementSize(vkFormat);
        ASSERT_LEQ(elementSize, (uint32_t)MAX_2D_IMAGE_ELEMENT_SIZE);
        log_info("elementSize= %d\n", elementSize);

        // Load the per-format SPIR-V compute shader from disk.
        std::string fileName = "image2D_"
            + std::string(getVulkanFormatGLSLFormat(vkFormat)) + ".spv";
        log_info("Load %s file", fileName.c_str());
        vkImage2DShader = readFile(fileName);
        VulkanShaderModule vkImage2DShaderModule(vkDevice, vkImage2DShader);

        VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
                                                vkImage2DShaderModule);

        for (size_t wIdx = 0; wIdx < ARRAY_SIZE(widthList); wIdx++)
        {
            uint32_t width = widthList[wIdx];
            log_info("Width: %d\n", width);
            if (width > max_width) continue;
            region[0] = width;
            for (size_t hIdx = 0; hIdx < ARRAY_SIZE(heightList); hIdx++)
            {
                uint32_t height = heightList[hIdx];
                log_info("Height: %d", height);
                if (height > max_height) continue;
                region[1] = height;

                uint32_t numMipLevels = 1;
                log_info("Number of mipmap levels: %d\n", numMipLevels);

                // Write a fresh deterministic pattern into the Vulkan staging
                // buffer and the host reference buffer; magicValue makes each
                // iteration's pattern distinct.
                magicValue++;
                char *vkSrcBufferDeviceMemoryPtr =
                    (char *)vkSrcBufferDeviceMemory.map();
                uint64_t srcBufSize = 0;
                memset(vkSrcBufferDeviceMemoryPtr, 0, maxImage2DSize);
                memset(srcBufferPtr, 0, maxImage2DSize);
                uint32_t mipLevel = 0;
                for (uint32_t row = 0;
                     row < std::max(height >> mipLevel, uint32_t(1)); row++)
                {
                    for (uint32_t col = 0;
                         col < std::max(width >> mipLevel, uint32_t(1)); col++)
                    {
                        for (uint32_t elementByte = 0;
                             elementByte < elementSize; elementByte++)
                        {
                            vkSrcBufferDeviceMemoryPtr[srcBufSize] =
                                (char)(magicValue + mipLevel + row + col);
                            srcBufferPtr[srcBufSize] =
                                (char)(magicValue + mipLevel + row + col);
                            srcBufSize++;
                        }
                    }
                }
                srcBufSize = ROUND_UP(
                    srcBufSize,
                    std::max(
                        elementSize,
                        (uint32_t)VULKAN_MIN_BUFFER_OFFSET_COPY_ALIGNMENT));
                vkSrcBufferDeviceMemory.unmap();

                for (size_t niIdx = 0; niIdx < ARRAY_SIZE(num2DImagesList);
                     niIdx++)
                {
                    uint32_t num2DImages = num2DImagesList[niIdx] + 1;
                    // added one image for cross-cq case for updateKernelCQ2
                    log_info("Number of images: %d\n", num2DImages);
                    ASSERT_LEQ(num2DImages, (uint32_t)MAX_2D_IMAGES);
                    uint32_t num_2D_image;
                    if (useSingleImageKernel)
                    {
                        num_2D_image = 1;
                    }
                    else
                    {
                        num_2D_image = num2DImages;
                    }
                    // Tell the Vulkan shader how many descriptors to touch.
                    Params *params = (Params *)vkParamsDeviceMemory.map();
                    params->numImage2DDescriptors = num_2D_image * numMipLevels;
                    vkParamsDeviceMemory.unmap();
                    vkDescriptorSet.update(0, vkParamsBuffer);
                    for (size_t emhtIdx = 0;
                         emhtIdx < vkExternalMemoryHandleTypeList.size();
                         emhtIdx++)
                    {
                        VulkanExternalMemoryHandleType
                            vkExternalMemoryHandleType =
                                vkExternalMemoryHandleTypeList[emhtIdx];
                        if ((true == disableNTHandleType)
                            && (VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT
                                == vkExternalMemoryHandleType))
                        {
                            // Skip running for WIN32 NT handle.
                            continue;
                        }
                        log_info("External memory handle type: %d \n",
                                 vkExternalMemoryHandleType);
                        VulkanImageTiling vulkanImageTiling =
                            vkClExternalMemoryHandleTilingAssumption(
                                deviceId,
                                vkExternalMemoryHandleTypeList[emhtIdx], &err);
                        ASSERT_SUCCESS(err,
                                       "Failed to query OpenCL tiling mode");

                        // Throwaway image used only to enumerate the memory
                        // types usable with this handle type.
                        VulkanImage2D vkDummyImage2D(
                            vkDevice, vkFormatList[0], widthList[0],
                            heightList[0], vulkanImageTiling, 1,
                            vkExternalMemoryHandleType);
                        const VulkanMemoryTypeList &memoryTypeList =
                            vkDummyImage2D.getMemoryTypeList();

                        for (size_t mtIdx = 0; mtIdx < memoryTypeList.size();
                             mtIdx++)
                        {
                            const VulkanMemoryType &memoryType =
                                memoryTypeList[mtIdx];
                            log_info("Memory type index: %d\n",
                                     (uint32_t)memoryType);
                            log_info("Memory type property: %d\n",
                                     memoryType.getMemoryTypeProperty());
                            if (!useDeviceLocal)
                            {
                                if (VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL
                                    == memoryType.getMemoryTypeProperty())
                                {
                                    continue;
                                }
                            }

                            size_t totalImageMemSize = 0;
                            uint64_t interImageOffset = 0;
                            {
                                // Probe one image to learn the aligned size
                                // required for this configuration.
                                VulkanImage2D vkImage2D(
                                    vkDevice, vkFormat, width, height,
                                    vulkanImageTiling, numMipLevels,
                                    vkExternalMemoryHandleType);
                                ASSERT_LEQ(vkImage2D.getSize(), maxImage2DSize);
                                totalImageMemSize =
                                    ROUND_UP(vkImage2D.getSize(),
                                             vkImage2D.getAlignment());
                            }
                            // First image set: written by the Vulkan shader,
                            // imported into OpenCL as external_mem_image1.
                            VulkanImage2DList vkImage2DList(
                                num2DImages, vkDevice, vkFormat, width, height,
                                vulkanImageTiling, numMipLevels,
                                vkExternalMemoryHandleType);
                            for (size_t bIdx = 0; bIdx < num2DImages; bIdx++)
                            {
                                vkImage2DListDeviceMemory1.push_back(
                                    new VulkanDeviceMemory(
                                        vkDevice, vkImage2DList[bIdx],
                                        memoryType,
                                        vkExternalMemoryHandleType));
                                vkImage2DListDeviceMemory1[bIdx]->bindImage(
                                    vkImage2DList[bIdx], 0);
                                externalMemory1.push_back(
                                    new clExternalMemoryImage(
                                        *vkImage2DListDeviceMemory1[bIdx],
                                        vkExternalMemoryHandleType, context,
                                        totalImageMemSize, width, height, 0,
                                        vkImage2DList[bIdx], deviceId));
                            }
                            VulkanImageViewList vkImage2DViewList(
                                vkDevice, vkImage2DList);
                            // Second image set: written by the OpenCL kernels,
                            // read back for verification.
                            VulkanImage2DList vkImage2DList2(
                                num2DImages, vkDevice, vkFormat, width, height,
                                vulkanImageTiling, numMipLevels,
                                vkExternalMemoryHandleType);
                            for (size_t bIdx = 0; bIdx < num2DImages; bIdx++)
                            {
                                vkImage2DListDeviceMemory2.push_back(
                                    new VulkanDeviceMemory(
                                        vkDevice, vkImage2DList2[bIdx],
                                        memoryType,
                                        vkExternalMemoryHandleType));
                                vkImage2DListDeviceMemory2[bIdx]->bindImage(
                                    vkImage2DList2[bIdx], 0);
                                externalMemory2.push_back(
                                    new clExternalMemoryImage(
                                        *vkImage2DListDeviceMemory2[bIdx],
                                        vkExternalMemoryHandleType, context,
                                        totalImageMemSize, width, height, 0,
                                        vkImage2DList2[bIdx], deviceId));
                            }

                            cl_mem external_mem_image1[5];
                            cl_mem external_mem_image2[5];
                            for (int i = 0; i < num2DImages; i++)
                            {
                                external_mem_image1[i] =
                                    externalMemory1[i]
                                        ->getExternalMemoryImage();
                                external_mem_image2[i] =
                                    externalMemory2[i]
                                        ->getExternalMemoryImage();
                            }

                            clCl2VkExternalSemaphore->signal(cmd_queue1);
                            if (!useSingleImageKernel)
                            {
                                // Record copy + shader command buffers once,
                                // covering the whole image list; they are
                                // resubmitted each inner iteration.
                                vkDescriptorSet.updateArray(1,
                                                            vkImage2DViewList);
                                vkCopyCommandBuffer.begin();
                                vkCopyCommandBuffer.pipelineBarrier(
                                    vkImage2DList,
                                    VULKAN_IMAGE_LAYOUT_UNDEFINED,
                                    VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
                                for (size_t i2DIdx = 0;
                                     i2DIdx < vkImage2DList.size(); i2DIdx++)
                                {
                                    vkCopyCommandBuffer.copyBufferToImage(
                                        vkSrcBuffer, vkImage2DList[i2DIdx],
                                        VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
                                }
                                vkCopyCommandBuffer.pipelineBarrier(
                                    vkImage2DList,
                                    VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                    VULKAN_IMAGE_LAYOUT_GENERAL);
                                vkCopyCommandBuffer.end();
                                memset(dstBufferPtr, 0, srcBufSize);
                                vkQueue.submit(vkCopyCommandBuffer);
                                vkShaderCommandBuffer.begin();
                                vkShaderCommandBuffer.bindPipeline(
                                    vkComputePipeline);
                                vkShaderCommandBuffer.bindDescriptorSets(
                                    vkComputePipeline, vkPipelineLayout,
                                    vkDescriptorSet);
                                vkShaderCommandBuffer.dispatch(
                                    NUM_BLOCKS(width, NUM_THREADS_PER_GROUP_X),
                                    NUM_BLOCKS(height,
                                               NUM_THREADS_PER_GROUP_Y / 2),
                                    1);
                                vkShaderCommandBuffer.end();
                            }
                            for (uint32_t iter = 0; iter < innerIterations;
                                 iter++)
                            {
                                if (useSingleImageKernel)
                                {
                                    // Re-record per image: one descriptor slot
                                    // is reused for each image in turn.
                                    for (size_t i2DIdx = 0;
                                         i2DIdx < vkImage2DList.size();
                                         i2DIdx++)
                                    {
                                        vkDescriptorSet.update(
                                            1, vkImage2DViewList[i2DIdx]);
                                        vkCopyCommandBuffer.begin();
                                        vkCopyCommandBuffer.pipelineBarrier(
                                            vkImage2DList,
                                            VULKAN_IMAGE_LAYOUT_UNDEFINED,
                                            VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);

                                        vkCopyCommandBuffer.copyBufferToImage(
                                            vkSrcBuffer, vkImage2DList[i2DIdx],
                                            VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
                                        vkCopyCommandBuffer.pipelineBarrier(
                                            vkImage2DList,
                                            VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                            VULKAN_IMAGE_LAYOUT_GENERAL);
                                        vkCopyCommandBuffer.end();
                                        memset(dstBufferPtr, 0, srcBufSize);
                                        vkQueue.submit(vkCopyCommandBuffer);
                                        vkShaderCommandBuffer.begin();
                                        vkShaderCommandBuffer.bindPipeline(
                                            vkComputePipeline);
                                        vkShaderCommandBuffer
                                            .bindDescriptorSets(
                                                vkComputePipeline,
                                                vkPipelineLayout,
                                                vkDescriptorSet);
                                        vkShaderCommandBuffer.dispatch(
                                            NUM_BLOCKS(width,
                                                       NUM_THREADS_PER_GROUP_X),
                                            NUM_BLOCKS(height,
                                                       NUM_THREADS_PER_GROUP_Y
                                                           / 2),
                                            1);
                                        vkShaderCommandBuffer.end();
                                        // The last image's submit happens
                                        // below with the semaphores attached.
                                        if (i2DIdx < vkImage2DList.size() - 1)
                                        {
                                            vkQueue.submit(
                                                vkShaderCommandBuffer);
                                        }
                                    }
                                }
                                // Vulkan waits on CL->Vk, signals Vk->CL.
                                vkQueue.submit(vkCl2VkSemaphore,
                                               vkShaderCommandBuffer,
                                               vkVk2CLSemaphore);
                                clVk2CLExternalSemaphore->wait(cmd_queue1);
                                // num2DImages = num2DImagesList[niIdx] + 1, so
                                // only 2, 3 and 5 are reachable here; indices
                                // 0-2 select the 1/2/4-image kernel variants.
                                switch (num2DImages)
                                {
                                    case 2:
                                        updateKernelCQ1 = getKernelType(
                                            vkFormat, kernel_float[0],
                                            kernel_signed[0],
                                            kernel_unsigned[0]);
                                        break;
                                    case 3:
                                        updateKernelCQ1 = getKernelType(
                                            vkFormat, kernel_float[1],
                                            kernel_signed[1],
                                            kernel_unsigned[1]);
                                        break;
                                    case 5:
                                        updateKernelCQ1 = getKernelType(
                                            vkFormat, kernel_float[2],
                                            kernel_signed[2],
                                            kernel_unsigned[2]);
                                        break;
                                }
                                updateKernelCQ2 = getKernelType(
                                    vkFormat, kernel_float[3], kernel_signed[3],
                                    kernel_unsigned[3]);
                                // similar kernel-type based on vkFormat
                                int j = 0;
                                // Setting arguments of updateKernelCQ2:
                                // it processes the first and the extra (last)
                                // image pair.

                                err = clSetKernelArg(updateKernelCQ2, 0,
                                                     sizeof(cl_mem),
                                                     &external_mem_image1[0]);
                                err |= clSetKernelArg(updateKernelCQ2, 1,
                                                      sizeof(cl_mem),
                                                      &external_mem_image2[0]);
                                err |= clSetKernelArg(
                                    updateKernelCQ2, 2, sizeof(cl_mem),
                                    &external_mem_image1[num2DImages - 1]);
                                err |= clSetKernelArg(
                                    updateKernelCQ2, 3, sizeof(cl_mem),
                                    &external_mem_image2[num2DImages - 1]);
                                err |= clSetKernelArg(updateKernelCQ2, 4,
                                                      sizeof(unsigned int),
                                                      &num2DImages);
                                err |= clSetKernelArg(updateKernelCQ2, 5,
                                                      sizeof(unsigned int),
                                                      &width);
                                err |= clSetKernelArg(updateKernelCQ2, 6,
                                                      sizeof(unsigned int),
                                                      &height);
                                err |= clSetKernelArg(updateKernelCQ2, 7,
                                                      sizeof(unsigned int),
                                                      &numMipLevels);
                                // updateKernelCQ1 takes the first
                                // num2DImages-1 image pairs.
                                for (int i = 0; i < num2DImages - 1; i++, ++j)
                                {
                                    err = clSetKernelArg(
                                        updateKernelCQ1, j, sizeof(cl_mem),
                                        &external_mem_image1[i]);
                                    err |= clSetKernelArg(
                                        updateKernelCQ1, ++j, sizeof(cl_mem),
                                        &external_mem_image2[i]);
                                }
                                err |= clSetKernelArg(updateKernelCQ1, j,
                                                      sizeof(unsigned int),
                                                      &num2DImages);
                                err |= clSetKernelArg(updateKernelCQ1, ++j,
                                                      sizeof(unsigned int),
                                                      &width);
                                err |= clSetKernelArg(updateKernelCQ1, ++j,
                                                      sizeof(unsigned int),
                                                      &height);
                                err |= clSetKernelArg(updateKernelCQ1, ++j,
                                                      sizeof(unsigned int),
                                                      &numMipLevels);

                                if (err != CL_SUCCESS)
                                {
                                    print_error(
                                        err,
                                        "Error: Failed to set arg values \n");
                                    goto CLEANUP;
                                }
                                // clVk2CLExternalSemaphore->wait(cmd_queue1);
                                size_t global_work_size[3] = { width, height,
                                                               1 };
                                cl_event first_launch;
                                // CQ1 kernel launches first; CQ2 kernel is
                                // ordered after it via the completion event.
                                err = clEnqueueNDRangeKernel(
                                    cmd_queue1, updateKernelCQ1, 2, NULL,
                                    global_work_size, NULL, 0, NULL,
                                    &first_launch);
                                if (err != CL_SUCCESS)
                                {
                                    goto CLEANUP;
                                }
                                err = clEnqueueNDRangeKernel(
                                    cmd_queue2, updateKernelCQ2, 2, NULL,
                                    global_work_size, NULL, 1, &first_launch,
                                    NULL);
                                if (err != CL_SUCCESS)
                                {
                                    goto CLEANUP;
                                }

                                clFinish(cmd_queue2);
                                clCl2VkExternalSemaphore->signal(cmd_queue2);
                            }

                            // Read back each output image (blocking) and
                            // compare with the host reference pattern.
                            unsigned int flags = 0;
                            size_t mipmapLevelOffset = 0;
                            cl_event eventReadImage = NULL;
                            clFinish(cmd_queue2);
                            for (int i = 0; i < num2DImages; i++)
                            {
                                err = clEnqueueReadImage(
                                    cmd_queue1, external_mem_image2[i], CL_TRUE,
                                    origin, region, 0, 0, dstBufferPtr, 0, NULL,
                                    &eventReadImage);

                                if (err != CL_SUCCESS)
                                {
                                    print_error(err,
                                                "clEnqueueReadImage failed with"
                                                "error\n");
                                }

                                if (memcmp(srcBufferPtr, dstBufferPtr,
                                           srcBufSize))
                                {
                                    log_info("Source and destination buffers "
                                             "don't match\n");
                                    if (debug_trace)
                                    {
                                        log_info("Source buffer contents: \n");
                                        for (uint64_t sIdx = 0;
                                             sIdx < srcBufSize; sIdx++)
                                        {
                                            log_info(
                                                "%d ",
                                                (int)vkSrcBufferDeviceMemoryPtr
                                                    [sIdx]);
                                        }
                                        log_info("Destination buffer contents:"
                                                 "\n");
                                        for (uint64_t dIdx = 0;
                                             dIdx < srcBufSize; dIdx++)
                                        {
                                            log_info("%d ",
                                                     (int)dstBufferPtr[dIdx]);
                                        }
                                    }
                                    err = -1;
                                    break;
                                }
                            }
                            // Release this iteration's device memory and CL
                            // image wrappers, then drop them from the lists.
                            for (int i = 0; i < num2DImages; i++)
                            {
                                delete vkImage2DListDeviceMemory1[i];
                                delete vkImage2DListDeviceMemory2[i];
                                delete externalMemory1[i];
                                delete externalMemory2[i];
                            }
                            vkImage2DListDeviceMemory1.erase(
                                vkImage2DListDeviceMemory1.begin(),
                                vkImage2DListDeviceMemory1.begin()
                                    + num2DImages);
                            vkImage2DListDeviceMemory2.erase(
                                vkImage2DListDeviceMemory2.begin(),
                                vkImage2DListDeviceMemory2.begin()
                                    + num2DImages);
                            externalMemory1.erase(externalMemory1.begin(),
                                                  externalMemory1.begin()
                                                      + num2DImages);
                            externalMemory2.erase(externalMemory2.begin(),
                                                  externalMemory2.begin()
                                                      + num2DImages);
                            if (CL_SUCCESS != err)
                            {
                                goto CLEANUP;
                            }
                        }
                    }
                }
            }
        }

        vkImage2DShader.clear();
    }
CLEANUP:
    if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
    if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;

    if (srcBufferPtr) free(srcBufferPtr);
    if (dstBufferPtr) free(dstBufferPtr);
    return err;
}
750
run_test_with_one_queue(cl_context & context,cl_command_queue & cmd_queue1,cl_kernel * kernel_unsigned,cl_kernel * kernel_signed,cl_kernel * kernel_float,VulkanDevice & vkDevice)751 int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1,
752 cl_kernel *kernel_unsigned,
753 cl_kernel *kernel_signed, cl_kernel *kernel_float,
754 VulkanDevice &vkDevice)
755 {
756 cl_int err = CL_SUCCESS;
757 size_t origin[3] = { 0, 0, 0 };
758 size_t region[3] = { 1, 1, 1 };
759 cl_kernel updateKernelCQ1;
760 std::vector<VulkanFormat> vkFormatList = getSupportedVulkanFormatList();
761 const std::vector<VulkanExternalMemoryHandleType>
762 vkExternalMemoryHandleTypeList =
763 getSupportedVulkanExternalMemoryHandleTypeList();
764 char magicValue = 0;
765
766 VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params));
767 VulkanDeviceMemory vkParamsDeviceMemory(
768 vkDevice, vkParamsBuffer.getSize(),
769 getVulkanMemoryType(vkDevice,
770 VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
771 vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);
772
773 uint64_t maxImage2DSize =
774 max_width * max_height * MAX_2D_IMAGE_ELEMENT_SIZE * 2;
775 VulkanBuffer vkSrcBuffer(vkDevice, maxImage2DSize);
776 VulkanDeviceMemory vkSrcBufferDeviceMemory(
777 vkDevice, vkSrcBuffer.getSize(),
778 getVulkanMemoryType(vkDevice,
779 VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
780 vkSrcBufferDeviceMemory.bindBuffer(vkSrcBuffer);
781
782 char *srcBufferPtr, *dstBufferPtr;
783 srcBufferPtr = (char *)malloc(maxImage2DSize);
784 dstBufferPtr = (char *)malloc(maxImage2DSize);
785
786 VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList;
787 vkDescriptorSetLayoutBindingList.addBinding(
788 0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
789 vkDescriptorSetLayoutBindingList.addBinding(
790 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_2D_IMAGE_DESCRIPTORS);
791 VulkanDescriptorSetLayout vkDescriptorSetLayout(
792 vkDevice, vkDescriptorSetLayoutBindingList);
793 VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
794
795 VulkanDescriptorPool vkDescriptorPool(vkDevice,
796 vkDescriptorSetLayoutBindingList);
797 VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
798 vkDescriptorSetLayout);
799
800 VulkanCommandPool vkCommandPool(vkDevice);
801 VulkanCommandBuffer vkCopyCommandBuffer(vkDevice, vkCommandPool);
802 VulkanCommandBuffer vkShaderCommandBuffer(vkDevice, vkCommandPool);
803 VulkanQueue &vkQueue = vkDevice.getQueue();
804
805 VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
806 getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
807 VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
808 VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
809 clExternalSemaphore *clVk2CLExternalSemaphore = NULL;
810 clExternalSemaphore *clCl2VkExternalSemaphore = NULL;
811
812 clVk2CLExternalSemaphore = new clExternalSemaphore(
813 vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
814 clCl2VkExternalSemaphore = new clExternalSemaphore(
815 vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
816
817 std::vector<VulkanDeviceMemory *> vkImage2DListDeviceMemory1;
818 std::vector<VulkanDeviceMemory *> vkImage2DListDeviceMemory2;
819 std::vector<clExternalMemoryImage *> externalMemory1;
820 std::vector<clExternalMemoryImage *> externalMemory2;
821 std::vector<char> vkImage2DShader;
822
823 for (size_t fIdx = 0; fIdx < vkFormatList.size(); fIdx++)
824 {
825 VulkanFormat vkFormat = vkFormatList[fIdx];
826 log_info("Format: %d\n", vkFormat);
827 uint32_t elementSize = getVulkanFormatElementSize(vkFormat);
828 ASSERT_LEQ(elementSize, (uint32_t)MAX_2D_IMAGE_ELEMENT_SIZE);
829 log_info("elementSize= %d\n", elementSize);
830
831 std::string fileName = "image2D_"
832 + std::string(getVulkanFormatGLSLFormat(vkFormat)) + ".spv";
833 log_info("Load %s file", fileName.c_str());
834 vkImage2DShader = readFile(fileName);
835 VulkanShaderModule vkImage2DShaderModule(vkDevice, vkImage2DShader);
836
837 VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
838 vkImage2DShaderModule);
839
840 for (size_t wIdx = 0; wIdx < ARRAY_SIZE(widthList); wIdx++)
841 {
842 uint32_t width = widthList[wIdx];
843 log_info("Width: %d\n", width);
844 if (width > max_width) continue;
845 region[0] = width;
846 for (size_t hIdx = 0; hIdx < ARRAY_SIZE(heightList); hIdx++)
847 {
848 uint32_t height = heightList[hIdx];
849 log_info("Height: %d\n", height);
850 if (height > max_height) continue;
851 region[1] = height;
852
853 uint32_t numMipLevels = 1;
854 log_info("Number of mipmap levels: %d\n", numMipLevels);
855
856 magicValue++;
857 char *vkSrcBufferDeviceMemoryPtr =
858 (char *)vkSrcBufferDeviceMemory.map();
859 uint64_t srcBufSize = 0;
860 memset(vkSrcBufferDeviceMemoryPtr, 0, maxImage2DSize);
861 memset(srcBufferPtr, 0, maxImage2DSize);
862 uint32_t mipLevel = 0;
863 for (uint32_t row = 0;
864 row < std::max(height >> mipLevel, uint32_t(1)); row++)
865 {
866 for (uint32_t col = 0;
867 col < std::max(width >> mipLevel, uint32_t(1)); col++)
868 {
869 for (uint32_t elementByte = 0;
870 elementByte < elementSize; elementByte++)
871 {
872 vkSrcBufferDeviceMemoryPtr[srcBufSize] =
873 (char)(magicValue + mipLevel + row + col);
874 srcBufferPtr[srcBufSize] =
875 (char)(magicValue + mipLevel + row + col);
876 srcBufSize++;
877 }
878 }
879 }
880 srcBufSize = ROUND_UP(
881 srcBufSize,
882 std::max(
883 elementSize,
884 (uint32_t)VULKAN_MIN_BUFFER_OFFSET_COPY_ALIGNMENT));
885 vkSrcBufferDeviceMemory.unmap();
886
887 for (size_t niIdx = 0; niIdx < ARRAY_SIZE(num2DImagesList);
888 niIdx++)
889 {
890 uint32_t num2DImages = num2DImagesList[niIdx];
891 log_info("Number of images: %d\n", num2DImages);
892 ASSERT_LEQ(num2DImages, (uint32_t)MAX_2D_IMAGES);
893
894 Params *params = (Params *)vkParamsDeviceMemory.map();
895 uint32_t num_2D_image;
896 if (useSingleImageKernel)
897 {
898 num_2D_image = 1;
899 }
900 else
901 {
902 num_2D_image = num2DImages;
903 }
904 params->numImage2DDescriptors = num_2D_image * numMipLevels;
905 vkParamsDeviceMemory.unmap();
906 vkDescriptorSet.update(0, vkParamsBuffer);
907 for (size_t emhtIdx = 0;
908 emhtIdx < vkExternalMemoryHandleTypeList.size();
909 emhtIdx++)
910 {
911 VulkanExternalMemoryHandleType
912 vkExternalMemoryHandleType =
913 vkExternalMemoryHandleTypeList[emhtIdx];
914 log_info("External memory handle type: %d \n",
915 vkExternalMemoryHandleType);
916 if ((true == disableNTHandleType)
917 && (VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT
918 == vkExternalMemoryHandleType))
919 {
920 // Skip running for WIN32 NT handle.
921 continue;
922 }
923
924 VulkanImageTiling vulkanImageTiling =
925 vkClExternalMemoryHandleTilingAssumption(
926 deviceId,
927 vkExternalMemoryHandleTypeList[emhtIdx], &err);
928 ASSERT_SUCCESS(err,
929 "Failed to query OpenCL tiling mode");
930
931 VulkanImage2D vkDummyImage2D(
932 vkDevice, vkFormatList[0], widthList[0],
933 heightList[0], vulkanImageTiling, 1,
934 vkExternalMemoryHandleType);
935 const VulkanMemoryTypeList &memoryTypeList =
936 vkDummyImage2D.getMemoryTypeList();
937
938 for (size_t mtIdx = 0; mtIdx < memoryTypeList.size();
939 mtIdx++)
940 {
941 const VulkanMemoryType &memoryType =
942 memoryTypeList[mtIdx];
943 log_info("Memory type index: %d\n",
944 (uint32_t)memoryType);
945 log_info("Memory type property: %d\n",
946 memoryType.getMemoryTypeProperty());
947 if (!useDeviceLocal)
948 {
949 if (VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL
950 == memoryType.getMemoryTypeProperty())
951 {
952 continue;
953 }
954 }
955 size_t totalImageMemSize = 0;
956 uint64_t interImageOffset = 0;
957 {
958 VulkanImage2D vkImage2D(
959 vkDevice, vkFormat, width, height,
960 vulkanImageTiling, numMipLevels,
961 vkExternalMemoryHandleType);
962 ASSERT_LEQ(vkImage2D.getSize(), maxImage2DSize);
963 totalImageMemSize =
964 ROUND_UP(vkImage2D.getSize(),
965 vkImage2D.getAlignment());
966 }
967 VulkanImage2DList vkImage2DList(
968 num2DImages, vkDevice, vkFormat, width, height,
969 vulkanImageTiling, numMipLevels,
970 vkExternalMemoryHandleType);
971 for (size_t bIdx = 0; bIdx < vkImage2DList.size();
972 bIdx++)
973 {
974 // Create list of Vulkan device memories and
975 // bind the list of Vulkan images.
976 vkImage2DListDeviceMemory1.push_back(
977 new VulkanDeviceMemory(
978 vkDevice, vkImage2DList[bIdx],
979 memoryType,
980 vkExternalMemoryHandleType));
981 vkImage2DListDeviceMemory1[bIdx]->bindImage(
982 vkImage2DList[bIdx], 0);
983 externalMemory1.push_back(
984 new clExternalMemoryImage(
985 *vkImage2DListDeviceMemory1[bIdx],
986 vkExternalMemoryHandleType, context,
987 totalImageMemSize, width, height, 0,
988 vkImage2DList[bIdx], deviceId));
989 }
990 VulkanImageViewList vkImage2DViewList(
991 vkDevice, vkImage2DList);
992
993 VulkanImage2DList vkImage2DList2(
994 num2DImages, vkDevice, vkFormat, width, height,
995 vulkanImageTiling, numMipLevels,
996 vkExternalMemoryHandleType);
997 for (size_t bIdx = 0; bIdx < vkImage2DList2.size();
998 bIdx++)
999 {
1000 vkImage2DListDeviceMemory2.push_back(
1001 new VulkanDeviceMemory(
1002 vkDevice, vkImage2DList2[bIdx],
1003 memoryType,
1004 vkExternalMemoryHandleType));
1005 vkImage2DListDeviceMemory2[bIdx]->bindImage(
1006 vkImage2DList2[bIdx], 0);
1007 externalMemory2.push_back(
1008 new clExternalMemoryImage(
1009 *vkImage2DListDeviceMemory2[bIdx],
1010 vkExternalMemoryHandleType, context,
1011 totalImageMemSize, width, height, 0,
1012 vkImage2DList2[bIdx], deviceId));
1013 }
1014
1015 cl_mem external_mem_image1[4];
1016 cl_mem external_mem_image2[4];
1017 for (int i = 0; i < num2DImages; i++)
1018 {
1019 external_mem_image1[i] =
1020 externalMemory1[i]
1021 ->getExternalMemoryImage();
1022 external_mem_image2[i] =
1023 externalMemory2[i]
1024 ->getExternalMemoryImage();
1025 }
1026
1027 clCl2VkExternalSemaphore->signal(cmd_queue1);
1028 if (!useSingleImageKernel)
1029 {
1030 vkDescriptorSet.updateArray(1,
1031 vkImage2DViewList);
1032 vkCopyCommandBuffer.begin();
1033 vkCopyCommandBuffer.pipelineBarrier(
1034 vkImage2DList,
1035 VULKAN_IMAGE_LAYOUT_UNDEFINED,
1036 VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
1037 for (size_t i2DIdx = 0;
1038 i2DIdx < vkImage2DList.size(); i2DIdx++)
1039 {
1040 vkCopyCommandBuffer.copyBufferToImage(
1041 vkSrcBuffer, vkImage2DList[i2DIdx],
1042 VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
1043 }
1044 vkCopyCommandBuffer.pipelineBarrier(
1045 vkImage2DList,
1046 VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1047 VULKAN_IMAGE_LAYOUT_GENERAL);
1048 vkCopyCommandBuffer.end();
1049 memset(dstBufferPtr, 0, srcBufSize);
1050 vkQueue.submit(vkCopyCommandBuffer);
1051 vkShaderCommandBuffer.begin();
1052 vkShaderCommandBuffer.bindPipeline(
1053 vkComputePipeline);
1054 vkShaderCommandBuffer.bindDescriptorSets(
1055 vkComputePipeline, vkPipelineLayout,
1056 vkDescriptorSet);
1057 vkShaderCommandBuffer.dispatch(
1058 NUM_BLOCKS(width, NUM_THREADS_PER_GROUP_X),
1059 NUM_BLOCKS(height,
1060 NUM_THREADS_PER_GROUP_Y / 2),
1061 1);
1062 vkShaderCommandBuffer.end();
1063 }
1064 for (uint32_t iter = 0; iter < innerIterations;
1065 iter++)
1066 {
1067 if (useSingleImageKernel)
1068 {
1069 for (size_t i2DIdx = 0;
1070 i2DIdx < vkImage2DList.size();
1071 i2DIdx++)
1072 {
1073 vkDescriptorSet.update(
1074 1, vkImage2DViewList[i2DIdx]);
1075 vkCopyCommandBuffer.begin();
1076 vkCopyCommandBuffer.pipelineBarrier(
1077 vkImage2DList,
1078 VULKAN_IMAGE_LAYOUT_UNDEFINED,
1079 VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
1080
1081 vkCopyCommandBuffer.copyBufferToImage(
1082 vkSrcBuffer, vkImage2DList[i2DIdx],
1083 VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
1084 vkCopyCommandBuffer.pipelineBarrier(
1085 vkImage2DList,
1086 VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1087 VULKAN_IMAGE_LAYOUT_GENERAL);
1088 vkCopyCommandBuffer.end();
1089 memset(dstBufferPtr, 0, srcBufSize);
1090 vkQueue.submit(vkCopyCommandBuffer);
1091 vkShaderCommandBuffer.begin();
1092 vkShaderCommandBuffer.bindPipeline(
1093 vkComputePipeline);
1094 vkShaderCommandBuffer
1095 .bindDescriptorSets(
1096 vkComputePipeline,
1097 vkPipelineLayout,
1098 vkDescriptorSet);
1099 vkShaderCommandBuffer.dispatch(
1100 NUM_BLOCKS(width,
1101 NUM_THREADS_PER_GROUP_X),
1102 NUM_BLOCKS(height,
1103 NUM_THREADS_PER_GROUP_Y
1104 / 2),
1105 1);
1106 vkShaderCommandBuffer.end();
1107 if (i2DIdx < vkImage2DList.size() - 1)
1108 {
1109 vkQueue.submit(
1110 vkShaderCommandBuffer);
1111 }
1112 }
1113 }
1114 vkQueue.submit(vkCl2VkSemaphore,
1115 vkShaderCommandBuffer,
1116 vkVk2CLSemaphore);
1117 clVk2CLExternalSemaphore->wait(cmd_queue1);
1118 switch (num2DImages)
1119 {
1120 case 1:
1121 updateKernelCQ1 = getKernelType(
1122 vkFormat, kernel_float[0],
1123 kernel_signed[0],
1124 kernel_unsigned[0]);
1125 break;
1126 case 2:
1127 updateKernelCQ1 = getKernelType(
1128 vkFormat, kernel_float[1],
1129 kernel_signed[1],
1130 kernel_unsigned[1]);
1131 break;
1132 case 4:
1133 updateKernelCQ1 = getKernelType(
1134 vkFormat, kernel_float[2],
1135 kernel_signed[2],
1136 kernel_unsigned[2]);
1137 break;
1138 }
1139 int j = 0;
1140 for (int i = 0; i < num2DImages; i++, ++j)
1141 {
1142 err = clSetKernelArg(
1143 updateKernelCQ1, j, sizeof(cl_mem),
1144 &external_mem_image1[i]);
1145 err |= clSetKernelArg(
1146 updateKernelCQ1, ++j, sizeof(cl_mem),
1147 &external_mem_image2[i]);
1148 }
1149 err |= clSetKernelArg(updateKernelCQ1, j,
1150 sizeof(unsigned int),
1151 &num2DImages);
1152 err |= clSetKernelArg(updateKernelCQ1, ++j,
1153 sizeof(unsigned int),
1154 &width);
1155 err |= clSetKernelArg(updateKernelCQ1, ++j,
1156 sizeof(unsigned int),
1157 &height);
1158 err |= clSetKernelArg(updateKernelCQ1, ++j,
1159 sizeof(unsigned int),
1160 &numMipLevels);
1161
1162 if (err != CL_SUCCESS)
1163 {
1164 print_error(err,
1165 "Error: Failed to set arg "
1166 "values for kernel-1\n");
1167 goto CLEANUP;
1168 }
1169
1170 size_t global_work_size[3] = { width, height,
1171 1 };
1172 err = clEnqueueNDRangeKernel(
1173 cmd_queue1, updateKernelCQ1, 2, NULL,
1174 global_work_size, NULL, 0, NULL, NULL);
1175 if (err != CL_SUCCESS)
1176 {
1177 goto CLEANUP;
1178 }
1179 clCl2VkExternalSemaphore->signal(cmd_queue1);
1180 }
1181
1182 unsigned int flags = 0;
1183 size_t mipmapLevelOffset = 0;
1184 cl_event eventReadImage = NULL;
1185 for (int i = 0; i < num2DImages; i++)
1186 {
1187 err = clEnqueueReadImage(
1188 cmd_queue1, external_mem_image2[i], CL_TRUE,
1189 origin, region, 0, 0, dstBufferPtr, 0, NULL,
1190 &eventReadImage);
1191
1192 if (err != CL_SUCCESS)
1193 {
1194 print_error(err,
1195 "clEnqueueReadImage failed with"
1196 "error\n");
1197 }
1198
1199 if (memcmp(srcBufferPtr, dstBufferPtr,
1200 srcBufSize))
1201 {
1202 log_info("Source and destination buffers "
1203 "don't match\n");
1204 if (debug_trace)
1205 {
1206 log_info("Source buffer contents: \n");
1207 for (uint64_t sIdx = 0;
1208 sIdx < srcBufSize; sIdx++)
1209 {
1210 log_info(
1211 "%d",
1212 (int)vkSrcBufferDeviceMemoryPtr
1213 [sIdx]);
1214 }
1215 log_info(
1216 "Destination buffer contents:");
1217 for (uint64_t dIdx = 0;
1218 dIdx < srcBufSize; dIdx++)
1219 {
1220 log_info("%d",
1221 (int)dstBufferPtr[dIdx]);
1222 }
1223 }
1224 err = -1;
1225 break;
1226 }
1227 }
1228 for (int i = 0; i < num2DImages; i++)
1229 {
1230 delete vkImage2DListDeviceMemory1[i];
1231 delete vkImage2DListDeviceMemory2[i];
1232 delete externalMemory1[i];
1233 delete externalMemory2[i];
1234 }
1235 vkImage2DListDeviceMemory1.erase(
1236 vkImage2DListDeviceMemory1.begin(),
1237 vkImage2DListDeviceMemory1.begin()
1238 + num2DImages);
1239 vkImage2DListDeviceMemory2.erase(
1240 vkImage2DListDeviceMemory2.begin(),
1241 vkImage2DListDeviceMemory2.begin()
1242 + num2DImages);
1243 externalMemory1.erase(externalMemory1.begin(),
1244 externalMemory1.begin()
1245 + num2DImages);
1246 externalMemory2.erase(externalMemory2.begin(),
1247 externalMemory2.begin()
1248 + num2DImages);
1249 if (CL_SUCCESS != err)
1250 {
1251 goto CLEANUP;
1252 }
1253 }
1254 }
1255 }
1256 }
1257 }
1258 vkImage2DShader.clear();
1259 }
1260 CLEANUP:
1261 if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
1262 if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
1263
1264 if (srcBufferPtr) free(srcBufferPtr);
1265 if (dstBufferPtr) free(dstBufferPtr);
1266 return err;
1267 }
1268
test_image_common(cl_device_id device_,cl_context context_,cl_command_queue queue_,int numElements_)1269 int test_image_common(cl_device_id device_, cl_context context_,
1270 cl_command_queue queue_, int numElements_)
1271 {
1272 int current_device = 0;
1273 int device_count = 0;
1274 int devices_prohibited = 0;
1275 cl_int err = CL_SUCCESS;
1276 cl_platform_id platform = NULL;
1277 size_t extensionSize = 0;
1278 cl_uint num_devices = 0;
1279 cl_uint device_no = 0;
1280 cl_device_id *devices;
1281 char *extensions = NULL;
1282 const char *program_source_const;
1283 cl_command_queue cmd_queue1 = NULL;
1284 cl_command_queue cmd_queue2 = NULL;
1285 cl_context context = NULL;
1286 const uint32_t num_kernels = ARRAY_SIZE(num2DImagesList) + 1;
1287 // One kernel for Cross-CQ case
1288 const uint32_t num_kernel_types = 3;
1289 const char *kernel_source[num_kernels] = { kernel_text_numImage_1,
1290 kernel_text_numImage_2,
1291 kernel_text_numImage_4 };
1292 char source_1[4096];
1293 char source_2[4096];
1294 char source_3[4096];
1295 size_t program_source_length;
1296 cl_program program[num_kernel_types];
1297 cl_kernel kernel_float[num_kernels] = { NULL, NULL, NULL, NULL };
1298 cl_kernel kernel_signed[num_kernels] = { NULL, NULL, NULL, NULL };
1299 cl_kernel kernel_unsigned[num_kernels] = { NULL, NULL, NULL, NULL };
1300 cl_mem external_mem_image1;
1301 cl_mem external_mem_image2;
1302
1303 VulkanDevice vkDevice;
1304
1305 cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, 0, 0 };
1306 // get the platform ID
1307 err = clGetPlatformIDs(1, &platform, NULL);
1308 if (err != CL_SUCCESS)
1309 {
1310 print_error(err, "Error: Failed to get platform\n");
1311 goto CLEANUP;
1312 }
1313
1314 err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices);
1315 if (CL_SUCCESS != err)
1316 {
1317 print_error(err, "clGetDeviceIDs failed in returning no. of devices\n");
1318 goto CLEANUP;
1319 }
1320 devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id));
1321 if (NULL == devices)
1322 {
1323 err = CL_OUT_OF_HOST_MEMORY;
1324 print_error(err, "Unable to allocate memory for devices\n");
1325 goto CLEANUP;
1326 }
1327 err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices,
1328 NULL);
1329 if (CL_SUCCESS != err)
1330 {
1331 print_error(err, "Failed to get deviceID.\n");
1332 goto CLEANUP;
1333 }
1334 contextProperties[1] = (cl_context_properties)platform;
1335 log_info("Assigned contextproperties for platform\n");
1336 for (device_no = 0; device_no < num_devices; device_no++)
1337 {
1338 err = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS, 0, NULL,
1339 &extensionSize);
1340 if (CL_SUCCESS != err)
1341 {
1342 print_error(
1343 err,
1344 "Error in clGetDeviceInfo for getting device_extension size\n");
1345 goto CLEANUP;
1346 }
1347 extensions = (char *)malloc(extensionSize);
1348 if (NULL == extensions)
1349 {
1350 err = CL_OUT_OF_HOST_MEMORY;
1351 print_error(err, "Unable to allocate memory for extensions\n");
1352 goto CLEANUP;
1353 }
1354 err = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS,
1355 extensionSize, extensions, NULL);
1356 if (CL_SUCCESS != err)
1357 {
1358 print_error(
1359 err, "Error in clGetDeviceInfo for getting device_extension\n");
1360 goto CLEANUP;
1361 }
1362 err = clGetDeviceInfo(devices[device_no], CL_DEVICE_UUID_KHR,
1363 CL_UUID_SIZE_KHR, uuid, &extensionSize);
1364 if (CL_SUCCESS != err)
1365 {
1366 print_error(err, "clGetDeviceInfo failed with error");
1367 goto CLEANUP;
1368 }
1369 err =
1370 memcmp(uuid, vkDevice.getPhysicalDevice().getUUID(), VK_UUID_SIZE);
1371 if (err == 0)
1372 {
1373 break;
1374 }
1375 }
1376 if (device_no >= num_devices)
1377 {
1378 err = EXIT_FAILURE;
1379 print_error(err,
1380 "OpenCL error:"
1381 "No Vulkan-OpenCL Interop capable GPU found.\n");
1382 goto CLEANUP;
1383 }
1384 deviceId = devices[device_no];
1385 err = setMaxImageDimensions(deviceId, max_width, max_height);
1386 if (CL_SUCCESS != err)
1387 {
1388 print_error(err, "error setting max image dimensions");
1389 goto CLEANUP;
1390 }
1391 log_info("Set max_width to %lu and max_height to %lu\n", max_width,
1392 max_height);
1393 context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU,
1394 NULL, NULL, &err);
1395 if (CL_SUCCESS != err)
1396 {
1397 print_error(err, "error creating context");
1398 goto CLEANUP;
1399 }
1400 log_info("Successfully created context !!!\n");
1401
1402 cmd_queue1 = clCreateCommandQueue(context, devices[device_no], 0, &err);
1403 if (CL_SUCCESS != err)
1404 {
1405 err = CL_INVALID_COMMAND_QUEUE;
1406 print_error(err, "Error: Failed to create command queue!\n");
1407 goto CLEANUP;
1408 }
1409 log_info("clCreateCommandQueue successfull \n");
1410
1411 cmd_queue2 = clCreateCommandQueue(context, devices[device_no], 0, &err);
1412 if (CL_SUCCESS != err)
1413 {
1414 err = CL_INVALID_COMMAND_QUEUE;
1415 print_error(err, "Error: Failed to create command queue!\n");
1416 goto CLEANUP;
1417 }
1418 log_info("clCreateCommandQueue2 successful \n");
1419
1420 for (int i = 0; i < num_kernels; i++)
1421 {
1422 switch (i)
1423 {
1424 case 0:
1425 sprintf(source_1, kernel_source[i], "float4", "f", "float4",
1426 "f", "f", "f");
1427 sprintf(source_2, kernel_source[i], "int4", "i", "int4", "i",
1428 "i", "i");
1429 sprintf(source_3, kernel_source[i], "uint4", "ui", "uint4",
1430 "ui", "ui", "ui");
1431 break;
1432 case 1:
1433 sprintf(source_1, kernel_source[i], "float4", "f", "float4",
1434 "f", "float4", "f", "float4", "f", "f", "f", "f", "f");
1435 sprintf(source_2, kernel_source[i], "int4", "i", "int4", "i",
1436 "int4", "i", "int4", "i", "i", "i", "i", "i");
1437 sprintf(source_3, kernel_source[i], "uint4", "ui", "uint4",
1438 "ui", "uint4", "ui", "uint4", "ui", "ui", "ui", "ui",
1439 "ui");
1440 break;
1441 case 2:
1442 sprintf(source_1, kernel_source[i], "float4", "f", "float4",
1443 "f", "float4", "f", "float4", "f", "float4", "f",
1444 "float4", "f", "float4", "f", "float4", "f", "f", "f",
1445 "f", "f", "f", "f", "f", "f");
1446 sprintf(source_2, kernel_source[i], "int4", "i", "int4", "i",
1447 "int4", "i", "int4", "i", "int4", "i", "int4", "i",
1448 "int4", "i", "int4", "i", "i", "i", "i", "i", "i", "i",
1449 "i", "i");
1450 sprintf(source_3, kernel_source[i], "uint4", "ui", "uint4",
1451 "ui", "uint4", "ui", "uint4", "ui", "uint4", "ui",
1452 "uint4", "ui", "uint4", "ui", "uint4", "ui", "ui", "ui",
1453 "ui", "ui", "ui", "ui", "ui", "ui");
1454 break;
1455 case 3:
1456 // Addtional case for creating updateKernelCQ2 which takes two
1457 // images
1458 sprintf(source_1, kernel_source[1], "float4", "f", "float4",
1459 "f", "float4", "f", "float4", "f", "f", "f", "f", "f");
1460 sprintf(source_2, kernel_source[1], "int4", "i", "int4", "i",
1461 "int4", "i", "int4", "i", "i", "i", "i", "i");
1462 sprintf(source_3, kernel_source[1], "uint4", "ui", "uint4",
1463 "ui", "uint4", "ui", "uint4", "ui", "ui", "ui", "ui",
1464 "ui");
1465 break;
1466 }
1467 const char *sourceTexts[num_kernel_types] = { source_1, source_2,
1468 source_3 };
1469 for (int k = 0; k < num_kernel_types; k++)
1470 {
1471 program_source_length = strlen(sourceTexts[k]);
1472 program[k] = clCreateProgramWithSource(
1473 context, 1, &sourceTexts[k], &program_source_length, &err);
1474 err |= clBuildProgram(program[k], 0, NULL, NULL, NULL, NULL);
1475 }
1476
1477 if (err != CL_SUCCESS)
1478 {
1479 print_error(err, "Error: Failed to build program");
1480 goto CLEANUP;
1481 }
1482 // create the kernel
1483 kernel_float[i] = clCreateKernel(program[0], "image2DKernel", &err);
1484 if (err != CL_SUCCESS)
1485 {
1486 print_error(err, "clCreateKernel failed");
1487 goto CLEANUP;
1488 }
1489 kernel_signed[i] = clCreateKernel(program[1], "image2DKernel", &err);
1490 if (err != CL_SUCCESS)
1491 {
1492 print_error(err, "clCreateKernel failed");
1493 goto CLEANUP;
1494 }
1495 kernel_unsigned[i] = clCreateKernel(program[2], "image2DKernel", &err);
1496 if (err != CL_SUCCESS)
1497 {
1498 print_error(err, "clCreateKernel failed ");
1499 goto CLEANUP;
1500 }
1501 }
1502 if (numCQ == 2)
1503 {
1504 err = run_test_with_two_queue(context, cmd_queue1, cmd_queue2,
1505 kernel_unsigned, kernel_signed,
1506 kernel_float, vkDevice);
1507 }
1508 else
1509 {
1510 err = run_test_with_one_queue(context, cmd_queue1, kernel_unsigned,
1511 kernel_signed, kernel_float, vkDevice);
1512 }
1513 CLEANUP:
1514 for (int i = 0; i < num_kernels; i++)
1515 {
1516 if (kernel_float[i])
1517 {
1518 clReleaseKernel(kernel_float[i]);
1519 }
1520 if (kernel_unsigned[i])
1521 {
1522 clReleaseKernel(kernel_unsigned[i]);
1523 }
1524 if (kernel_signed[i])
1525 {
1526 clReleaseKernel(kernel_signed[i]);
1527 }
1528 }
1529 for (int i = 0; i < num_kernel_types; i++)
1530 {
1531 if (program[i])
1532 {
1533 clReleaseProgram(program[i]);
1534 }
1535 }
1536 if (cmd_queue1) clReleaseCommandQueue(cmd_queue1);
1537 if (cmd_queue2) clReleaseCommandQueue(cmd_queue2);
1538 if (context) clReleaseContext(context);
1539
1540 if (extensions) free(extensions);
1541 if (devices) free(devices);
1542
1543 return err;
1544 }
1545