xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/vulkan/test_vulkan_interop_image.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2022 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include <vulkan_interop_common.hpp>
18 #include <string>
19 #include "harness/errorHelpers.h"
20 #include <algorithm>
21 
22 #define MAX_2D_IMAGES 5
23 #define MAX_2D_IMAGE_WIDTH 1024
24 #define MAX_2D_IMAGE_HEIGHT 1024
25 #define MAX_2D_IMAGE_ELEMENT_SIZE 16
26 #define MAX_2D_IMAGE_MIP_LEVELS 11
27 #define MAX_2D_IMAGE_DESCRIPTORS MAX_2D_IMAGES *MAX_2D_IMAGE_MIP_LEVELS
28 #define NUM_THREADS_PER_GROUP_X 32
29 #define NUM_THREADS_PER_GROUP_Y 32
30 #define NUM_BLOCKS(size, blockSize)                                            \
31     (ROUND_UP((size), (blockSize)) / (blockSize))
32 
33 #define ASSERT(x)                                                              \
34     if (!(x))                                                                  \
35     {                                                                          \
36         fprintf(stderr, "Assertion \"%s\" failed at %s:%d\n", #x, __FILE__,    \
37                 __LINE__);                                                     \
38         exit(1);                                                               \
39     }
40 
41 #define ASSERT_LEQ(x, y)                                                       \
42     if (x > y)                                                                 \
43     {                                                                          \
44         ASSERT(0);                                                             \
45     }
46 
47 namespace {
48 struct Params
49 {
50     uint32_t numImage2DDescriptors;
51 };
52 }
53 static cl_uchar uuid[CL_UUID_SIZE_KHR];
54 static cl_device_id deviceId = NULL;
55 size_t max_width = MAX_2D_IMAGE_WIDTH;
56 size_t max_height = MAX_2D_IMAGE_HEIGHT;
57 
58 const char *kernel_text_numImage_1 = " \
59 __constant sampler_t smpImg = CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST;\n\
60 __kernel void image2DKernel(read_only image2d_t InputImage, write_only image2d_t OutImage, int num2DImages, int baseWidth, int baseHeight, int numMipLevels)\n\
61 {\n\
62     int threadIdxX = get_global_id(0);\n\
63     int threadIdxY = get_global_id(1);\n\
64     int numThreadsX = get_global_size(0);                                                                                                  \n\
65     int numThreadsY = get_global_size(1);\n\
66     if (threadIdxX >= baseWidth || threadIdxY >= baseHeight)\n\
67     {\n\
68         return;\n\
69     }\n\
70     %s dataA =  read_image%s(InputImage, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
71     %s dataB =  read_image%s(InputImage, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
72     write_image%s(OutImage, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataA);\n\
73     write_image%s(OutImage, (int2)( threadIdxX, threadIdxY), dataB);\n\
74 \n\
75 }";
76 
77 const char *kernel_text_numImage_2 = " \
78 __constant sampler_t smpImg = CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST;\n\
79 __kernel void image2DKernel(read_only image2d_t InputImage_1, write_only image2d_t OutImage_1, read_only image2d_t InputImage_2,write_only image2d_t OutImage_2,int num2DImages, int baseWidth, int baseHeight, int numMipLevels)    \n\
80 {\n\
81     int threadIdxX = get_global_id(0);\n\
82     int threadIdxY = get_global_id(1);\n\
83     int numThreadsX = get_global_size(0);\n\
84     int numThreadsY = get_global_size(1);\n\
85     if (threadIdxX >= baseWidth || threadIdxY >= baseHeight) \n\
86     {\n\
87         return;\n\
88     }\n\
89     %s dataA =  read_image%s(InputImage_1, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
90     %s dataB =  read_image%s(InputImage_1, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
91     %s dataC =  read_image%s(InputImage_2, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
92     %s dataD =  read_image%s(InputImage_2, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
93     write_image%s(OutImage_1, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataA);\n\
94     write_image%s(OutImage_1, (int2)(threadIdxX, threadIdxY), dataB);\n\
95     write_image%s(OutImage_2, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataC);\n\
96     write_image%s(OutImage_2, (int2)(threadIdxX, threadIdxY), dataD);\n\
97 \n\
98 }";
99 
100 const char *kernel_text_numImage_4 = " \
101 __constant sampler_t smpImg = CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST;\n\
102 __kernel void image2DKernel(read_only image2d_t InputImage_1, write_only image2d_t OutImage_1, read_only image2d_t InputImage_2, write_only image2d_t OutImage_2, read_only image2d_t InputImage_3, write_only image2d_t OutImage_3, read_only image2d_t InputImage_4, write_only image2d_t OutImage_4, int num2DImages, int baseWidth, int baseHeight, int numMipLevels)    \n\
103 {\n\
104     int threadIdxX = get_global_id(0);\n\
105     int threadIdxY = get_global_id(1);\n\
106     int numThreadsX = get_global_size(0);\n\
107     int numThreadsY = get_global_size(1);\n\
108     if (threadIdxX >= baseWidth || threadIdxY >= baseHeight) \n\
109     {\n\
110         return;\n\
111     }\n\
112     %s dataA =  read_image%s(InputImage_1, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
113     %s dataB =  read_image%s(InputImage_1, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
114     %s dataC =  read_image%s(InputImage_2, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
115     %s dataD =  read_image%s(InputImage_2, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
116     %s dataE =  read_image%s(InputImage_3, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
117     %s dataF =  read_image%s(InputImage_3, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
118     %s dataG =  read_image%s(InputImage_4, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
119     %s dataH =  read_image%s(InputImage_4, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
120     write_image%s(OutImage_1, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataA);\n\
121     write_image%s(OutImage_1, (int2)(threadIdxX, threadIdxY), dataB);\n\
122     write_image%s(OutImage_2, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataC);\n\
123     write_image%s(OutImage_2, (int2)(threadIdxX, threadIdxY), dataD);\n\
124     write_image%s(OutImage_3, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataE);\n\
125     write_image%s(OutImage_3, (int2)(threadIdxX, threadIdxY), dataF);\n\
126     write_image%s(OutImage_4, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataG);\n\
127     write_image%s(OutImage_4, (int2)(threadIdxX, threadIdxY), dataH);\n\
128 \n\
129 }";
130 
131 const uint32_t num2DImagesList[] = { 1, 2, 4 };
132 const uint32_t widthList[] = { 4, 64, 183, 1024 };
133 const uint32_t heightList[] = { 4, 64, 365 };
134 
getKernelType(VulkanFormat format,cl_kernel kernel_float,cl_kernel kernel_signed,cl_kernel kernel_unsigned)135 const cl_kernel getKernelType(VulkanFormat format, cl_kernel kernel_float,
136                               cl_kernel kernel_signed,
137                               cl_kernel kernel_unsigned)
138 {
139     cl_kernel kernel;
140     switch (format)
141     {
142         case VULKAN_FORMAT_R32G32B32A32_SFLOAT: kernel = kernel_float; break;
143 
144         case VULKAN_FORMAT_R32G32B32A32_UINT: kernel = kernel_unsigned; break;
145 
146         case VULKAN_FORMAT_R32G32B32A32_SINT: kernel = kernel_signed; break;
147 
148         case VULKAN_FORMAT_R16G16B16A16_UINT: kernel = kernel_unsigned; break;
149 
150         case VULKAN_FORMAT_R16G16B16A16_SINT: kernel = kernel_signed; break;
151 
152         case VULKAN_FORMAT_R8G8B8A8_UINT: kernel = kernel_unsigned; break;
153 
154         case VULKAN_FORMAT_R8G8B8A8_SINT: kernel = kernel_signed; break;
155 
156         case VULKAN_FORMAT_R32G32_SFLOAT: kernel = kernel_float; break;
157 
158         case VULKAN_FORMAT_R32G32_UINT: kernel = kernel_unsigned; break;
159 
160         case VULKAN_FORMAT_R32G32_SINT: kernel = kernel_signed; break;
161 
162         case VULKAN_FORMAT_R16G16_UINT: kernel = kernel_unsigned; break;
163 
164         case VULKAN_FORMAT_R16G16_SINT: kernel = kernel_signed; break;
165 
166         case VULKAN_FORMAT_R8G8_UINT: kernel = kernel_unsigned; break;
167 
168         case VULKAN_FORMAT_R8G8_SINT: kernel = kernel_signed; break;
169 
170         case VULKAN_FORMAT_R32_SFLOAT: kernel = kernel_float; break;
171 
172         case VULKAN_FORMAT_R32_UINT: kernel = kernel_unsigned; break;
173 
174         case VULKAN_FORMAT_R32_SINT: kernel = kernel_signed; break;
175 
176         case VULKAN_FORMAT_R16_UINT: kernel = kernel_unsigned; break;
177 
178         case VULKAN_FORMAT_R16_SINT: kernel = kernel_signed; break;
179 
180         case VULKAN_FORMAT_R8_UINT: kernel = kernel_unsigned; break;
181 
182         case VULKAN_FORMAT_R8_SINT: kernel = kernel_signed; break;
183 
184         default:
185             log_error(" Unsupported format");
186             ASSERT(0);
187             break;
188     }
189     return kernel;
190 }
191 
run_test_with_two_queue(cl_context & context,cl_command_queue & cmd_queue1,cl_command_queue & cmd_queue2,cl_kernel * kernel_unsigned,cl_kernel * kernel_signed,cl_kernel * kernel_float,VulkanDevice & vkDevice)192 int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1,
193                             cl_command_queue &cmd_queue2,
194                             cl_kernel *kernel_unsigned,
195                             cl_kernel *kernel_signed, cl_kernel *kernel_float,
196                             VulkanDevice &vkDevice)
197 {
198     cl_int err = CL_SUCCESS;
199     size_t origin[3] = { 0, 0, 0 };
200     size_t region[3] = { 1, 1, 1 };
201 
202     cl_kernel updateKernelCQ1, updateKernelCQ2;
203     std::vector<VulkanFormat> vkFormatList = getSupportedVulkanFormatList();
204     const std::vector<VulkanExternalMemoryHandleType>
205         vkExternalMemoryHandleTypeList =
206             getSupportedVulkanExternalMemoryHandleTypeList();
207     char magicValue = 0;
208 
209     VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params));
210     VulkanDeviceMemory vkParamsDeviceMemory(
211         vkDevice, vkParamsBuffer.getSize(),
212         getVulkanMemoryType(vkDevice,
213                             VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
214     vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);
215 
216     uint64_t maxImage2DSize =
217         max_width * max_height * MAX_2D_IMAGE_ELEMENT_SIZE * 2;
218     VulkanBuffer vkSrcBuffer(vkDevice, maxImage2DSize);
219     VulkanDeviceMemory vkSrcBufferDeviceMemory(
220         vkDevice, vkSrcBuffer.getSize(),
221         getVulkanMemoryType(vkDevice,
222                             VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
223     vkSrcBufferDeviceMemory.bindBuffer(vkSrcBuffer);
224 
225     char *srcBufferPtr, *dstBufferPtr;
226     srcBufferPtr = (char *)malloc(maxImage2DSize);
227     dstBufferPtr = (char *)malloc(maxImage2DSize);
228 
229     VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList;
230     vkDescriptorSetLayoutBindingList.addBinding(
231         0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
232     vkDescriptorSetLayoutBindingList.addBinding(
233         1, VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_2D_IMAGE_DESCRIPTORS);
234     VulkanDescriptorSetLayout vkDescriptorSetLayout(
235         vkDevice, vkDescriptorSetLayoutBindingList);
236     VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
237 
238     VulkanDescriptorPool vkDescriptorPool(vkDevice,
239                                           vkDescriptorSetLayoutBindingList);
240     VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
241                                         vkDescriptorSetLayout);
242 
243     VulkanCommandPool vkCommandPool(vkDevice);
244     VulkanCommandBuffer vkCopyCommandBuffer(vkDevice, vkCommandPool);
245     VulkanCommandBuffer vkShaderCommandBuffer(vkDevice, vkCommandPool);
246     VulkanQueue &vkQueue = vkDevice.getQueue();
247 
248     VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
249         getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
250     VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
251     VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
252     clExternalSemaphore *clVk2CLExternalSemaphore = NULL;
253     clExternalSemaphore *clCl2VkExternalSemaphore = NULL;
254 
255     clVk2CLExternalSemaphore = new clExternalSemaphore(
256         vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
257     clCl2VkExternalSemaphore = new clExternalSemaphore(
258         vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
259 
260     std::vector<VulkanDeviceMemory *> vkImage2DListDeviceMemory1;
261     std::vector<VulkanDeviceMemory *> vkImage2DListDeviceMemory2;
262     std::vector<clExternalMemoryImage *> externalMemory1;
263     std::vector<clExternalMemoryImage *> externalMemory2;
264     std::vector<char> vkImage2DShader;
265 
266     for (size_t fIdx = 0; fIdx < vkFormatList.size(); fIdx++)
267     {
268         VulkanFormat vkFormat = vkFormatList[fIdx];
269         log_info("Format: %d\n", vkFormat);
270         uint32_t elementSize = getVulkanFormatElementSize(vkFormat);
271         ASSERT_LEQ(elementSize, (uint32_t)MAX_2D_IMAGE_ELEMENT_SIZE);
272         log_info("elementSize= %d\n", elementSize);
273 
274         std::string fileName = "image2D_"
275             + std::string(getVulkanFormatGLSLFormat(vkFormat)) + ".spv";
276         log_info("Load %s file", fileName.c_str());
277         vkImage2DShader = readFile(fileName);
278         VulkanShaderModule vkImage2DShaderModule(vkDevice, vkImage2DShader);
279 
280         VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
281                                                 vkImage2DShaderModule);
282 
283         for (size_t wIdx = 0; wIdx < ARRAY_SIZE(widthList); wIdx++)
284         {
285             uint32_t width = widthList[wIdx];
286             log_info("Width: %d\n", width);
287             if (width > max_width) continue;
288             region[0] = width;
289             for (size_t hIdx = 0; hIdx < ARRAY_SIZE(heightList); hIdx++)
290             {
291                 uint32_t height = heightList[hIdx];
292                 log_info("Height: %d", height);
293                 if (height > max_height) continue;
294                 region[1] = height;
295 
296                 uint32_t numMipLevels = 1;
297                 log_info("Number of mipmap levels: %d\n", numMipLevels);
298 
299                 magicValue++;
300                 char *vkSrcBufferDeviceMemoryPtr =
301                     (char *)vkSrcBufferDeviceMemory.map();
302                 uint64_t srcBufSize = 0;
303                 memset(vkSrcBufferDeviceMemoryPtr, 0, maxImage2DSize);
304                 memset(srcBufferPtr, 0, maxImage2DSize);
305                 uint32_t mipLevel = 0;
306                 for (uint32_t row = 0;
307                      row < std::max(height >> mipLevel, uint32_t(1)); row++)
308                 {
309                     for (uint32_t col = 0;
310                          col < std::max(width >> mipLevel, uint32_t(1)); col++)
311                     {
312                         for (uint32_t elementByte = 0;
313                              elementByte < elementSize; elementByte++)
314                         {
315                             vkSrcBufferDeviceMemoryPtr[srcBufSize] =
316                                 (char)(magicValue + mipLevel + row + col);
317                             srcBufferPtr[srcBufSize] =
318                                 (char)(magicValue + mipLevel + row + col);
319                             srcBufSize++;
320                         }
321                     }
322                 }
323                 srcBufSize = ROUND_UP(
324                     srcBufSize,
325                     std::max(
326                         elementSize,
327                         (uint32_t)VULKAN_MIN_BUFFER_OFFSET_COPY_ALIGNMENT));
328                 vkSrcBufferDeviceMemory.unmap();
329 
330                 for (size_t niIdx = 0; niIdx < ARRAY_SIZE(num2DImagesList);
331                      niIdx++)
332                 {
333                     uint32_t num2DImages = num2DImagesList[niIdx] + 1;
334                     // added one image for cross-cq case for updateKernelCQ2
335                     log_info("Number of images: %d\n", num2DImages);
336                     ASSERT_LEQ(num2DImages, (uint32_t)MAX_2D_IMAGES);
337                     uint32_t num_2D_image;
338                     if (useSingleImageKernel)
339                     {
340                         num_2D_image = 1;
341                     }
342                     else
343                     {
344                         num_2D_image = num2DImages;
345                     }
346                     Params *params = (Params *)vkParamsDeviceMemory.map();
347                     params->numImage2DDescriptors = num_2D_image * numMipLevels;
348                     vkParamsDeviceMemory.unmap();
349                     vkDescriptorSet.update(0, vkParamsBuffer);
350                     for (size_t emhtIdx = 0;
351                          emhtIdx < vkExternalMemoryHandleTypeList.size();
352                          emhtIdx++)
353                     {
354                         VulkanExternalMemoryHandleType
355                             vkExternalMemoryHandleType =
356                                 vkExternalMemoryHandleTypeList[emhtIdx];
357                         if ((true == disableNTHandleType)
358                             && (VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT
359                                 == vkExternalMemoryHandleType))
360                         {
361                             // Skip running for WIN32 NT handle.
362                             continue;
363                         }
364                         log_info("External memory handle type: %d \n",
365                                  vkExternalMemoryHandleType);
366                         VulkanImageTiling vulkanImageTiling =
367                             vkClExternalMemoryHandleTilingAssumption(
368                                 deviceId,
369                                 vkExternalMemoryHandleTypeList[emhtIdx], &err);
370                         ASSERT_SUCCESS(err,
371                                        "Failed to query OpenCL tiling mode");
372 
373                         VulkanImage2D vkDummyImage2D(
374                             vkDevice, vkFormatList[0], widthList[0],
375                             heightList[0], vulkanImageTiling, 1,
376                             vkExternalMemoryHandleType);
377                         const VulkanMemoryTypeList &memoryTypeList =
378                             vkDummyImage2D.getMemoryTypeList();
379 
380                         for (size_t mtIdx = 0; mtIdx < memoryTypeList.size();
381                              mtIdx++)
382                         {
383                             const VulkanMemoryType &memoryType =
384                                 memoryTypeList[mtIdx];
385                             log_info("Memory type index: %d\n",
386                                      (uint32_t)memoryType);
387                             log_info("Memory type property: %d\n",
388                                      memoryType.getMemoryTypeProperty());
389                             if (!useDeviceLocal)
390                             {
391                                 if (VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL
392                                     == memoryType.getMemoryTypeProperty())
393                                 {
394                                     continue;
395                                 }
396                             }
397 
398                             size_t totalImageMemSize = 0;
399                             uint64_t interImageOffset = 0;
400                             {
401                                 VulkanImage2D vkImage2D(
402                                     vkDevice, vkFormat, width, height,
403                                     vulkanImageTiling, numMipLevels,
404                                     vkExternalMemoryHandleType);
405                                 ASSERT_LEQ(vkImage2D.getSize(), maxImage2DSize);
406                                 totalImageMemSize =
407                                     ROUND_UP(vkImage2D.getSize(),
408                                              vkImage2D.getAlignment());
409                             }
410                             VulkanImage2DList vkImage2DList(
411                                 num2DImages, vkDevice, vkFormat, width, height,
412                                 vulkanImageTiling, numMipLevels,
413                                 vkExternalMemoryHandleType);
414                             for (size_t bIdx = 0; bIdx < num2DImages; bIdx++)
415                             {
416                                 vkImage2DListDeviceMemory1.push_back(
417                                     new VulkanDeviceMemory(
418                                         vkDevice, vkImage2DList[bIdx],
419                                         memoryType,
420                                         vkExternalMemoryHandleType));
421                                 vkImage2DListDeviceMemory1[bIdx]->bindImage(
422                                     vkImage2DList[bIdx], 0);
423                                 externalMemory1.push_back(
424                                     new clExternalMemoryImage(
425                                         *vkImage2DListDeviceMemory1[bIdx],
426                                         vkExternalMemoryHandleType, context,
427                                         totalImageMemSize, width, height, 0,
428                                         vkImage2DList[bIdx], deviceId));
429                             }
430                             VulkanImageViewList vkImage2DViewList(
431                                 vkDevice, vkImage2DList);
432                             VulkanImage2DList vkImage2DList2(
433                                 num2DImages, vkDevice, vkFormat, width, height,
434                                 vulkanImageTiling, numMipLevels,
435                                 vkExternalMemoryHandleType);
436                             for (size_t bIdx = 0; bIdx < num2DImages; bIdx++)
437                             {
438                                 vkImage2DListDeviceMemory2.push_back(
439                                     new VulkanDeviceMemory(
440                                         vkDevice, vkImage2DList2[bIdx],
441                                         memoryType,
442                                         vkExternalMemoryHandleType));
443                                 vkImage2DListDeviceMemory2[bIdx]->bindImage(
444                                     vkImage2DList2[bIdx], 0);
445                                 externalMemory2.push_back(
446                                     new clExternalMemoryImage(
447                                         *vkImage2DListDeviceMemory2[bIdx],
448                                         vkExternalMemoryHandleType, context,
449                                         totalImageMemSize, width, height, 0,
450                                         vkImage2DList2[bIdx], deviceId));
451                             }
452 
453                             cl_mem external_mem_image1[5];
454                             cl_mem external_mem_image2[5];
455                             for (int i = 0; i < num2DImages; i++)
456                             {
457                                 external_mem_image1[i] =
458                                     externalMemory1[i]
459                                         ->getExternalMemoryImage();
460                                 external_mem_image2[i] =
461                                     externalMemory2[i]
462                                         ->getExternalMemoryImage();
463                             }
464 
465                             clCl2VkExternalSemaphore->signal(cmd_queue1);
466                             if (!useSingleImageKernel)
467                             {
468                                 vkDescriptorSet.updateArray(1,
469                                                             vkImage2DViewList);
470                                 vkCopyCommandBuffer.begin();
471                                 vkCopyCommandBuffer.pipelineBarrier(
472                                     vkImage2DList,
473                                     VULKAN_IMAGE_LAYOUT_UNDEFINED,
474                                     VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
475                                 for (size_t i2DIdx = 0;
476                                      i2DIdx < vkImage2DList.size(); i2DIdx++)
477                                 {
478                                     vkCopyCommandBuffer.copyBufferToImage(
479                                         vkSrcBuffer, vkImage2DList[i2DIdx],
480                                         VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
481                                 }
482                                 vkCopyCommandBuffer.pipelineBarrier(
483                                     vkImage2DList,
484                                     VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
485                                     VULKAN_IMAGE_LAYOUT_GENERAL);
486                                 vkCopyCommandBuffer.end();
487                                 memset(dstBufferPtr, 0, srcBufSize);
488                                 vkQueue.submit(vkCopyCommandBuffer);
489                                 vkShaderCommandBuffer.begin();
490                                 vkShaderCommandBuffer.bindPipeline(
491                                     vkComputePipeline);
492                                 vkShaderCommandBuffer.bindDescriptorSets(
493                                     vkComputePipeline, vkPipelineLayout,
494                                     vkDescriptorSet);
495                                 vkShaderCommandBuffer.dispatch(
496                                     NUM_BLOCKS(width, NUM_THREADS_PER_GROUP_X),
497                                     NUM_BLOCKS(height,
498                                                NUM_THREADS_PER_GROUP_Y / 2),
499                                     1);
500                                 vkShaderCommandBuffer.end();
501                             }
502                             for (uint32_t iter = 0; iter < innerIterations;
503                                  iter++)
504                             {
505                                 if (useSingleImageKernel)
506                                 {
507                                     for (size_t i2DIdx = 0;
508                                          i2DIdx < vkImage2DList.size();
509                                          i2DIdx++)
510                                     {
511                                         vkDescriptorSet.update(
512                                             1, vkImage2DViewList[i2DIdx]);
513                                         vkCopyCommandBuffer.begin();
514                                         vkCopyCommandBuffer.pipelineBarrier(
515                                             vkImage2DList,
516                                             VULKAN_IMAGE_LAYOUT_UNDEFINED,
517                                             VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
518 
519                                         vkCopyCommandBuffer.copyBufferToImage(
520                                             vkSrcBuffer, vkImage2DList[i2DIdx],
521                                             VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
522                                         vkCopyCommandBuffer.pipelineBarrier(
523                                             vkImage2DList,
524                                             VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
525                                             VULKAN_IMAGE_LAYOUT_GENERAL);
526                                         vkCopyCommandBuffer.end();
527                                         memset(dstBufferPtr, 0, srcBufSize);
528                                         vkQueue.submit(vkCopyCommandBuffer);
529                                         vkShaderCommandBuffer.begin();
530                                         vkShaderCommandBuffer.bindPipeline(
531                                             vkComputePipeline);
532                                         vkShaderCommandBuffer
533                                             .bindDescriptorSets(
534                                                 vkComputePipeline,
535                                                 vkPipelineLayout,
536                                                 vkDescriptorSet);
537                                         vkShaderCommandBuffer.dispatch(
538                                             NUM_BLOCKS(width,
539                                                        NUM_THREADS_PER_GROUP_X),
540                                             NUM_BLOCKS(height,
541                                                        NUM_THREADS_PER_GROUP_Y
542                                                            / 2),
543                                             1);
544                                         vkShaderCommandBuffer.end();
545                                         if (i2DIdx < vkImage2DList.size() - 1)
546                                         {
547                                             vkQueue.submit(
548                                                 vkShaderCommandBuffer);
549                                         }
550                                     }
551                                 }
552                                 vkQueue.submit(vkCl2VkSemaphore,
553                                                vkShaderCommandBuffer,
554                                                vkVk2CLSemaphore);
555                                 clVk2CLExternalSemaphore->wait(cmd_queue1);
556                                 switch (num2DImages)
557                                 {
558                                     case 2:
559                                         updateKernelCQ1 = getKernelType(
560                                             vkFormat, kernel_float[0],
561                                             kernel_signed[0],
562                                             kernel_unsigned[0]);
563                                         break;
564                                     case 3:
565                                         updateKernelCQ1 = getKernelType(
566                                             vkFormat, kernel_float[1],
567                                             kernel_signed[1],
568                                             kernel_unsigned[1]);
569                                         break;
570                                     case 5:
571                                         updateKernelCQ1 = getKernelType(
572                                             vkFormat, kernel_float[2],
573                                             kernel_signed[2],
574                                             kernel_unsigned[2]);
575                                         break;
576                                 }
577                                 updateKernelCQ2 = getKernelType(
578                                     vkFormat, kernel_float[3], kernel_signed[3],
579                                     kernel_unsigned[3]);
580                                 // similar kernel-type based on vkFormat
581                                 int j = 0;
582                                 // Setting arguments of updateKernelCQ2
583 
584                                 err = clSetKernelArg(updateKernelCQ2, 0,
585                                                      sizeof(cl_mem),
586                                                      &external_mem_image1[0]);
587                                 err |= clSetKernelArg(updateKernelCQ2, 1,
588                                                       sizeof(cl_mem),
589                                                       &external_mem_image2[0]);
590                                 err |= clSetKernelArg(
591                                     updateKernelCQ2, 2, sizeof(cl_mem),
592                                     &external_mem_image1[num2DImages - 1]);
593                                 err |= clSetKernelArg(
594                                     updateKernelCQ2, 3, sizeof(cl_mem),
595                                     &external_mem_image2[num2DImages - 1]);
596                                 err |= clSetKernelArg(updateKernelCQ2, 4,
597                                                       sizeof(unsigned int),
598                                                       &num2DImages);
599                                 err |= clSetKernelArg(updateKernelCQ2, 5,
600                                                       sizeof(unsigned int),
601                                                       &width);
602                                 err |= clSetKernelArg(updateKernelCQ2, 6,
603                                                       sizeof(unsigned int),
604                                                       &height);
605                                 err |= clSetKernelArg(updateKernelCQ2, 7,
606                                                       sizeof(unsigned int),
607                                                       &numMipLevels);
608                                 for (int i = 0; i < num2DImages - 1; i++, ++j)
609                                 {
610                                     err = clSetKernelArg(
611                                         updateKernelCQ1, j, sizeof(cl_mem),
612                                         &external_mem_image1[i]);
613                                     err |= clSetKernelArg(
614                                         updateKernelCQ1, ++j, sizeof(cl_mem),
615                                         &external_mem_image2[i]);
616                                 }
617                                 err |= clSetKernelArg(updateKernelCQ1, j,
618                                                       sizeof(unsigned int),
619                                                       &num2DImages);
620                                 err |= clSetKernelArg(updateKernelCQ1, ++j,
621                                                       sizeof(unsigned int),
622                                                       &width);
623                                 err |= clSetKernelArg(updateKernelCQ1, ++j,
624                                                       sizeof(unsigned int),
625                                                       &height);
626                                 err |= clSetKernelArg(updateKernelCQ1, ++j,
627                                                       sizeof(unsigned int),
628                                                       &numMipLevels);
629 
630                                 if (err != CL_SUCCESS)
631                                 {
632                                     print_error(
633                                         err,
634                                         "Error: Failed to set arg values \n");
635                                     goto CLEANUP;
636                                 }
637                                 // clVk2CLExternalSemaphore->wait(cmd_queue1);
638                                 size_t global_work_size[3] = { width, height,
639                                                                1 };
640                                 cl_event first_launch;
641                                 err = clEnqueueNDRangeKernel(
642                                     cmd_queue1, updateKernelCQ1, 2, NULL,
643                                     global_work_size, NULL, 0, NULL,
644                                     &first_launch);
645                                 if (err != CL_SUCCESS)
646                                 {
647                                     goto CLEANUP;
648                                 }
649                                 err = clEnqueueNDRangeKernel(
650                                     cmd_queue2, updateKernelCQ2, 2, NULL,
651                                     global_work_size, NULL, 1, &first_launch,
652                                     NULL);
653                                 if (err != CL_SUCCESS)
654                                 {
655                                     goto CLEANUP;
656                                 }
657 
658                                 clFinish(cmd_queue2);
659                                 clCl2VkExternalSemaphore->signal(cmd_queue2);
660                             }
661 
662                             unsigned int flags = 0;
663                             size_t mipmapLevelOffset = 0;
664                             cl_event eventReadImage = NULL;
665                             clFinish(cmd_queue2);
666                             for (int i = 0; i < num2DImages; i++)
667                             {
668                                 err = clEnqueueReadImage(
669                                     cmd_queue1, external_mem_image2[i], CL_TRUE,
670                                     origin, region, 0, 0, dstBufferPtr, 0, NULL,
671                                     &eventReadImage);
672 
673                                 if (err != CL_SUCCESS)
674                                 {
675                                     print_error(err,
676                                                 "clEnqueueReadImage failed with"
677                                                 "error\n");
678                                 }
679 
680                                 if (memcmp(srcBufferPtr, dstBufferPtr,
681                                            srcBufSize))
682                                 {
683                                     log_info("Source and destination buffers "
684                                              "don't match\n");
685                                     if (debug_trace)
686                                     {
687                                         log_info("Source buffer contents: \n");
688                                         for (uint64_t sIdx = 0;
689                                              sIdx < srcBufSize; sIdx++)
690                                         {
691                                             log_info(
692                                                 "%d ",
693                                                 (int)vkSrcBufferDeviceMemoryPtr
694                                                     [sIdx]);
695                                         }
696                                         log_info("Destination buffer contents:"
697                                                  "\n");
698                                         for (uint64_t dIdx = 0;
699                                              dIdx < srcBufSize; dIdx++)
700                                         {
701                                             log_info("%d ",
702                                                      (int)dstBufferPtr[dIdx]);
703                                         }
704                                     }
705                                     err = -1;
706                                     break;
707                                 }
708                             }
709                             for (int i = 0; i < num2DImages; i++)
710                             {
711                                 delete vkImage2DListDeviceMemory1[i];
712                                 delete vkImage2DListDeviceMemory2[i];
713                                 delete externalMemory1[i];
714                                 delete externalMemory2[i];
715                             }
716                             vkImage2DListDeviceMemory1.erase(
717                                 vkImage2DListDeviceMemory1.begin(),
718                                 vkImage2DListDeviceMemory1.begin()
719                                     + num2DImages);
720                             vkImage2DListDeviceMemory2.erase(
721                                 vkImage2DListDeviceMemory2.begin(),
722                                 vkImage2DListDeviceMemory2.begin()
723                                     + num2DImages);
724                             externalMemory1.erase(externalMemory1.begin(),
725                                                   externalMemory1.begin()
726                                                       + num2DImages);
727                             externalMemory2.erase(externalMemory2.begin(),
728                                                   externalMemory2.begin()
729                                                       + num2DImages);
730                             if (CL_SUCCESS != err)
731                             {
732                                 goto CLEANUP;
733                             }
734                         }
735                     }
736                 }
737             }
738         }
739 
740         vkImage2DShader.clear();
741     }
742 CLEANUP:
743     if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
744     if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
745 
746     if (srcBufferPtr) free(srcBufferPtr);
747     if (dstBufferPtr) free(dstBufferPtr);
748     return err;
749 }
750 
run_test_with_one_queue(cl_context & context,cl_command_queue & cmd_queue1,cl_kernel * kernel_unsigned,cl_kernel * kernel_signed,cl_kernel * kernel_float,VulkanDevice & vkDevice)751 int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1,
752                             cl_kernel *kernel_unsigned,
753                             cl_kernel *kernel_signed, cl_kernel *kernel_float,
754                             VulkanDevice &vkDevice)
755 {
756     cl_int err = CL_SUCCESS;
757     size_t origin[3] = { 0, 0, 0 };
758     size_t region[3] = { 1, 1, 1 };
759     cl_kernel updateKernelCQ1;
760     std::vector<VulkanFormat> vkFormatList = getSupportedVulkanFormatList();
761     const std::vector<VulkanExternalMemoryHandleType>
762         vkExternalMemoryHandleTypeList =
763             getSupportedVulkanExternalMemoryHandleTypeList();
764     char magicValue = 0;
765 
766     VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params));
767     VulkanDeviceMemory vkParamsDeviceMemory(
768         vkDevice, vkParamsBuffer.getSize(),
769         getVulkanMemoryType(vkDevice,
770                             VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
771     vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);
772 
773     uint64_t maxImage2DSize =
774         max_width * max_height * MAX_2D_IMAGE_ELEMENT_SIZE * 2;
775     VulkanBuffer vkSrcBuffer(vkDevice, maxImage2DSize);
776     VulkanDeviceMemory vkSrcBufferDeviceMemory(
777         vkDevice, vkSrcBuffer.getSize(),
778         getVulkanMemoryType(vkDevice,
779                             VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
780     vkSrcBufferDeviceMemory.bindBuffer(vkSrcBuffer);
781 
782     char *srcBufferPtr, *dstBufferPtr;
783     srcBufferPtr = (char *)malloc(maxImage2DSize);
784     dstBufferPtr = (char *)malloc(maxImage2DSize);
785 
786     VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList;
787     vkDescriptorSetLayoutBindingList.addBinding(
788         0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
789     vkDescriptorSetLayoutBindingList.addBinding(
790         1, VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_2D_IMAGE_DESCRIPTORS);
791     VulkanDescriptorSetLayout vkDescriptorSetLayout(
792         vkDevice, vkDescriptorSetLayoutBindingList);
793     VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
794 
795     VulkanDescriptorPool vkDescriptorPool(vkDevice,
796                                           vkDescriptorSetLayoutBindingList);
797     VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
798                                         vkDescriptorSetLayout);
799 
800     VulkanCommandPool vkCommandPool(vkDevice);
801     VulkanCommandBuffer vkCopyCommandBuffer(vkDevice, vkCommandPool);
802     VulkanCommandBuffer vkShaderCommandBuffer(vkDevice, vkCommandPool);
803     VulkanQueue &vkQueue = vkDevice.getQueue();
804 
805     VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
806         getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
807     VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
808     VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
809     clExternalSemaphore *clVk2CLExternalSemaphore = NULL;
810     clExternalSemaphore *clCl2VkExternalSemaphore = NULL;
811 
812     clVk2CLExternalSemaphore = new clExternalSemaphore(
813         vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
814     clCl2VkExternalSemaphore = new clExternalSemaphore(
815         vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
816 
817     std::vector<VulkanDeviceMemory *> vkImage2DListDeviceMemory1;
818     std::vector<VulkanDeviceMemory *> vkImage2DListDeviceMemory2;
819     std::vector<clExternalMemoryImage *> externalMemory1;
820     std::vector<clExternalMemoryImage *> externalMemory2;
821     std::vector<char> vkImage2DShader;
822 
823     for (size_t fIdx = 0; fIdx < vkFormatList.size(); fIdx++)
824     {
825         VulkanFormat vkFormat = vkFormatList[fIdx];
826         log_info("Format: %d\n", vkFormat);
827         uint32_t elementSize = getVulkanFormatElementSize(vkFormat);
828         ASSERT_LEQ(elementSize, (uint32_t)MAX_2D_IMAGE_ELEMENT_SIZE);
829         log_info("elementSize= %d\n", elementSize);
830 
831         std::string fileName = "image2D_"
832             + std::string(getVulkanFormatGLSLFormat(vkFormat)) + ".spv";
833         log_info("Load %s file", fileName.c_str());
834         vkImage2DShader = readFile(fileName);
835         VulkanShaderModule vkImage2DShaderModule(vkDevice, vkImage2DShader);
836 
837         VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
838                                                 vkImage2DShaderModule);
839 
840         for (size_t wIdx = 0; wIdx < ARRAY_SIZE(widthList); wIdx++)
841         {
842             uint32_t width = widthList[wIdx];
843             log_info("Width: %d\n", width);
844             if (width > max_width) continue;
845             region[0] = width;
846             for (size_t hIdx = 0; hIdx < ARRAY_SIZE(heightList); hIdx++)
847             {
848                 uint32_t height = heightList[hIdx];
849                 log_info("Height: %d\n", height);
850                 if (height > max_height) continue;
851                 region[1] = height;
852 
853                 uint32_t numMipLevels = 1;
854                 log_info("Number of mipmap levels: %d\n", numMipLevels);
855 
856                 magicValue++;
857                 char *vkSrcBufferDeviceMemoryPtr =
858                     (char *)vkSrcBufferDeviceMemory.map();
859                 uint64_t srcBufSize = 0;
860                 memset(vkSrcBufferDeviceMemoryPtr, 0, maxImage2DSize);
861                 memset(srcBufferPtr, 0, maxImage2DSize);
862                 uint32_t mipLevel = 0;
863                 for (uint32_t row = 0;
864                      row < std::max(height >> mipLevel, uint32_t(1)); row++)
865                 {
866                     for (uint32_t col = 0;
867                          col < std::max(width >> mipLevel, uint32_t(1)); col++)
868                     {
869                         for (uint32_t elementByte = 0;
870                              elementByte < elementSize; elementByte++)
871                         {
872                             vkSrcBufferDeviceMemoryPtr[srcBufSize] =
873                                 (char)(magicValue + mipLevel + row + col);
874                             srcBufferPtr[srcBufSize] =
875                                 (char)(magicValue + mipLevel + row + col);
876                             srcBufSize++;
877                         }
878                     }
879                 }
880                 srcBufSize = ROUND_UP(
881                     srcBufSize,
882                     std::max(
883                         elementSize,
884                         (uint32_t)VULKAN_MIN_BUFFER_OFFSET_COPY_ALIGNMENT));
885                 vkSrcBufferDeviceMemory.unmap();
886 
887                 for (size_t niIdx = 0; niIdx < ARRAY_SIZE(num2DImagesList);
888                      niIdx++)
889                 {
890                     uint32_t num2DImages = num2DImagesList[niIdx];
891                     log_info("Number of images: %d\n", num2DImages);
892                     ASSERT_LEQ(num2DImages, (uint32_t)MAX_2D_IMAGES);
893 
894                     Params *params = (Params *)vkParamsDeviceMemory.map();
895                     uint32_t num_2D_image;
896                     if (useSingleImageKernel)
897                     {
898                         num_2D_image = 1;
899                     }
900                     else
901                     {
902                         num_2D_image = num2DImages;
903                     }
904                     params->numImage2DDescriptors = num_2D_image * numMipLevels;
905                     vkParamsDeviceMemory.unmap();
906                     vkDescriptorSet.update(0, vkParamsBuffer);
907                     for (size_t emhtIdx = 0;
908                          emhtIdx < vkExternalMemoryHandleTypeList.size();
909                          emhtIdx++)
910                     {
911                         VulkanExternalMemoryHandleType
912                             vkExternalMemoryHandleType =
913                                 vkExternalMemoryHandleTypeList[emhtIdx];
914                         log_info("External memory handle type: %d \n",
915                                  vkExternalMemoryHandleType);
916                         if ((true == disableNTHandleType)
917                             && (VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT
918                                 == vkExternalMemoryHandleType))
919                         {
920                             // Skip running for WIN32 NT handle.
921                             continue;
922                         }
923 
924                         VulkanImageTiling vulkanImageTiling =
925                             vkClExternalMemoryHandleTilingAssumption(
926                                 deviceId,
927                                 vkExternalMemoryHandleTypeList[emhtIdx], &err);
928                         ASSERT_SUCCESS(err,
929                                        "Failed to query OpenCL tiling mode");
930 
931                         VulkanImage2D vkDummyImage2D(
932                             vkDevice, vkFormatList[0], widthList[0],
933                             heightList[0], vulkanImageTiling, 1,
934                             vkExternalMemoryHandleType);
935                         const VulkanMemoryTypeList &memoryTypeList =
936                             vkDummyImage2D.getMemoryTypeList();
937 
938                         for (size_t mtIdx = 0; mtIdx < memoryTypeList.size();
939                              mtIdx++)
940                         {
941                             const VulkanMemoryType &memoryType =
942                                 memoryTypeList[mtIdx];
943                             log_info("Memory type index: %d\n",
944                                      (uint32_t)memoryType);
945                             log_info("Memory type property: %d\n",
946                                      memoryType.getMemoryTypeProperty());
947                             if (!useDeviceLocal)
948                             {
949                                 if (VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL
950                                     == memoryType.getMemoryTypeProperty())
951                                 {
952                                     continue;
953                                 }
954                             }
955                             size_t totalImageMemSize = 0;
956                             uint64_t interImageOffset = 0;
957                             {
958                                 VulkanImage2D vkImage2D(
959                                     vkDevice, vkFormat, width, height,
960                                     vulkanImageTiling, numMipLevels,
961                                     vkExternalMemoryHandleType);
962                                 ASSERT_LEQ(vkImage2D.getSize(), maxImage2DSize);
963                                 totalImageMemSize =
964                                     ROUND_UP(vkImage2D.getSize(),
965                                              vkImage2D.getAlignment());
966                             }
967                             VulkanImage2DList vkImage2DList(
968                                 num2DImages, vkDevice, vkFormat, width, height,
969                                 vulkanImageTiling, numMipLevels,
970                                 vkExternalMemoryHandleType);
971                             for (size_t bIdx = 0; bIdx < vkImage2DList.size();
972                                  bIdx++)
973                             {
974                                 // Create list of Vulkan device memories and
975                                 // bind the list of Vulkan images.
976                                 vkImage2DListDeviceMemory1.push_back(
977                                     new VulkanDeviceMemory(
978                                         vkDevice, vkImage2DList[bIdx],
979                                         memoryType,
980                                         vkExternalMemoryHandleType));
981                                 vkImage2DListDeviceMemory1[bIdx]->bindImage(
982                                     vkImage2DList[bIdx], 0);
983                                 externalMemory1.push_back(
984                                     new clExternalMemoryImage(
985                                         *vkImage2DListDeviceMemory1[bIdx],
986                                         vkExternalMemoryHandleType, context,
987                                         totalImageMemSize, width, height, 0,
988                                         vkImage2DList[bIdx], deviceId));
989                             }
990                             VulkanImageViewList vkImage2DViewList(
991                                 vkDevice, vkImage2DList);
992 
993                             VulkanImage2DList vkImage2DList2(
994                                 num2DImages, vkDevice, vkFormat, width, height,
995                                 vulkanImageTiling, numMipLevels,
996                                 vkExternalMemoryHandleType);
997                             for (size_t bIdx = 0; bIdx < vkImage2DList2.size();
998                                  bIdx++)
999                             {
1000                                 vkImage2DListDeviceMemory2.push_back(
1001                                     new VulkanDeviceMemory(
1002                                         vkDevice, vkImage2DList2[bIdx],
1003                                         memoryType,
1004                                         vkExternalMemoryHandleType));
1005                                 vkImage2DListDeviceMemory2[bIdx]->bindImage(
1006                                     vkImage2DList2[bIdx], 0);
1007                                 externalMemory2.push_back(
1008                                     new clExternalMemoryImage(
1009                                         *vkImage2DListDeviceMemory2[bIdx],
1010                                         vkExternalMemoryHandleType, context,
1011                                         totalImageMemSize, width, height, 0,
1012                                         vkImage2DList2[bIdx], deviceId));
1013                             }
1014 
1015                             cl_mem external_mem_image1[4];
1016                             cl_mem external_mem_image2[4];
1017                             for (int i = 0; i < num2DImages; i++)
1018                             {
1019                                 external_mem_image1[i] =
1020                                     externalMemory1[i]
1021                                         ->getExternalMemoryImage();
1022                                 external_mem_image2[i] =
1023                                     externalMemory2[i]
1024                                         ->getExternalMemoryImage();
1025                             }
1026 
1027                             clCl2VkExternalSemaphore->signal(cmd_queue1);
1028                             if (!useSingleImageKernel)
1029                             {
1030                                 vkDescriptorSet.updateArray(1,
1031                                                             vkImage2DViewList);
1032                                 vkCopyCommandBuffer.begin();
1033                                 vkCopyCommandBuffer.pipelineBarrier(
1034                                     vkImage2DList,
1035                                     VULKAN_IMAGE_LAYOUT_UNDEFINED,
1036                                     VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
1037                                 for (size_t i2DIdx = 0;
1038                                      i2DIdx < vkImage2DList.size(); i2DIdx++)
1039                                 {
1040                                     vkCopyCommandBuffer.copyBufferToImage(
1041                                         vkSrcBuffer, vkImage2DList[i2DIdx],
1042                                         VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
1043                                 }
1044                                 vkCopyCommandBuffer.pipelineBarrier(
1045                                     vkImage2DList,
1046                                     VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1047                                     VULKAN_IMAGE_LAYOUT_GENERAL);
1048                                 vkCopyCommandBuffer.end();
1049                                 memset(dstBufferPtr, 0, srcBufSize);
1050                                 vkQueue.submit(vkCopyCommandBuffer);
1051                                 vkShaderCommandBuffer.begin();
1052                                 vkShaderCommandBuffer.bindPipeline(
1053                                     vkComputePipeline);
1054                                 vkShaderCommandBuffer.bindDescriptorSets(
1055                                     vkComputePipeline, vkPipelineLayout,
1056                                     vkDescriptorSet);
1057                                 vkShaderCommandBuffer.dispatch(
1058                                     NUM_BLOCKS(width, NUM_THREADS_PER_GROUP_X),
1059                                     NUM_BLOCKS(height,
1060                                                NUM_THREADS_PER_GROUP_Y / 2),
1061                                     1);
1062                                 vkShaderCommandBuffer.end();
1063                             }
1064                             for (uint32_t iter = 0; iter < innerIterations;
1065                                  iter++)
1066                             {
1067                                 if (useSingleImageKernel)
1068                                 {
1069                                     for (size_t i2DIdx = 0;
1070                                          i2DIdx < vkImage2DList.size();
1071                                          i2DIdx++)
1072                                     {
1073                                         vkDescriptorSet.update(
1074                                             1, vkImage2DViewList[i2DIdx]);
1075                                         vkCopyCommandBuffer.begin();
1076                                         vkCopyCommandBuffer.pipelineBarrier(
1077                                             vkImage2DList,
1078                                             VULKAN_IMAGE_LAYOUT_UNDEFINED,
1079                                             VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
1080 
1081                                         vkCopyCommandBuffer.copyBufferToImage(
1082                                             vkSrcBuffer, vkImage2DList[i2DIdx],
1083                                             VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
1084                                         vkCopyCommandBuffer.pipelineBarrier(
1085                                             vkImage2DList,
1086                                             VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1087                                             VULKAN_IMAGE_LAYOUT_GENERAL);
1088                                         vkCopyCommandBuffer.end();
1089                                         memset(dstBufferPtr, 0, srcBufSize);
1090                                         vkQueue.submit(vkCopyCommandBuffer);
1091                                         vkShaderCommandBuffer.begin();
1092                                         vkShaderCommandBuffer.bindPipeline(
1093                                             vkComputePipeline);
1094                                         vkShaderCommandBuffer
1095                                             .bindDescriptorSets(
1096                                                 vkComputePipeline,
1097                                                 vkPipelineLayout,
1098                                                 vkDescriptorSet);
1099                                         vkShaderCommandBuffer.dispatch(
1100                                             NUM_BLOCKS(width,
1101                                                        NUM_THREADS_PER_GROUP_X),
1102                                             NUM_BLOCKS(height,
1103                                                        NUM_THREADS_PER_GROUP_Y
1104                                                            / 2),
1105                                             1);
1106                                         vkShaderCommandBuffer.end();
1107                                         if (i2DIdx < vkImage2DList.size() - 1)
1108                                         {
1109                                             vkQueue.submit(
1110                                                 vkShaderCommandBuffer);
1111                                         }
1112                                     }
1113                                 }
1114                                 vkQueue.submit(vkCl2VkSemaphore,
1115                                                vkShaderCommandBuffer,
1116                                                vkVk2CLSemaphore);
1117                                 clVk2CLExternalSemaphore->wait(cmd_queue1);
1118                                 switch (num2DImages)
1119                                 {
1120                                     case 1:
1121                                         updateKernelCQ1 = getKernelType(
1122                                             vkFormat, kernel_float[0],
1123                                             kernel_signed[0],
1124                                             kernel_unsigned[0]);
1125                                         break;
1126                                     case 2:
1127                                         updateKernelCQ1 = getKernelType(
1128                                             vkFormat, kernel_float[1],
1129                                             kernel_signed[1],
1130                                             kernel_unsigned[1]);
1131                                         break;
1132                                     case 4:
1133                                         updateKernelCQ1 = getKernelType(
1134                                             vkFormat, kernel_float[2],
1135                                             kernel_signed[2],
1136                                             kernel_unsigned[2]);
1137                                         break;
1138                                 }
1139                                 int j = 0;
1140                                 for (int i = 0; i < num2DImages; i++, ++j)
1141                                 {
1142                                     err = clSetKernelArg(
1143                                         updateKernelCQ1, j, sizeof(cl_mem),
1144                                         &external_mem_image1[i]);
1145                                     err |= clSetKernelArg(
1146                                         updateKernelCQ1, ++j, sizeof(cl_mem),
1147                                         &external_mem_image2[i]);
1148                                 }
1149                                 err |= clSetKernelArg(updateKernelCQ1, j,
1150                                                       sizeof(unsigned int),
1151                                                       &num2DImages);
1152                                 err |= clSetKernelArg(updateKernelCQ1, ++j,
1153                                                       sizeof(unsigned int),
1154                                                       &width);
1155                                 err |= clSetKernelArg(updateKernelCQ1, ++j,
1156                                                       sizeof(unsigned int),
1157                                                       &height);
1158                                 err |= clSetKernelArg(updateKernelCQ1, ++j,
1159                                                       sizeof(unsigned int),
1160                                                       &numMipLevels);
1161 
1162                                 if (err != CL_SUCCESS)
1163                                 {
1164                                     print_error(err,
1165                                                 "Error: Failed to set arg "
1166                                                 "values for kernel-1\n");
1167                                     goto CLEANUP;
1168                                 }
1169 
1170                                 size_t global_work_size[3] = { width, height,
1171                                                                1 };
1172                                 err = clEnqueueNDRangeKernel(
1173                                     cmd_queue1, updateKernelCQ1, 2, NULL,
1174                                     global_work_size, NULL, 0, NULL, NULL);
1175                                 if (err != CL_SUCCESS)
1176                                 {
1177                                     goto CLEANUP;
1178                                 }
1179                                 clCl2VkExternalSemaphore->signal(cmd_queue1);
1180                             }
1181 
1182                             unsigned int flags = 0;
1183                             size_t mipmapLevelOffset = 0;
1184                             cl_event eventReadImage = NULL;
1185                             for (int i = 0; i < num2DImages; i++)
1186                             {
1187                                 err = clEnqueueReadImage(
1188                                     cmd_queue1, external_mem_image2[i], CL_TRUE,
1189                                     origin, region, 0, 0, dstBufferPtr, 0, NULL,
1190                                     &eventReadImage);
1191 
1192                                 if (err != CL_SUCCESS)
1193                                 {
1194                                     print_error(err,
1195                                                 "clEnqueueReadImage failed with"
1196                                                 "error\n");
1197                                 }
1198 
1199                                 if (memcmp(srcBufferPtr, dstBufferPtr,
1200                                            srcBufSize))
1201                                 {
1202                                     log_info("Source and destination buffers "
1203                                              "don't match\n");
1204                                     if (debug_trace)
1205                                     {
1206                                         log_info("Source buffer contents: \n");
1207                                         for (uint64_t sIdx = 0;
1208                                              sIdx < srcBufSize; sIdx++)
1209                                         {
1210                                             log_info(
1211                                                 "%d",
1212                                                 (int)vkSrcBufferDeviceMemoryPtr
1213                                                     [sIdx]);
1214                                         }
1215                                         log_info(
1216                                             "Destination buffer contents:");
1217                                         for (uint64_t dIdx = 0;
1218                                              dIdx < srcBufSize; dIdx++)
1219                                         {
1220                                             log_info("%d",
1221                                                      (int)dstBufferPtr[dIdx]);
1222                                         }
1223                                     }
1224                                     err = -1;
1225                                     break;
1226                                 }
1227                             }
1228                             for (int i = 0; i < num2DImages; i++)
1229                             {
1230                                 delete vkImage2DListDeviceMemory1[i];
1231                                 delete vkImage2DListDeviceMemory2[i];
1232                                 delete externalMemory1[i];
1233                                 delete externalMemory2[i];
1234                             }
1235                             vkImage2DListDeviceMemory1.erase(
1236                                 vkImage2DListDeviceMemory1.begin(),
1237                                 vkImage2DListDeviceMemory1.begin()
1238                                     + num2DImages);
1239                             vkImage2DListDeviceMemory2.erase(
1240                                 vkImage2DListDeviceMemory2.begin(),
1241                                 vkImage2DListDeviceMemory2.begin()
1242                                     + num2DImages);
1243                             externalMemory1.erase(externalMemory1.begin(),
1244                                                   externalMemory1.begin()
1245                                                       + num2DImages);
1246                             externalMemory2.erase(externalMemory2.begin(),
1247                                                   externalMemory2.begin()
1248                                                       + num2DImages);
1249                             if (CL_SUCCESS != err)
1250                             {
1251                                 goto CLEANUP;
1252                             }
1253                         }
1254                     }
1255                 }
1256             }
1257         }
1258         vkImage2DShader.clear();
1259     }
1260 CLEANUP:
1261     if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
1262     if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
1263 
1264     if (srcBufferPtr) free(srcBufferPtr);
1265     if (dstBufferPtr) free(dstBufferPtr);
1266     return err;
1267 }
1268 
test_image_common(cl_device_id device_,cl_context context_,cl_command_queue queue_,int numElements_)1269 int test_image_common(cl_device_id device_, cl_context context_,
1270                       cl_command_queue queue_, int numElements_)
1271 {
1272     int current_device = 0;
1273     int device_count = 0;
1274     int devices_prohibited = 0;
1275     cl_int err = CL_SUCCESS;
1276     cl_platform_id platform = NULL;
1277     size_t extensionSize = 0;
1278     cl_uint num_devices = 0;
1279     cl_uint device_no = 0;
1280     cl_device_id *devices;
1281     char *extensions = NULL;
1282     const char *program_source_const;
1283     cl_command_queue cmd_queue1 = NULL;
1284     cl_command_queue cmd_queue2 = NULL;
1285     cl_context context = NULL;
1286     const uint32_t num_kernels = ARRAY_SIZE(num2DImagesList) + 1;
1287     // One kernel for Cross-CQ case
1288     const uint32_t num_kernel_types = 3;
1289     const char *kernel_source[num_kernels] = { kernel_text_numImage_1,
1290                                                kernel_text_numImage_2,
1291                                                kernel_text_numImage_4 };
1292     char source_1[4096];
1293     char source_2[4096];
1294     char source_3[4096];
1295     size_t program_source_length;
1296     cl_program program[num_kernel_types];
1297     cl_kernel kernel_float[num_kernels] = { NULL, NULL, NULL, NULL };
1298     cl_kernel kernel_signed[num_kernels] = { NULL, NULL, NULL, NULL };
1299     cl_kernel kernel_unsigned[num_kernels] = { NULL, NULL, NULL, NULL };
1300     cl_mem external_mem_image1;
1301     cl_mem external_mem_image2;
1302 
1303     VulkanDevice vkDevice;
1304 
1305     cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, 0, 0 };
1306     // get the platform ID
1307     err = clGetPlatformIDs(1, &platform, NULL);
1308     if (err != CL_SUCCESS)
1309     {
1310         print_error(err, "Error: Failed to get platform\n");
1311         goto CLEANUP;
1312     }
1313 
1314     err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices);
1315     if (CL_SUCCESS != err)
1316     {
1317         print_error(err, "clGetDeviceIDs failed in returning no. of devices\n");
1318         goto CLEANUP;
1319     }
1320     devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id));
1321     if (NULL == devices)
1322     {
1323         err = CL_OUT_OF_HOST_MEMORY;
1324         print_error(err, "Unable to allocate memory for devices\n");
1325         goto CLEANUP;
1326     }
1327     err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices,
1328                          NULL);
1329     if (CL_SUCCESS != err)
1330     {
1331         print_error(err, "Failed to get deviceID.\n");
1332         goto CLEANUP;
1333     }
1334     contextProperties[1] = (cl_context_properties)platform;
1335     log_info("Assigned contextproperties for platform\n");
1336     for (device_no = 0; device_no < num_devices; device_no++)
1337     {
1338         err = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS, 0, NULL,
1339                               &extensionSize);
1340         if (CL_SUCCESS != err)
1341         {
1342             print_error(
1343                 err,
1344                 "Error in clGetDeviceInfo for getting device_extension size\n");
1345             goto CLEANUP;
1346         }
1347         extensions = (char *)malloc(extensionSize);
1348         if (NULL == extensions)
1349         {
1350             err = CL_OUT_OF_HOST_MEMORY;
1351             print_error(err, "Unable to allocate memory for extensions\n");
1352             goto CLEANUP;
1353         }
1354         err = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS,
1355                               extensionSize, extensions, NULL);
1356         if (CL_SUCCESS != err)
1357         {
1358             print_error(
1359                 err, "Error in clGetDeviceInfo for getting device_extension\n");
1360             goto CLEANUP;
1361         }
1362         err = clGetDeviceInfo(devices[device_no], CL_DEVICE_UUID_KHR,
1363                               CL_UUID_SIZE_KHR, uuid, &extensionSize);
1364         if (CL_SUCCESS != err)
1365         {
1366             print_error(err, "clGetDeviceInfo failed with error");
1367             goto CLEANUP;
1368         }
1369         err =
1370             memcmp(uuid, vkDevice.getPhysicalDevice().getUUID(), VK_UUID_SIZE);
1371         if (err == 0)
1372         {
1373             break;
1374         }
1375     }
1376     if (device_no >= num_devices)
1377     {
1378         err = EXIT_FAILURE;
1379         print_error(err,
1380                     "OpenCL error:"
1381                     "No Vulkan-OpenCL Interop capable GPU found.\n");
1382         goto CLEANUP;
1383     }
1384     deviceId = devices[device_no];
1385     err = setMaxImageDimensions(deviceId, max_width, max_height);
1386     if (CL_SUCCESS != err)
1387     {
1388         print_error(err, "error setting max image dimensions");
1389         goto CLEANUP;
1390     }
1391     log_info("Set max_width to %lu and max_height to %lu\n", max_width,
1392              max_height);
1393     context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU,
1394                                       NULL, NULL, &err);
1395     if (CL_SUCCESS != err)
1396     {
1397         print_error(err, "error creating context");
1398         goto CLEANUP;
1399     }
1400     log_info("Successfully created context !!!\n");
1401 
1402     cmd_queue1 = clCreateCommandQueue(context, devices[device_no], 0, &err);
1403     if (CL_SUCCESS != err)
1404     {
1405         err = CL_INVALID_COMMAND_QUEUE;
1406         print_error(err, "Error: Failed to create command queue!\n");
1407         goto CLEANUP;
1408     }
1409     log_info("clCreateCommandQueue successfull \n");
1410 
1411     cmd_queue2 = clCreateCommandQueue(context, devices[device_no], 0, &err);
1412     if (CL_SUCCESS != err)
1413     {
1414         err = CL_INVALID_COMMAND_QUEUE;
1415         print_error(err, "Error: Failed to create command queue!\n");
1416         goto CLEANUP;
1417     }
1418     log_info("clCreateCommandQueue2 successful \n");
1419 
1420     for (int i = 0; i < num_kernels; i++)
1421     {
1422         switch (i)
1423         {
1424             case 0:
1425                 sprintf(source_1, kernel_source[i], "float4", "f", "float4",
1426                         "f", "f", "f");
1427                 sprintf(source_2, kernel_source[i], "int4", "i", "int4", "i",
1428                         "i", "i");
1429                 sprintf(source_3, kernel_source[i], "uint4", "ui", "uint4",
1430                         "ui", "ui", "ui");
1431                 break;
1432             case 1:
1433                 sprintf(source_1, kernel_source[i], "float4", "f", "float4",
1434                         "f", "float4", "f", "float4", "f", "f", "f", "f", "f");
1435                 sprintf(source_2, kernel_source[i], "int4", "i", "int4", "i",
1436                         "int4", "i", "int4", "i", "i", "i", "i", "i");
1437                 sprintf(source_3, kernel_source[i], "uint4", "ui", "uint4",
1438                         "ui", "uint4", "ui", "uint4", "ui", "ui", "ui", "ui",
1439                         "ui");
1440                 break;
1441             case 2:
1442                 sprintf(source_1, kernel_source[i], "float4", "f", "float4",
1443                         "f", "float4", "f", "float4", "f", "float4", "f",
1444                         "float4", "f", "float4", "f", "float4", "f", "f", "f",
1445                         "f", "f", "f", "f", "f", "f");
1446                 sprintf(source_2, kernel_source[i], "int4", "i", "int4", "i",
1447                         "int4", "i", "int4", "i", "int4", "i", "int4", "i",
1448                         "int4", "i", "int4", "i", "i", "i", "i", "i", "i", "i",
1449                         "i", "i");
1450                 sprintf(source_3, kernel_source[i], "uint4", "ui", "uint4",
1451                         "ui", "uint4", "ui", "uint4", "ui", "uint4", "ui",
1452                         "uint4", "ui", "uint4", "ui", "uint4", "ui", "ui", "ui",
1453                         "ui", "ui", "ui", "ui", "ui", "ui");
1454                 break;
1455             case 3:
1456                 // Addtional case for creating updateKernelCQ2 which takes two
1457                 // images
1458                 sprintf(source_1, kernel_source[1], "float4", "f", "float4",
1459                         "f", "float4", "f", "float4", "f", "f", "f", "f", "f");
1460                 sprintf(source_2, kernel_source[1], "int4", "i", "int4", "i",
1461                         "int4", "i", "int4", "i", "i", "i", "i", "i");
1462                 sprintf(source_3, kernel_source[1], "uint4", "ui", "uint4",
1463                         "ui", "uint4", "ui", "uint4", "ui", "ui", "ui", "ui",
1464                         "ui");
1465                 break;
1466         }
1467         const char *sourceTexts[num_kernel_types] = { source_1, source_2,
1468                                                       source_3 };
1469         for (int k = 0; k < num_kernel_types; k++)
1470         {
1471             program_source_length = strlen(sourceTexts[k]);
1472             program[k] = clCreateProgramWithSource(
1473                 context, 1, &sourceTexts[k], &program_source_length, &err);
1474             err |= clBuildProgram(program[k], 0, NULL, NULL, NULL, NULL);
1475         }
1476 
1477         if (err != CL_SUCCESS)
1478         {
1479             print_error(err, "Error: Failed to build program");
1480             goto CLEANUP;
1481         }
1482         // create the kernel
1483         kernel_float[i] = clCreateKernel(program[0], "image2DKernel", &err);
1484         if (err != CL_SUCCESS)
1485         {
1486             print_error(err, "clCreateKernel failed");
1487             goto CLEANUP;
1488         }
1489         kernel_signed[i] = clCreateKernel(program[1], "image2DKernel", &err);
1490         if (err != CL_SUCCESS)
1491         {
1492             print_error(err, "clCreateKernel failed");
1493             goto CLEANUP;
1494         }
1495         kernel_unsigned[i] = clCreateKernel(program[2], "image2DKernel", &err);
1496         if (err != CL_SUCCESS)
1497         {
1498             print_error(err, "clCreateKernel failed ");
1499             goto CLEANUP;
1500         }
1501     }
1502     if (numCQ == 2)
1503     {
1504         err = run_test_with_two_queue(context, cmd_queue1, cmd_queue2,
1505                                       kernel_unsigned, kernel_signed,
1506                                       kernel_float, vkDevice);
1507     }
1508     else
1509     {
1510         err = run_test_with_one_queue(context, cmd_queue1, kernel_unsigned,
1511                                       kernel_signed, kernel_float, vkDevice);
1512     }
1513 CLEANUP:
1514     for (int i = 0; i < num_kernels; i++)
1515     {
1516         if (kernel_float[i])
1517         {
1518             clReleaseKernel(kernel_float[i]);
1519         }
1520         if (kernel_unsigned[i])
1521         {
1522             clReleaseKernel(kernel_unsigned[i]);
1523         }
1524         if (kernel_signed[i])
1525         {
1526             clReleaseKernel(kernel_signed[i]);
1527         }
1528     }
1529     for (int i = 0; i < num_kernel_types; i++)
1530     {
1531         if (program[i])
1532         {
1533             clReleaseProgram(program[i]);
1534         }
1535     }
1536     if (cmd_queue1) clReleaseCommandQueue(cmd_queue1);
1537     if (cmd_queue2) clReleaseCommandQueue(cmd_queue2);
1538     if (context) clReleaseContext(context);
1539 
1540     if (extensions) free(extensions);
1541     if (devices) free(devices);
1542 
1543     return err;
1544 }
1545