xref: /aosp_15_r20/external/angle/src/libANGLE/renderer/vulkan/CLProgramVk.cpp (revision 8975f5c5ed3d1c378011245431ada316dfb6f244)
1 //
2 // Copyright 2021 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 // CLProgramVk.cpp: Implements the class methods for CLProgramVk.
7 
8 #include "libANGLE/renderer/vulkan/CLProgramVk.h"
9 #include "libANGLE/renderer/vulkan/CLContextVk.h"
10 #include "libANGLE/renderer/vulkan/CLDeviceVk.h"
11 #include "libANGLE/renderer/vulkan/clspv_utils.h"
12 #include "libANGLE/renderer/vulkan/vk_cache_utils.h"
13 #include "libANGLE/renderer/vulkan/vk_helpers.h"
14 
15 #include "libANGLE/CLContext.h"
16 #include "libANGLE/CLKernel.h"
17 #include "libANGLE/CLProgram.h"
18 #include "libANGLE/cl_utils.h"
19 
20 #include "common/log_utils.h"
21 #include "common/string_utils.h"
22 #include "common/system_utils.h"
23 
24 #include "clspv/Compiler.h"
25 
26 #include "spirv/unified1/NonSemanticClspvReflection.h"
27 #include "spirv/unified1/spirv.hpp"
28 
29 #include "spirv-tools/libspirv.hpp"
30 #include "spirv-tools/optimizer.hpp"
31 
32 namespace rx
33 {
34 
35 namespace
36 {
// Compile-time switch mirroring ANGLE_ENABLE_ASSERTS: true when the macro is defined for this
// translation unit, false otherwise.
#ifdef ANGLE_ENABLE_ASSERTS
constexpr bool kAngleDebug = true;
#else
constexpr bool kAngleDebug = false;
#endif
42 
43 // Used by SPIRV-Tools to parse reflection info
ParseReflection(CLProgramVk::SpvReflectionData & reflectionData,const spv_parsed_instruction_t & spvInstr)44 spv_result_t ParseReflection(CLProgramVk::SpvReflectionData &reflectionData,
45                              const spv_parsed_instruction_t &spvInstr)
46 {
47     // Parse spir-v opcodes
48     switch (spvInstr.opcode)
49     {
50         // --- Clspv specific parsing for below cases ---
51         case spv::OpExtInst:
52         {
53             switch (spvInstr.words[4])
54             {
55                 case NonSemanticClspvReflectionKernel:
56                 {
57                     // Extract kernel name and args - add to kernel args map
58                     std::string functionName = reflectionData.spvStrLookup[spvInstr.words[6]];
59                     uint32_t numArgs         = reflectionData.spvIntLookup[spvInstr.words[7]];
60                     reflectionData.kernelArgsMap[functionName] = CLKernelArguments();
61                     reflectionData.kernelArgsMap[functionName].resize(numArgs);
62 
63                     // Store kernel flags and attributes
64                     reflectionData.kernelFlags[functionName] =
65                         reflectionData.spvIntLookup[spvInstr.words[8]];
66                     reflectionData.kernelAttributes[functionName] =
67                         reflectionData.spvStrLookup[spvInstr.words[9]];
68 
69                     // Save kernel name to reflection table for later use/lookup in parser routine
70                     reflectionData.kernelIDs.insert(spvInstr.words[2]);
71                     reflectionData.spvStrLookup[spvInstr.words[2]] = std::string(functionName);
72 
73                     // If we already parsed some args ahead of time, populate them now
74                     if (reflectionData.kernelArgMap.contains(functionName))
75                     {
76                         for (const auto &arg : reflectionData.kernelArgMap)
77                         {
78                             uint32_t ordinal = arg.second.ordinal;
79                             reflectionData.kernelArgsMap[functionName].at(ordinal) =
80                                 std::move(arg.second);
81                         }
82                     }
83                     break;
84                 }
85                 case NonSemanticClspvReflectionArgumentInfo:
86                 {
87                     CLKernelVk::ArgInfo kernelArgInfo;
88                     kernelArgInfo.name = reflectionData.spvStrLookup[spvInstr.words[5]];
89                     // If instruction has more than 5 instruction operands (minus instruction
90                     // name/opcode), that means we have arg qualifiers. ArgumentInfo also counts as
91                     // an operand for OpExtInst. In below example, [ %e %f %g %h ] are the arg
92                     // qualifier operands.
93                     //
94                     // %a = OpExtInst %b %c ArgumentInfo %d [ %e %f %g %h ]
95                     if (spvInstr.num_operands > 5)
96                     {
97                         kernelArgInfo.typeName = reflectionData.spvStrLookup[spvInstr.words[6]];
98                         kernelArgInfo.addressQualifier =
99                             reflectionData.spvIntLookup[spvInstr.words[7]];
100                         kernelArgInfo.accessQualifier =
101                             reflectionData.spvIntLookup[spvInstr.words[8]];
102                         kernelArgInfo.typeQualifier =
103                             reflectionData.spvIntLookup[spvInstr.words[9]];
104                     }
105                     // Store kern arg for later lookup
106                     reflectionData.kernelArgInfos[spvInstr.words[2]] = std::move(kernelArgInfo);
107                     break;
108                 }
109                 case NonSemanticClspvReflectionArgumentPodUniform:
110                 case NonSemanticClspvReflectionArgumentPointerUniform:
111                 case NonSemanticClspvReflectionArgumentPodStorageBuffer:
112                 {
113                     CLKernelArgument kernelArg;
114                     if (spvInstr.num_operands == 11)
115                     {
116                         const CLKernelVk::ArgInfo &kernelArgInfo =
117                             reflectionData.kernelArgInfos[spvInstr.words[11]];
118                         kernelArg.info.name             = kernelArgInfo.name;
119                         kernelArg.info.typeName         = kernelArgInfo.typeName;
120                         kernelArg.info.addressQualifier = kernelArgInfo.addressQualifier;
121                         kernelArg.info.accessQualifier  = kernelArgInfo.accessQualifier;
122                         kernelArg.info.typeQualifier    = kernelArgInfo.typeQualifier;
123                     }
124                     kernelArg.type    = spvInstr.words[4];
125                     kernelArg.used    = true;
126                     kernelArg.ordinal = reflectionData.spvIntLookup[spvInstr.words[6]];
127                     kernelArg.op3     = reflectionData.spvIntLookup[spvInstr.words[7]];
128                     kernelArg.op4     = reflectionData.spvIntLookup[spvInstr.words[8]];
129                     kernelArg.op5     = reflectionData.spvIntLookup[spvInstr.words[9]];
130                     kernelArg.op6     = reflectionData.spvIntLookup[spvInstr.words[10]];
131 
132                     if (reflectionData.kernelIDs.contains(spvInstr.words[5]))
133                     {
134                         CLKernelArguments &kernelArgs =
135                             reflectionData
136                                 .kernelArgsMap[reflectionData.spvStrLookup[spvInstr.words[5]]];
137                         kernelArgs.at(kernelArg.ordinal) = std::move(kernelArg);
138                     }
139                     else
140                     {
141                         // Reflection kernel not yet parsed, place in temp storage for now
142                         reflectionData
143                             .kernelArgMap[reflectionData.spvStrLookup[spvInstr.words[5]]] =
144                             std::move(kernelArg);
145                     }
146 
147                     break;
148                 }
149                 case NonSemanticClspvReflectionArgumentUniform:
150                 case NonSemanticClspvReflectionArgumentWorkgroup:
151                 case NonSemanticClspvReflectionArgumentSampler:
152                 case NonSemanticClspvReflectionArgumentStorageImage:
153                 case NonSemanticClspvReflectionArgumentSampledImage:
154                 case NonSemanticClspvReflectionArgumentStorageBuffer:
155                 case NonSemanticClspvReflectionArgumentStorageTexelBuffer:
156                 case NonSemanticClspvReflectionArgumentUniformTexelBuffer:
157                 case NonSemanticClspvReflectionArgumentPodPushConstant:
158                 case NonSemanticClspvReflectionArgumentPointerPushConstant:
159                 {
160                     CLKernelArgument kernelArg;
161                     if (spvInstr.num_operands == 9)
162                     {
163                         const CLKernelVk::ArgInfo &kernelArgInfo =
164                             reflectionData.kernelArgInfos[spvInstr.words[9]];
165                         kernelArg.info.name             = kernelArgInfo.name;
166                         kernelArg.info.typeName         = kernelArgInfo.typeName;
167                         kernelArg.info.addressQualifier = kernelArgInfo.addressQualifier;
168                         kernelArg.info.accessQualifier  = kernelArgInfo.accessQualifier;
169                         kernelArg.info.typeQualifier    = kernelArgInfo.typeQualifier;
170                     }
171 
172                     kernelArg.type    = spvInstr.words[4];
173                     kernelArg.used    = true;
174                     kernelArg.ordinal = reflectionData.spvIntLookup[spvInstr.words[6]];
175                     kernelArg.op3     = reflectionData.spvIntLookup[spvInstr.words[7]];
176                     kernelArg.op4     = reflectionData.spvIntLookup[spvInstr.words[8]];
177 
178                     if (reflectionData.kernelIDs.contains(spvInstr.words[5]))
179                     {
180                         CLKernelArguments &kernelArgs =
181                             reflectionData
182                                 .kernelArgsMap[reflectionData.spvStrLookup[spvInstr.words[5]]];
183                         kernelArgs.at(kernelArg.ordinal) = std::move(kernelArg);
184                     }
185                     else
186                     {
187                         // Reflection kernel not yet parsed, place in temp storage for now
188                         reflectionData
189                             .kernelArgMap[reflectionData.spvStrLookup[spvInstr.words[5]]] =
190                             std::move(kernelArg);
191                     }
192                     break;
193                 }
194                 case NonSemanticClspvReflectionPushConstantGlobalSize:
195                 case NonSemanticClspvReflectionPushConstantGlobalOffset:
196                 case NonSemanticClspvReflectionPushConstantRegionOffset:
197                 case NonSemanticClspvReflectionPushConstantNumWorkgroups:
198                 case NonSemanticClspvReflectionPushConstantRegionGroupOffset:
199                 case NonSemanticClspvReflectionPushConstantEnqueuedLocalSize:
200                 {
201                     uint32_t offset = reflectionData.spvIntLookup[spvInstr.words[5]];
202                     uint32_t size   = reflectionData.spvIntLookup[spvInstr.words[6]];
203                     reflectionData.pushConstants[spvInstr.words[4]] = {
204                         .stageFlags = 0, .offset = offset, .size = size};
205                     break;
206                 }
207                 case NonSemanticClspvReflectionSpecConstantWorkgroupSize:
208                 {
209                     reflectionData.specConstantIDs[SpecConstantType::WorkgroupSizeX] =
210                         reflectionData.spvIntLookup[spvInstr.words[5]];
211                     reflectionData.specConstantIDs[SpecConstantType::WorkgroupSizeY] =
212                         reflectionData.spvIntLookup[spvInstr.words[6]];
213                     reflectionData.specConstantIDs[SpecConstantType::WorkgroupSizeZ] =
214                         reflectionData.spvIntLookup[spvInstr.words[7]];
215                     reflectionData.specConstantsUsed[SpecConstantType::WorkgroupSizeX] = true;
216                     reflectionData.specConstantsUsed[SpecConstantType::WorkgroupSizeY] = true;
217                     reflectionData.specConstantsUsed[SpecConstantType::WorkgroupSizeZ] = true;
218                     break;
219                 }
220                 case NonSemanticClspvReflectionPropertyRequiredWorkgroupSize:
221                 {
222                     reflectionData.kernelCompileWorkgroupSize
223                         [reflectionData.spvStrLookup[spvInstr.words[5]]] = {
224                         reflectionData.spvIntLookup[spvInstr.words[6]],
225                         reflectionData.spvIntLookup[spvInstr.words[7]],
226                         reflectionData.spvIntLookup[spvInstr.words[8]]};
227                     break;
228                 }
229                 case NonSemanticClspvReflectionSpecConstantWorkDim:
230                 {
231                     reflectionData.specConstantIDs[SpecConstantType::WorkDimension] =
232                         reflectionData.spvIntLookup[spvInstr.words[5]];
233                     reflectionData.specConstantsUsed[SpecConstantType::WorkDimension] = true;
234                     break;
235                 }
236                 case NonSemanticClspvReflectionSpecConstantGlobalOffset:
237                     reflectionData.specConstantIDs[SpecConstantType::GlobalOffsetX] =
238                         reflectionData.spvIntLookup[spvInstr.words[5]];
239                     reflectionData.specConstantIDs[SpecConstantType::GlobalOffsetY] =
240                         reflectionData.spvIntLookup[spvInstr.words[6]];
241                     reflectionData.specConstantIDs[SpecConstantType::GlobalOffsetZ] =
242                         reflectionData.spvIntLookup[spvInstr.words[7]];
243                     reflectionData.specConstantsUsed[SpecConstantType::GlobalOffsetX] = true;
244                     reflectionData.specConstantsUsed[SpecConstantType::GlobalOffsetY] = true;
245                     reflectionData.specConstantsUsed[SpecConstantType::GlobalOffsetZ] = true;
246                     break;
247                 case NonSemanticClspvReflectionPrintfInfo:
248                 {
249                     // Info on the format string used in the builtin printf call in kernel
250                     uint32_t printfID        = reflectionData.spvIntLookup[spvInstr.words[5]];
251                     std::string formatString = reflectionData.spvStrLookup[spvInstr.words[6]];
252                     reflectionData.printfInfoMap[printfID].id              = printfID;
253                     reflectionData.printfInfoMap[printfID].formatSpecifier = formatString;
254                     for (int i = 6; i < spvInstr.num_operands; i++)
255                     {
256                         uint16_t offset = spvInstr.operands[i].offset;
257                         size_t size     = reflectionData.spvIntLookup[spvInstr.words[offset]];
258                         reflectionData.printfInfoMap[printfID].argSizes.push_back(
259                             static_cast<uint32_t>(size));
260                     }
261 
262                     break;
263                 }
264                 case NonSemanticClspvReflectionPrintfBufferStorageBuffer:
265                 {
266                     // Info about the printf storage buffer that contains the formatted content
267                     uint32_t set     = reflectionData.spvIntLookup[spvInstr.words[5]];
268                     uint32_t binding = reflectionData.spvIntLookup[spvInstr.words[6]];
269                     uint32_t size    = reflectionData.spvIntLookup[spvInstr.words[7]];
270                     reflectionData.printfBufferStorage = {set, binding, 0, size};
271                     break;
272                 }
273                 case NonSemanticClspvReflectionPrintfBufferPointerPushConstant:
274                 {
275                     ERR() << "Shouldn't be here. Support of printf builtin function is enabled "
276                              "through "
277                              "PrintfBufferStorageBuffer. Check optins passed down to clspv";
278                     UNREACHABLE();
279                     return SPV_UNSUPPORTED;
280                 }
281                 case NonSemanticClspvReflectionNormalizedSamplerMaskPushConstant:
282                 case NonSemanticClspvReflectionImageArgumentInfoChannelOrderPushConstant:
283                 case NonSemanticClspvReflectionImageArgumentInfoChannelDataTypePushConstant:
284                 {
285                     uint32_t ordinal            = reflectionData.spvIntLookup[spvInstr.words[6]];
286                     uint32_t offset             = reflectionData.spvIntLookup[spvInstr.words[7]];
287                     uint32_t size               = reflectionData.spvIntLookup[spvInstr.words[8]];
288                     VkPushConstantRange pcRange = {.stageFlags = 0, .offset = offset, .size = size};
289                     reflectionData.imagePushConstants[spvInstr.words[4]].push_back(
290                         {.pcRange = pcRange, .ordinal = ordinal});
291                     break;
292                 }
293                 default:
294                     break;
295             }
296             break;
297         }
298         // --- Regular SPIR-V opcode parsing for below cases ---
299         case spv::OpString:
300         {
301             reflectionData.spvStrLookup[spvInstr.words[1]] =
302                 reinterpret_cast<const char *>(&spvInstr.words[2]);
303             break;
304         }
305         case spv::OpConstant:
306         {
307             reflectionData.spvIntLookup[spvInstr.words[2]] = spvInstr.words[3];
308             break;
309         }
310         default:
311             break;
312     }
313     return SPV_SUCCESS;
314 }
315 
ProcessBuildOptions(const std::vector<std::string> & optionTokens,CLProgramVk::BuildType buildType)316 std::string ProcessBuildOptions(const std::vector<std::string> &optionTokens,
317                                 CLProgramVk::BuildType buildType)
318 {
319     std::string processedOptions;
320 
321     // Need to remove/replace options that are not 1-1 mapped to clspv
322     for (const std::string &optionToken : optionTokens)
323     {
324         if (optionToken == "-create-library" && buildType == CLProgramVk::BuildType::LINK)
325         {
326             processedOptions += " --output-format=bc";
327             continue;
328         }
329         processedOptions += " " + optionToken;
330     }
331 
332     switch (buildType)
333     {
334         case CLProgramVk::BuildType::COMPILE:
335             processedOptions += " --output-format=bc";
336             break;
337         case CLProgramVk::BuildType::LINK:
338             processedOptions += " -x ir";
339             break;
340         default:
341             break;
342     }
343 
344     return processedOptions;
345 }
346 
347 }  // namespace
348 
operator ()()349 void CLAsyncBuildTask::operator()()
350 {
351     ANGLE_TRACE_EVENT0("gpu.angle", "CLProgramVk::buildInternal (async)");
352     CLProgramVk::ScopedProgramCallback spc(mNotify);
353     if (!mProgramVk->buildInternal(mDevices, mOptions, mInternalOptions, mBuildType,
354                                    mLinkProgramsList))
355     {
356         ERR() << "Async build failed for program (" << mProgramVk
357               << ")! Check the build status or build log for details.";
358     }
359 }
360 
CLProgramVk(const cl::Program & program)361 CLProgramVk::CLProgramVk(const cl::Program &program)
362     : CLProgramImpl(program),
363       mContext(&program.getContext().getImpl<CLContextVk>()),
364       mAsyncBuildEvent(std::make_shared<angle::WaitableEventDone>())
365 {}
366 
init()367 angle::Result CLProgramVk::init()
368 {
369     cl::DevicePtrs devices;
370     ANGLE_TRY(mContext->getDevices(&devices));
371 
372     // The devices associated with the program object are the devices associated with context
373     for (const cl::DevicePtr &device : devices)
374     {
375         mAssociatedDevicePrograms[device->getNative()] = DeviceProgramData{};
376     }
377 
378     return angle::Result::Continue;
379 }
380 
init(const size_t * lengths,const unsigned char ** binaries,cl_int * binaryStatus)381 angle::Result CLProgramVk::init(const size_t *lengths,
382                                 const unsigned char **binaries,
383                                 cl_int *binaryStatus)
384 {
385     // The devices associated with program come from device_list param from
386     // clCreateProgramWithBinary
387     for (const cl::DevicePtr &device : mProgram.getDevices())
388     {
389         const unsigned char *binaryHandle = *binaries++;
390         size_t binarySize                 = *lengths++;
391 
392         // Check for header
393         if (binarySize < sizeof(ProgramBinaryOutputHeader))
394         {
395             if (binaryStatus)
396             {
397                 *binaryStatus++ = CL_INVALID_BINARY;
398             }
399             ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY);
400         }
401         binarySize -= sizeof(ProgramBinaryOutputHeader);
402 
403         // Check for valid binary version from header
404         const ProgramBinaryOutputHeader *binaryHeader =
405             reinterpret_cast<const ProgramBinaryOutputHeader *>(binaryHandle);
406         if (binaryHeader == nullptr)
407         {
408             ERR() << "NULL binary header!";
409             if (binaryStatus)
410             {
411                 *binaryStatus++ = CL_INVALID_BINARY;
412             }
413             ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY);
414         }
415         else if (binaryHeader->headerVersion < kBinaryVersion)
416         {
417             ERR() << "Binary version not compatible with runtime!";
418             if (binaryStatus)
419             {
420                 *binaryStatus++ = CL_INVALID_BINARY;
421             }
422             ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY);
423         }
424         binaryHandle += sizeof(ProgramBinaryOutputHeader);
425 
426         // See what kind of binary we have (i.e. SPIR-V or LLVM Bitcode)
427         // https://llvm.org/docs/BitCodeFormat.html#llvm-ir-magic-number
428         // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_magic_number
429         constexpr uint32_t LLVM_BC_MAGIC = 0xDEC04342;
430         constexpr uint32_t SPIRV_MAGIC   = 0x07230203;
431         const uint32_t &firstWord        = reinterpret_cast<const uint32_t *>(binaryHandle)[0];
432         bool isBC                        = firstWord == LLVM_BC_MAGIC;
433         bool isSPV                       = firstWord == SPIRV_MAGIC;
434         if (!isBC && !isSPV)
435         {
436             ERR() << "Binary is neither SPIR-V nor LLVM Bitcode!";
437             if (binaryStatus)
438             {
439                 *binaryStatus++ = CL_INVALID_BINARY;
440             }
441             ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY);
442         }
443 
444         // Add device binary to program
445         DeviceProgramData deviceBinary;
446         deviceBinary.binaryType  = binaryHeader->binaryType;
447         deviceBinary.buildStatus = binaryHeader->buildStatus;
448         switch (deviceBinary.binaryType)
449         {
450             case CL_PROGRAM_BINARY_TYPE_EXECUTABLE:
451                 deviceBinary.binary.assign(binarySize / sizeof(uint32_t), 0);
452                 std::memcpy(deviceBinary.binary.data(), binaryHandle, binarySize);
453                 break;
454             case CL_PROGRAM_BINARY_TYPE_LIBRARY:
455             case CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT:
456                 deviceBinary.IR.assign(binarySize, 0);
457                 std::memcpy(deviceBinary.IR.data(), binaryHandle, binarySize);
458                 break;
459             default:
460                 UNREACHABLE();
461                 ERR() << "Invalid binary type!";
462                 if (binaryStatus)
463                 {
464                     *binaryStatus++ = CL_INVALID_BINARY;
465                 }
466                 ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY);
467         }
468         mAssociatedDevicePrograms[device->getNative()] = std::move(deviceBinary);
469         if (binaryStatus)
470         {
471             *binaryStatus++ = CL_SUCCESS;
472         }
473     }
474 
475     return angle::Result::Continue;
476 }
477 
~CLProgramVk()478 CLProgramVk::~CLProgramVk()
479 {
480     for (vk::DynamicDescriptorPoolPointer &pool : mDynamicDescriptorPools)
481     {
482         pool.reset();
483     }
484     for (DescriptorSetIndex index : angle::AllEnums<DescriptorSetIndex>())
485     {
486         mMetaDescriptorPools[index].destroy(mContext->getRenderer());
487     }
488 }
489 
build(const cl::DevicePtrs & devices,const char * options,cl::Program * notify)490 angle::Result CLProgramVk::build(const cl::DevicePtrs &devices,
491                                  const char *options,
492                                  cl::Program *notify)
493 {
494     BuildType buildType = !mProgram.getSource().empty() ? BuildType::BUILD : BuildType::BINARY;
495     const cl::DevicePtrs &devicePtrs = !devices.empty() ? devices : mProgram.getDevices();
496 
497     setBuildStatus(devicePtrs, CL_BUILD_IN_PROGRESS);
498 
499     if (notify)
500     {
501         mAsyncBuildEvent =
502             getPlatform()->postMultiThreadWorkerTask(std::make_shared<CLAsyncBuildTask>(
503                 this, devicePtrs, std::string(options ? options : ""), "", buildType,
504                 LinkProgramsList{}, notify));
505         ASSERT(mAsyncBuildEvent != nullptr);
506     }
507     else
508     {
509         if (!buildInternal(devicePtrs, std::string(options ? options : ""), "", buildType,
510                            LinkProgramsList{}))
511         {
512             ANGLE_CL_RETURN_ERROR(CL_BUILD_PROGRAM_FAILURE);
513         }
514     }
515     return angle::Result::Continue;
516 }
517 
compile(const cl::DevicePtrs & devices,const char * options,const cl::ProgramPtrs & inputHeaders,const char ** headerIncludeNames,cl::Program * notify)518 angle::Result CLProgramVk::compile(const cl::DevicePtrs &devices,
519                                    const char *options,
520                                    const cl::ProgramPtrs &inputHeaders,
521                                    const char **headerIncludeNames,
522                                    cl::Program *notify)
523 {
524     const cl::DevicePtrs &devicePtrs = !devices.empty() ? devices : mProgram.getDevices();
525 
526     // Ensure OS temp dir is available
527     std::string internalCompileOpts;
528     Optional<std::string> tmpDir = angle::GetTempDirectory();
529     if (!tmpDir.valid())
530     {
531         ERR() << "Failed to open OS temp dir";
532         ANGLE_CL_RETURN_ERROR(CL_INVALID_OPERATION);
533     }
534     internalCompileOpts += inputHeaders.empty() ? "" : " -I" + tmpDir.value();
535 
536     // Dump input headers to OS temp directory
537     for (size_t i = 0; i < inputHeaders.size(); ++i)
538     {
539         const std::string &inputHeaderSrc =
540             inputHeaders.at(i)->getImpl<CLProgramVk>().mProgram.getSource();
541         std::string headerFilePath(angle::ConcatenatePath(tmpDir.value(), headerIncludeNames[i]));
542 
543         // Sanitize path so we can use "/" as universal path separator
544         angle::MakeForwardSlashThePathSeparator(headerFilePath);
545         size_t baseDirPos = headerFilePath.find_last_of("/");
546 
547         // Ensure parent dir(s) exists
548         if (!angle::CreateDirectories(headerFilePath.substr(0, baseDirPos)))
549         {
550             ERR() << "Failed to create output path(s) for header(s)!";
551             ANGLE_CL_RETURN_ERROR(CL_INVALID_OPERATION);
552         }
553         writeFile(headerFilePath.c_str(), inputHeaderSrc.data(), inputHeaderSrc.size());
554     }
555 
556     setBuildStatus(devicePtrs, CL_BUILD_IN_PROGRESS);
557 
558     // Perform compile
559     if (notify)
560     {
561         mAsyncBuildEvent = mProgram.getContext().getPlatform().getMultiThreadPool()->postWorkerTask(
562             std::make_shared<CLAsyncBuildTask>(
563                 this, devicePtrs, std::string(options ? options : ""), internalCompileOpts,
564                 BuildType::COMPILE, LinkProgramsList{}, notify));
565         ASSERT(mAsyncBuildEvent != nullptr);
566     }
567     else
568     {
569         mAsyncBuildEvent = std::make_shared<angle::WaitableEventDone>();
570         if (!buildInternal(devicePtrs, std::string(options ? options : ""), internalCompileOpts,
571                            BuildType::COMPILE, LinkProgramsList{}))
572         {
573             ANGLE_CL_RETURN_ERROR(CL_COMPILE_PROGRAM_FAILURE);
574         }
575     }
576 
577     return angle::Result::Continue;
578 }
579 
getInfo(cl::ProgramInfo name,size_t valueSize,void * value,size_t * valueSizeRet) const580 angle::Result CLProgramVk::getInfo(cl::ProgramInfo name,
581                                    size_t valueSize,
582                                    void *value,
583                                    size_t *valueSizeRet) const
584 {
585     cl_uint valUInt            = 0u;
586     cl_bool valBool            = CL_FALSE;
587     void *valPointer           = nullptr;
588     const void *copyValue      = nullptr;
589     size_t copySize            = 0u;
590     unsigned char **outputBins = reinterpret_cast<unsigned char **>(value);
591     std::string kernelNamesList;
592     std::vector<size_t> vBinarySizes;
593 
594     switch (name)
595     {
596         case cl::ProgramInfo::NumKernels:
597             for (const auto &deviceProgram : mAssociatedDevicePrograms)
598             {
599                 valUInt += static_cast<decltype(valUInt)>(deviceProgram.second.numKernels());
600             }
601             copyValue = &valUInt;
602             copySize  = sizeof(valUInt);
603             break;
604         case cl::ProgramInfo::BinarySizes:
605         {
606             for (const auto &deviceProgram : mAssociatedDevicePrograms)
607             {
608                 vBinarySizes.push_back(
609                     sizeof(ProgramBinaryOutputHeader) +
610                     (deviceProgram.second.binaryType == CL_PROGRAM_BINARY_TYPE_EXECUTABLE
611                          ? deviceProgram.second.binary.size() * sizeof(uint32_t)
612                          : deviceProgram.second.IR.size()));
613             }
614             valPointer = vBinarySizes.data();
615             copyValue  = valPointer;
616             copySize   = vBinarySizes.size() * sizeof(size_t);
617             break;
618         }
619         case cl::ProgramInfo::Binaries:
620             for (const auto &deviceProgram : mAssociatedDevicePrograms)
621             {
622                 const void *bin =
623                     deviceProgram.second.binaryType == CL_PROGRAM_BINARY_TYPE_EXECUTABLE
624                         ? reinterpret_cast<const void *>(deviceProgram.second.binary.data())
625                         : reinterpret_cast<const void *>(deviceProgram.second.IR.data());
626                 size_t binSize =
627                     deviceProgram.second.binaryType == CL_PROGRAM_BINARY_TYPE_EXECUTABLE
628                         ? deviceProgram.second.binary.size() * sizeof(uint32_t)
629                         : deviceProgram.second.IR.size();
630                 ProgramBinaryOutputHeader header{.headerVersion = kBinaryVersion,
631                                                  .binaryType    = deviceProgram.second.binaryType,
632                                                  .buildStatus   = deviceProgram.second.buildStatus};
633 
634                 if (outputBins != nullptr)
635                 {
636                     if (*outputBins != nullptr)
637                     {
638                         std::memcpy(*outputBins, &header, sizeof(ProgramBinaryOutputHeader));
639                         std::memcpy((*outputBins) + sizeof(ProgramBinaryOutputHeader), bin,
640                                     binSize);
641                     }
642                     outputBins++;
643                 }
644 
645                 // Spec just wants pointer size here
646                 copySize += sizeof(unsigned char *);
647             }
648             // We already copied the (headers + binaries) over - nothing else left to copy
649             copyValue = nullptr;
650             break;
651         case cl::ProgramInfo::KernelNames:
652             for (const auto &deviceProgram : mAssociatedDevicePrograms)
653             {
654                 kernelNamesList = deviceProgram.second.getKernelNames();
655             }
656             valPointer = kernelNamesList.data();
657             copyValue  = valPointer;
658             copySize   = kernelNamesList.size() + 1;
659             break;
660         case cl::ProgramInfo::ScopeGlobalCtorsPresent:
661         case cl::ProgramInfo::ScopeGlobalDtorsPresent:
662             // These are deprecated by version 3.0 and are currently not supported
663             copyValue = &valBool;
664             copySize  = sizeof(cl_bool);
665             break;
666         default:
667             UNREACHABLE();
668     }
669 
670     if ((value != nullptr) && (copyValue != nullptr))
671     {
672         std::memcpy(value, copyValue, copySize);
673     }
674 
675     if (valueSizeRet != nullptr)
676     {
677         *valueSizeRet = copySize;
678     }
679 
680     return angle::Result::Continue;
681 }
682 
getBuildInfo(const cl::Device & device,cl::ProgramBuildInfo name,size_t valueSize,void * value,size_t * valueSizeRet) const683 angle::Result CLProgramVk::getBuildInfo(const cl::Device &device,
684                                         cl::ProgramBuildInfo name,
685                                         size_t valueSize,
686                                         void *value,
687                                         size_t *valueSizeRet) const
688 {
689     cl_uint valUInt                            = 0;
690     cl_build_status valStatus                  = 0;
691     const void *copyValue                      = nullptr;
692     size_t copySize                            = 0;
693     const DeviceProgramData *deviceProgramData = getDeviceProgramData(device.getNative());
694 
695     switch (name)
696     {
697         case cl::ProgramBuildInfo::Status:
698             valStatus = deviceProgramData->buildStatus;
699             copyValue = &valStatus;
700             copySize  = sizeof(valStatus);
701             break;
702         case cl::ProgramBuildInfo::Log:
703             copyValue = deviceProgramData->buildLog.c_str();
704             copySize  = deviceProgramData->buildLog.size() + 1;
705             break;
706         case cl::ProgramBuildInfo::Options:
707             copyValue = mProgramOpts.c_str();
708             copySize  = mProgramOpts.size() + 1;
709             break;
710         case cl::ProgramBuildInfo::BinaryType:
711             valUInt   = deviceProgramData->binaryType;
712             copyValue = &valUInt;
713             copySize  = sizeof(valUInt);
714             break;
715         case cl::ProgramBuildInfo::GlobalVariableTotalSize:
716             // Returns 0 if device does not support program scope global variables.
717             valUInt   = 0;
718             copyValue = &valUInt;
719             copySize  = sizeof(valUInt);
720             break;
721         default:
722             UNREACHABLE();
723     }
724 
725     if ((value != nullptr) && (copyValue != nullptr))
726     {
727         memcpy(value, copyValue, std::min(valueSize, copySize));
728     }
729 
730     if (valueSizeRet != nullptr)
731     {
732         *valueSizeRet = copySize;
733     }
734 
735     return angle::Result::Continue;
736 }
737 
createKernel(const cl::Kernel & kernel,const char * name,CLKernelImpl::Ptr * kernelOut)738 angle::Result CLProgramVk::createKernel(const cl::Kernel &kernel,
739                                         const char *name,
740                                         CLKernelImpl::Ptr *kernelOut)
741 {
742     // Wait for the compile to finish
743     mAsyncBuildEvent->wait();
744 
745     std::scoped_lock<angle::SimpleMutex> sl(mProgramMutex);
746     const auto devProgram = getDeviceProgramData(name);
747     ASSERT(devProgram != nullptr);
748 
749     // Create kernel
750     CLKernelArguments kernelArgs = devProgram->getKernelArguments(name);
751     std::string kernelAttributes = devProgram->getKernelAttributes(name);
752     std::string kernelName       = std::string(name ? name : "");
753     CLKernelVk::Ptr kernelImpl   = CLKernelVk::Ptr(
754         new (std::nothrow) CLKernelVk(kernel, kernelName, kernelAttributes, kernelArgs));
755     if (kernelImpl == nullptr)
756     {
757         ERR() << "Could not create kernel obj!";
758         ANGLE_CL_RETURN_ERROR(CL_OUT_OF_HOST_MEMORY);
759     }
760 
761     ANGLE_TRY(kernelImpl->init());
762     *kernelOut = std::move(kernelImpl);
763 
764     return angle::Result::Continue;
765 }
766 
createKernels(cl_uint numKernels,CLKernelImpl::CreateFuncs & createFuncs,cl_uint * numKernelsRet)767 angle::Result CLProgramVk::createKernels(cl_uint numKernels,
768                                          CLKernelImpl::CreateFuncs &createFuncs,
769                                          cl_uint *numKernelsRet)
770 {
771     size_t numDevKernels = 0;
772     for (const auto &dev : mAssociatedDevicePrograms)
773     {
774         numDevKernels += dev.second.numKernels();
775     }
776     if (numKernelsRet != nullptr)
777     {
778         *numKernelsRet = static_cast<cl_uint>(numDevKernels);
779     }
780 
781     if (numKernels != 0)
782     {
783         for (const auto &dev : mAssociatedDevicePrograms)
784         {
785             for (const auto &kernArgMap : dev.second.getKernelArgsMap())
786             {
787                 createFuncs.emplace_back([this, &kernArgMap](const cl::Kernel &kern) {
788                     CLKernelImpl::Ptr implPtr = nullptr;
789                     ANGLE_CL_IMPL_TRY(this->createKernel(kern, kernArgMap.first.c_str(), &implPtr));
790                     return CLKernelImpl::Ptr(std::move(implPtr));
791                 });
792             }
793         }
794     }
795     return angle::Result::Continue;
796 }
797 
getDeviceProgramData(const _cl_device_id * device) const798 const CLProgramVk::DeviceProgramData *CLProgramVk::getDeviceProgramData(
799     const _cl_device_id *device) const
800 {
801     if (!mAssociatedDevicePrograms.contains(device))
802     {
803         WARN() << "Device (" << device << ") is not associated with program (" << this << ") !";
804         return nullptr;
805     }
806     return &mAssociatedDevicePrograms.at(device);
807 }
808 
getDeviceProgramData(const char * kernelName) const809 const CLProgramVk::DeviceProgramData *CLProgramVk::getDeviceProgramData(
810     const char *kernelName) const
811 {
812     for (const auto &deviceProgram : mAssociatedDevicePrograms)
813     {
814         if (deviceProgram.second.containsKernel(kernelName))
815         {
816             return &deviceProgram.second;
817         }
818     }
819     WARN() << "Kernel name (" << kernelName << ") is not associated with program (" << this
820            << ") !";
821     return nullptr;
822 }
823 
buildInternal(const cl::DevicePtrs & devices,std::string options,std::string internalOptions,BuildType buildType,const LinkProgramsList & LinkProgramsList)824 bool CLProgramVk::buildInternal(const cl::DevicePtrs &devices,
825                                 std::string options,
826                                 std::string internalOptions,
827                                 BuildType buildType,
828                                 const LinkProgramsList &LinkProgramsList)
829 {
830     std::scoped_lock<angle::SimpleMutex> sl(mProgramMutex);
831 
832     // Cache original options string
833     mProgramOpts = options;
834 
835     // Process options and append any other internal (required) options for clspv
836     std::vector<std::string> optionTokens;
837     angle::SplitStringAlongWhitespace(options + " " + internalOptions, &optionTokens);
838     const bool createLibrary     = std::find(optionTokens.begin(), optionTokens.end(),
839                                              "-create-library") != optionTokens.end();
840     std::string processedOptions = ProcessBuildOptions(optionTokens, buildType);
841 
842     // Build for each associated device
843     for (size_t i = 0; i < devices.size(); ++i)
844     {
845         const cl::RefPointer<cl::Device> &device = devices.at(i);
846         DeviceProgramData &deviceProgramData     = mAssociatedDevicePrograms[device->getNative()];
847 
848         // add clspv compiler options based on device features
849         processedOptions += ClspvGetCompilerOptions(&device->getImpl<CLDeviceVk>());
850 
851         if (buildType != BuildType::BINARY)
852         {
853             // Invoke clspv
854             switch (buildType)
855             {
856                 case BuildType::BUILD:
857                 case BuildType::COMPILE:
858                 {
859                     ScopedClspvContext clspvCtx;
860                     const char *clSrc   = mProgram.getSource().c_str();
861                     ClspvError clspvRet = clspvCompileFromSourcesString(
862                         1, NULL, static_cast<const char **>(&clSrc), processedOptions.c_str(),
863                         &clspvCtx.mOutputBin, &clspvCtx.mOutputBinSize, &clspvCtx.mOutputBuildLog);
864                     deviceProgramData.buildLog =
865                         clspvCtx.mOutputBuildLog != nullptr ? clspvCtx.mOutputBuildLog : "";
866                     if (clspvRet != CLSPV_SUCCESS)
867                     {
868                         ERR() << "OpenCL build failed with: ClspvError(" << clspvRet << ")!";
869                         deviceProgramData.buildStatus = CL_BUILD_ERROR;
870                         return false;
871                     }
872 
873                     if (buildType == BuildType::COMPILE)
874                     {
875                         deviceProgramData.IR.assign(clspvCtx.mOutputBinSize, 0);
876                         std::memcpy(deviceProgramData.IR.data(), clspvCtx.mOutputBin,
877                                     clspvCtx.mOutputBinSize);
878                         deviceProgramData.binaryType = CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT;
879                     }
880                     else
881                     {
882                         deviceProgramData.binary.assign(clspvCtx.mOutputBinSize / sizeof(uint32_t),
883                                                         0);
884                         std::memcpy(deviceProgramData.binary.data(), clspvCtx.mOutputBin,
885                                     clspvCtx.mOutputBinSize);
886                         deviceProgramData.binaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE;
887                     }
888                     break;
889                 }
890                 case BuildType::LINK:
891                 {
892                     ScopedClspvContext clspvCtx;
893                     std::vector<size_t> vSizes;
894                     std::vector<const char *> vBins;
895                     const LinkPrograms &linkPrograms = LinkProgramsList.at(i);
896                     for (const CLProgramVk::DeviceProgramData *linkProgramData : linkPrograms)
897                     {
898                         vSizes.push_back(linkProgramData->IR.size());
899                         vBins.push_back(linkProgramData->IR.data());
900                     }
901                     ClspvError clspvRet = clspvCompileFromSourcesString(
902                         linkPrograms.size(), vSizes.data(), vBins.data(), processedOptions.c_str(),
903                         &clspvCtx.mOutputBin, &clspvCtx.mOutputBinSize, &clspvCtx.mOutputBuildLog);
904                     deviceProgramData.buildLog =
905                         clspvCtx.mOutputBuildLog != nullptr ? clspvCtx.mOutputBuildLog : "";
906                     if (clspvRet != CLSPV_SUCCESS)
907                     {
908                         ERR() << "OpenCL build failed with: ClspvError(" << clspvRet << ")!";
909                         deviceProgramData.buildStatus = CL_BUILD_ERROR;
910                         return false;
911                     }
912 
913                     if (createLibrary)
914                     {
915                         deviceProgramData.IR.assign(clspvCtx.mOutputBinSize, 0);
916                         std::memcpy(deviceProgramData.IR.data(), clspvCtx.mOutputBin,
917                                     clspvCtx.mOutputBinSize);
918                         deviceProgramData.binaryType = CL_PROGRAM_BINARY_TYPE_LIBRARY;
919                     }
920                     else
921                     {
922                         deviceProgramData.binary.assign(clspvCtx.mOutputBinSize / sizeof(uint32_t),
923                                                         0);
924                         std::memcpy(deviceProgramData.binary.data(),
925                                     reinterpret_cast<char *>(clspvCtx.mOutputBin),
926                                     clspvCtx.mOutputBinSize);
927                         deviceProgramData.binaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE;
928                     }
929                     break;
930                 }
931                 default:
932                     UNREACHABLE();
933                     return false;
934             }
935         }
936 
937         // Extract reflection info from spv binary and populate reflection data, as well as create
938         // the shader module
939         if (deviceProgramData.binaryType == CL_PROGRAM_BINARY_TYPE_EXECUTABLE)
940         {
941             spvtools::SpirvTools spvTool(SPV_ENV_UNIVERSAL_1_5);
942             bool parseRet = spvTool.Parse(
943                 deviceProgramData.binary,
944                 [](const spv_endianness_t endianess, const spv_parsed_header_t &instruction) {
945                     return SPV_SUCCESS;
946                 },
947                 [&deviceProgramData](const spv_parsed_instruction_t &instruction) {
948                     return ParseReflection(deviceProgramData.reflectionData, instruction);
949                 });
950             if (!parseRet)
951             {
952                 ERR() << "Failed to parse reflection info from SPIR-V!";
953                 deviceProgramData.buildStatus = CL_BUILD_ERROR;
954                 return false;
955             }
956 
957             if (mShader)
958             {
959                 mShader.reset();
960             }
961             // Strip SPIR-V binary if Vk implementation does not support non-semantic info
962             angle::spirv::Blob spvBlob =
963                 !mContext->getFeatures().supportsShaderNonSemanticInfo.enabled
964                     ? stripReflection(&deviceProgramData)
965                     : deviceProgramData.binary;
966             ASSERT(!spvBlob.empty());
967             if (IsError(vk::InitShaderModule(mContext, &mShader, spvBlob.data(),
968                                              spvBlob.size() * sizeof(uint32_t))))
969             {
970                 ERR() << "Failed to init Vulkan Shader Module!";
971                 deviceProgramData.buildStatus = CL_BUILD_ERROR;
972                 return false;
973             }
974 
975             // Setup inital push constant range
976             uint32_t pushConstantMinOffet = UINT32_MAX, pushConstantMaxOffset = 0,
977                      pushConstantMaxSize = 0;
978             for (const auto &pushConstant : deviceProgramData.reflectionData.pushConstants)
979             {
980                 pushConstantMinOffet = pushConstant.second.offset < pushConstantMinOffet
981                                            ? pushConstant.second.offset
982                                            : pushConstantMinOffet;
983                 if (pushConstant.second.offset >= pushConstantMaxOffset)
984                 {
985                     pushConstantMaxOffset = pushConstant.second.offset;
986                     pushConstantMaxSize   = pushConstant.second.size;
987                 }
988             }
989             for (const auto &pushConstant : deviceProgramData.reflectionData.imagePushConstants)
990             {
991                 for (const auto imageConstant : pushConstant.second)
992                 {
993                     pushConstantMinOffet = imageConstant.pcRange.offset < pushConstantMinOffet
994                                                ? imageConstant.pcRange.offset
995                                                : pushConstantMinOffet;
996                     if (imageConstant.pcRange.offset >= pushConstantMaxOffset)
997                     {
998                         pushConstantMaxOffset = imageConstant.pcRange.offset;
999                         pushConstantMaxSize   = imageConstant.pcRange.size;
1000                     }
1001                 }
1002             }
1003             deviceProgramData.pushConstRange.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
1004             deviceProgramData.pushConstRange.offset =
1005                 pushConstantMinOffet == UINT32_MAX ? 0 : pushConstantMinOffet;
1006             deviceProgramData.pushConstRange.size = pushConstantMaxOffset + pushConstantMaxSize;
1007 
1008             if (kAngleDebug)
1009             {
1010                 if (mContext->getFeatures().clDumpVkSpirv.enabled)
1011                 {
1012                     angle::spirv::Print(deviceProgramData.binary);
1013                 }
1014             }
1015         }
1016         deviceProgramData.buildStatus = CL_BUILD_SUCCESS;
1017     }
1018     return true;
1019 }
1020 
stripReflection(const DeviceProgramData * deviceProgramData)1021 angle::spirv::Blob CLProgramVk::stripReflection(const DeviceProgramData *deviceProgramData)
1022 {
1023     angle::spirv::Blob binaryStripped;
1024     spvtools::Optimizer opt(SPV_ENV_UNIVERSAL_1_5);
1025     opt.RegisterPass(spvtools::CreateStripReflectInfoPass());
1026     spvtools::OptimizerOptions optOptions;
1027     optOptions.set_run_validator(false);
1028     if (!opt.Run(deviceProgramData->binary.data(), deviceProgramData->binary.size(),
1029                  &binaryStripped, optOptions))
1030     {
1031         ERR() << "Could not strip reflection data from binary!";
1032     }
1033     return binaryStripped;
1034 }
1035 
allocateDescriptorSet(const DescriptorSetIndex setIndex,const vk::DescriptorSetLayout & descriptorSetLayout,vk::CommandBufferHelperCommon * commandBuffer,vk::DescriptorSetPointer * descriptorSetOut)1036 angle::Result CLProgramVk::allocateDescriptorSet(const DescriptorSetIndex setIndex,
1037                                                  const vk::DescriptorSetLayout &descriptorSetLayout,
1038                                                  vk::CommandBufferHelperCommon *commandBuffer,
1039                                                  vk::DescriptorSetPointer *descriptorSetOut)
1040 {
1041     if (mDynamicDescriptorPools[setIndex])
1042     {
1043         ANGLE_CL_IMPL_TRY_ERROR(mDynamicDescriptorPools[setIndex]->allocateDescriptorSet(
1044                                     mContext, descriptorSetLayout, descriptorSetOut),
1045                                 CL_INVALID_OPERATION);
1046         commandBuffer->retainResource(descriptorSetOut->get());
1047     }
1048     return angle::Result::Continue;
1049 }
1050 
setBuildStatus(const cl::DevicePtrs & devices,cl_build_status status)1051 void CLProgramVk::setBuildStatus(const cl::DevicePtrs &devices, cl_build_status status)
1052 {
1053     std::scoped_lock<angle::SimpleMutex> sl(mProgramMutex);
1054 
1055     for (const auto &device : devices)
1056     {
1057         ASSERT(mAssociatedDevicePrograms.contains(device->getNative()));
1058         DeviceProgramData &deviceProgram = mAssociatedDevicePrograms.at(device->getNative());
1059         deviceProgram.buildStatus        = status;
1060     }
1061 }
1062 
getPrintfDescriptors(const std::string & kernelName) const1063 const angle::HashMap<uint32_t, ClspvPrintfInfo> *CLProgramVk::getPrintfDescriptors(
1064     const std::string &kernelName) const
1065 {
1066     const DeviceProgramData *deviceProgram = getDeviceProgramData(kernelName.c_str());
1067     if (deviceProgram)
1068     {
1069         return &deviceProgram->reflectionData.printfInfoMap;
1070     }
1071     return nullptr;
1072 }
1073 
1074 }  // namespace rx
1075