1 //
2 // Copyright 2021 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 // CLProgramVk.cpp: Implements the class methods for CLProgramVk.
7
8 #include "libANGLE/renderer/vulkan/CLProgramVk.h"
9 #include "libANGLE/renderer/vulkan/CLContextVk.h"
10 #include "libANGLE/renderer/vulkan/CLDeviceVk.h"
11 #include "libANGLE/renderer/vulkan/clspv_utils.h"
12 #include "libANGLE/renderer/vulkan/vk_cache_utils.h"
13 #include "libANGLE/renderer/vulkan/vk_helpers.h"
14
15 #include "libANGLE/CLContext.h"
16 #include "libANGLE/CLKernel.h"
17 #include "libANGLE/CLProgram.h"
18 #include "libANGLE/cl_utils.h"
19
20 #include "common/log_utils.h"
21 #include "common/string_utils.h"
22 #include "common/system_utils.h"
23
24 #include "clspv/Compiler.h"
25
26 #include "spirv/unified1/NonSemanticClspvReflection.h"
27 #include "spirv/unified1/spirv.hpp"
28
29 #include "spirv-tools/libspirv.hpp"
30 #include "spirv-tools/optimizer.hpp"
31
32 namespace rx
33 {
34
35 namespace
36 {
// True only when ANGLE is compiled with ANGLE_ENABLE_ASSERTS defined; false otherwise.
constexpr bool kAngleDebug =
#if defined(ANGLE_ENABLE_ASSERTS)
    true;
#else
    false;
#endif
42
43 // Used by SPIRV-Tools to parse reflection info
ParseReflection(CLProgramVk::SpvReflectionData & reflectionData,const spv_parsed_instruction_t & spvInstr)44 spv_result_t ParseReflection(CLProgramVk::SpvReflectionData &reflectionData,
45 const spv_parsed_instruction_t &spvInstr)
46 {
47 // Parse spir-v opcodes
48 switch (spvInstr.opcode)
49 {
50 // --- Clspv specific parsing for below cases ---
51 case spv::OpExtInst:
52 {
53 switch (spvInstr.words[4])
54 {
55 case NonSemanticClspvReflectionKernel:
56 {
57 // Extract kernel name and args - add to kernel args map
58 std::string functionName = reflectionData.spvStrLookup[spvInstr.words[6]];
59 uint32_t numArgs = reflectionData.spvIntLookup[spvInstr.words[7]];
60 reflectionData.kernelArgsMap[functionName] = CLKernelArguments();
61 reflectionData.kernelArgsMap[functionName].resize(numArgs);
62
63 // Store kernel flags and attributes
64 reflectionData.kernelFlags[functionName] =
65 reflectionData.spvIntLookup[spvInstr.words[8]];
66 reflectionData.kernelAttributes[functionName] =
67 reflectionData.spvStrLookup[spvInstr.words[9]];
68
69 // Save kernel name to reflection table for later use/lookup in parser routine
70 reflectionData.kernelIDs.insert(spvInstr.words[2]);
71 reflectionData.spvStrLookup[spvInstr.words[2]] = std::string(functionName);
72
73 // If we already parsed some args ahead of time, populate them now
74 if (reflectionData.kernelArgMap.contains(functionName))
75 {
76 for (const auto &arg : reflectionData.kernelArgMap)
77 {
78 uint32_t ordinal = arg.second.ordinal;
79 reflectionData.kernelArgsMap[functionName].at(ordinal) =
80 std::move(arg.second);
81 }
82 }
83 break;
84 }
85 case NonSemanticClspvReflectionArgumentInfo:
86 {
87 CLKernelVk::ArgInfo kernelArgInfo;
88 kernelArgInfo.name = reflectionData.spvStrLookup[spvInstr.words[5]];
89 // If instruction has more than 5 instruction operands (minus instruction
90 // name/opcode), that means we have arg qualifiers. ArgumentInfo also counts as
91 // an operand for OpExtInst. In below example, [ %e %f %g %h ] are the arg
92 // qualifier operands.
93 //
94 // %a = OpExtInst %b %c ArgumentInfo %d [ %e %f %g %h ]
95 if (spvInstr.num_operands > 5)
96 {
97 kernelArgInfo.typeName = reflectionData.spvStrLookup[spvInstr.words[6]];
98 kernelArgInfo.addressQualifier =
99 reflectionData.spvIntLookup[spvInstr.words[7]];
100 kernelArgInfo.accessQualifier =
101 reflectionData.spvIntLookup[spvInstr.words[8]];
102 kernelArgInfo.typeQualifier =
103 reflectionData.spvIntLookup[spvInstr.words[9]];
104 }
105 // Store kern arg for later lookup
106 reflectionData.kernelArgInfos[spvInstr.words[2]] = std::move(kernelArgInfo);
107 break;
108 }
109 case NonSemanticClspvReflectionArgumentPodUniform:
110 case NonSemanticClspvReflectionArgumentPointerUniform:
111 case NonSemanticClspvReflectionArgumentPodStorageBuffer:
112 {
113 CLKernelArgument kernelArg;
114 if (spvInstr.num_operands == 11)
115 {
116 const CLKernelVk::ArgInfo &kernelArgInfo =
117 reflectionData.kernelArgInfos[spvInstr.words[11]];
118 kernelArg.info.name = kernelArgInfo.name;
119 kernelArg.info.typeName = kernelArgInfo.typeName;
120 kernelArg.info.addressQualifier = kernelArgInfo.addressQualifier;
121 kernelArg.info.accessQualifier = kernelArgInfo.accessQualifier;
122 kernelArg.info.typeQualifier = kernelArgInfo.typeQualifier;
123 }
124 kernelArg.type = spvInstr.words[4];
125 kernelArg.used = true;
126 kernelArg.ordinal = reflectionData.spvIntLookup[spvInstr.words[6]];
127 kernelArg.op3 = reflectionData.spvIntLookup[spvInstr.words[7]];
128 kernelArg.op4 = reflectionData.spvIntLookup[spvInstr.words[8]];
129 kernelArg.op5 = reflectionData.spvIntLookup[spvInstr.words[9]];
130 kernelArg.op6 = reflectionData.spvIntLookup[spvInstr.words[10]];
131
132 if (reflectionData.kernelIDs.contains(spvInstr.words[5]))
133 {
134 CLKernelArguments &kernelArgs =
135 reflectionData
136 .kernelArgsMap[reflectionData.spvStrLookup[spvInstr.words[5]]];
137 kernelArgs.at(kernelArg.ordinal) = std::move(kernelArg);
138 }
139 else
140 {
141 // Reflection kernel not yet parsed, place in temp storage for now
142 reflectionData
143 .kernelArgMap[reflectionData.spvStrLookup[spvInstr.words[5]]] =
144 std::move(kernelArg);
145 }
146
147 break;
148 }
149 case NonSemanticClspvReflectionArgumentUniform:
150 case NonSemanticClspvReflectionArgumentWorkgroup:
151 case NonSemanticClspvReflectionArgumentSampler:
152 case NonSemanticClspvReflectionArgumentStorageImage:
153 case NonSemanticClspvReflectionArgumentSampledImage:
154 case NonSemanticClspvReflectionArgumentStorageBuffer:
155 case NonSemanticClspvReflectionArgumentStorageTexelBuffer:
156 case NonSemanticClspvReflectionArgumentUniformTexelBuffer:
157 case NonSemanticClspvReflectionArgumentPodPushConstant:
158 case NonSemanticClspvReflectionArgumentPointerPushConstant:
159 {
160 CLKernelArgument kernelArg;
161 if (spvInstr.num_operands == 9)
162 {
163 const CLKernelVk::ArgInfo &kernelArgInfo =
164 reflectionData.kernelArgInfos[spvInstr.words[9]];
165 kernelArg.info.name = kernelArgInfo.name;
166 kernelArg.info.typeName = kernelArgInfo.typeName;
167 kernelArg.info.addressQualifier = kernelArgInfo.addressQualifier;
168 kernelArg.info.accessQualifier = kernelArgInfo.accessQualifier;
169 kernelArg.info.typeQualifier = kernelArgInfo.typeQualifier;
170 }
171
172 kernelArg.type = spvInstr.words[4];
173 kernelArg.used = true;
174 kernelArg.ordinal = reflectionData.spvIntLookup[spvInstr.words[6]];
175 kernelArg.op3 = reflectionData.spvIntLookup[spvInstr.words[7]];
176 kernelArg.op4 = reflectionData.spvIntLookup[spvInstr.words[8]];
177
178 if (reflectionData.kernelIDs.contains(spvInstr.words[5]))
179 {
180 CLKernelArguments &kernelArgs =
181 reflectionData
182 .kernelArgsMap[reflectionData.spvStrLookup[spvInstr.words[5]]];
183 kernelArgs.at(kernelArg.ordinal) = std::move(kernelArg);
184 }
185 else
186 {
187 // Reflection kernel not yet parsed, place in temp storage for now
188 reflectionData
189 .kernelArgMap[reflectionData.spvStrLookup[spvInstr.words[5]]] =
190 std::move(kernelArg);
191 }
192 break;
193 }
194 case NonSemanticClspvReflectionPushConstantGlobalSize:
195 case NonSemanticClspvReflectionPushConstantGlobalOffset:
196 case NonSemanticClspvReflectionPushConstantRegionOffset:
197 case NonSemanticClspvReflectionPushConstantNumWorkgroups:
198 case NonSemanticClspvReflectionPushConstantRegionGroupOffset:
199 case NonSemanticClspvReflectionPushConstantEnqueuedLocalSize:
200 {
201 uint32_t offset = reflectionData.spvIntLookup[spvInstr.words[5]];
202 uint32_t size = reflectionData.spvIntLookup[spvInstr.words[6]];
203 reflectionData.pushConstants[spvInstr.words[4]] = {
204 .stageFlags = 0, .offset = offset, .size = size};
205 break;
206 }
207 case NonSemanticClspvReflectionSpecConstantWorkgroupSize:
208 {
209 reflectionData.specConstantIDs[SpecConstantType::WorkgroupSizeX] =
210 reflectionData.spvIntLookup[spvInstr.words[5]];
211 reflectionData.specConstantIDs[SpecConstantType::WorkgroupSizeY] =
212 reflectionData.spvIntLookup[spvInstr.words[6]];
213 reflectionData.specConstantIDs[SpecConstantType::WorkgroupSizeZ] =
214 reflectionData.spvIntLookup[spvInstr.words[7]];
215 reflectionData.specConstantsUsed[SpecConstantType::WorkgroupSizeX] = true;
216 reflectionData.specConstantsUsed[SpecConstantType::WorkgroupSizeY] = true;
217 reflectionData.specConstantsUsed[SpecConstantType::WorkgroupSizeZ] = true;
218 break;
219 }
220 case NonSemanticClspvReflectionPropertyRequiredWorkgroupSize:
221 {
222 reflectionData.kernelCompileWorkgroupSize
223 [reflectionData.spvStrLookup[spvInstr.words[5]]] = {
224 reflectionData.spvIntLookup[spvInstr.words[6]],
225 reflectionData.spvIntLookup[spvInstr.words[7]],
226 reflectionData.spvIntLookup[spvInstr.words[8]]};
227 break;
228 }
229 case NonSemanticClspvReflectionSpecConstantWorkDim:
230 {
231 reflectionData.specConstantIDs[SpecConstantType::WorkDimension] =
232 reflectionData.spvIntLookup[spvInstr.words[5]];
233 reflectionData.specConstantsUsed[SpecConstantType::WorkDimension] = true;
234 break;
235 }
236 case NonSemanticClspvReflectionSpecConstantGlobalOffset:
237 reflectionData.specConstantIDs[SpecConstantType::GlobalOffsetX] =
238 reflectionData.spvIntLookup[spvInstr.words[5]];
239 reflectionData.specConstantIDs[SpecConstantType::GlobalOffsetY] =
240 reflectionData.spvIntLookup[spvInstr.words[6]];
241 reflectionData.specConstantIDs[SpecConstantType::GlobalOffsetZ] =
242 reflectionData.spvIntLookup[spvInstr.words[7]];
243 reflectionData.specConstantsUsed[SpecConstantType::GlobalOffsetX] = true;
244 reflectionData.specConstantsUsed[SpecConstantType::GlobalOffsetY] = true;
245 reflectionData.specConstantsUsed[SpecConstantType::GlobalOffsetZ] = true;
246 break;
247 case NonSemanticClspvReflectionPrintfInfo:
248 {
249 // Info on the format string used in the builtin printf call in kernel
250 uint32_t printfID = reflectionData.spvIntLookup[spvInstr.words[5]];
251 std::string formatString = reflectionData.spvStrLookup[spvInstr.words[6]];
252 reflectionData.printfInfoMap[printfID].id = printfID;
253 reflectionData.printfInfoMap[printfID].formatSpecifier = formatString;
254 for (int i = 6; i < spvInstr.num_operands; i++)
255 {
256 uint16_t offset = spvInstr.operands[i].offset;
257 size_t size = reflectionData.spvIntLookup[spvInstr.words[offset]];
258 reflectionData.printfInfoMap[printfID].argSizes.push_back(
259 static_cast<uint32_t>(size));
260 }
261
262 break;
263 }
264 case NonSemanticClspvReflectionPrintfBufferStorageBuffer:
265 {
266 // Info about the printf storage buffer that contains the formatted content
267 uint32_t set = reflectionData.spvIntLookup[spvInstr.words[5]];
268 uint32_t binding = reflectionData.spvIntLookup[spvInstr.words[6]];
269 uint32_t size = reflectionData.spvIntLookup[spvInstr.words[7]];
270 reflectionData.printfBufferStorage = {set, binding, 0, size};
271 break;
272 }
273 case NonSemanticClspvReflectionPrintfBufferPointerPushConstant:
274 {
275 ERR() << "Shouldn't be here. Support of printf builtin function is enabled "
276 "through "
277 "PrintfBufferStorageBuffer. Check optins passed down to clspv";
278 UNREACHABLE();
279 return SPV_UNSUPPORTED;
280 }
281 case NonSemanticClspvReflectionNormalizedSamplerMaskPushConstant:
282 case NonSemanticClspvReflectionImageArgumentInfoChannelOrderPushConstant:
283 case NonSemanticClspvReflectionImageArgumentInfoChannelDataTypePushConstant:
284 {
285 uint32_t ordinal = reflectionData.spvIntLookup[spvInstr.words[6]];
286 uint32_t offset = reflectionData.spvIntLookup[spvInstr.words[7]];
287 uint32_t size = reflectionData.spvIntLookup[spvInstr.words[8]];
288 VkPushConstantRange pcRange = {.stageFlags = 0, .offset = offset, .size = size};
289 reflectionData.imagePushConstants[spvInstr.words[4]].push_back(
290 {.pcRange = pcRange, .ordinal = ordinal});
291 break;
292 }
293 default:
294 break;
295 }
296 break;
297 }
298 // --- Regular SPIR-V opcode parsing for below cases ---
299 case spv::OpString:
300 {
301 reflectionData.spvStrLookup[spvInstr.words[1]] =
302 reinterpret_cast<const char *>(&spvInstr.words[2]);
303 break;
304 }
305 case spv::OpConstant:
306 {
307 reflectionData.spvIntLookup[spvInstr.words[2]] = spvInstr.words[3];
308 break;
309 }
310 default:
311 break;
312 }
313 return SPV_SUCCESS;
314 }
315
ProcessBuildOptions(const std::vector<std::string> & optionTokens,CLProgramVk::BuildType buildType)316 std::string ProcessBuildOptions(const std::vector<std::string> &optionTokens,
317 CLProgramVk::BuildType buildType)
318 {
319 std::string processedOptions;
320
321 // Need to remove/replace options that are not 1-1 mapped to clspv
322 for (const std::string &optionToken : optionTokens)
323 {
324 if (optionToken == "-create-library" && buildType == CLProgramVk::BuildType::LINK)
325 {
326 processedOptions += " --output-format=bc";
327 continue;
328 }
329 processedOptions += " " + optionToken;
330 }
331
332 switch (buildType)
333 {
334 case CLProgramVk::BuildType::COMPILE:
335 processedOptions += " --output-format=bc";
336 break;
337 case CLProgramVk::BuildType::LINK:
338 processedOptions += " -x ir";
339 break;
340 default:
341 break;
342 }
343
344 return processedOptions;
345 }
346
347 } // namespace
348
operator ()()349 void CLAsyncBuildTask::operator()()
350 {
351 ANGLE_TRACE_EVENT0("gpu.angle", "CLProgramVk::buildInternal (async)");
352 CLProgramVk::ScopedProgramCallback spc(mNotify);
353 if (!mProgramVk->buildInternal(mDevices, mOptions, mInternalOptions, mBuildType,
354 mLinkProgramsList))
355 {
356 ERR() << "Async build failed for program (" << mProgramVk
357 << ")! Check the build status or build log for details.";
358 }
359 }
360
CLProgramVk(const cl::Program & program)361 CLProgramVk::CLProgramVk(const cl::Program &program)
362 : CLProgramImpl(program),
363 mContext(&program.getContext().getImpl<CLContextVk>()),
364 mAsyncBuildEvent(std::make_shared<angle::WaitableEventDone>())
365 {}
366
init()367 angle::Result CLProgramVk::init()
368 {
369 cl::DevicePtrs devices;
370 ANGLE_TRY(mContext->getDevices(&devices));
371
372 // The devices associated with the program object are the devices associated with context
373 for (const cl::DevicePtr &device : devices)
374 {
375 mAssociatedDevicePrograms[device->getNative()] = DeviceProgramData{};
376 }
377
378 return angle::Result::Continue;
379 }
380
init(const size_t * lengths,const unsigned char ** binaries,cl_int * binaryStatus)381 angle::Result CLProgramVk::init(const size_t *lengths,
382 const unsigned char **binaries,
383 cl_int *binaryStatus)
384 {
385 // The devices associated with program come from device_list param from
386 // clCreateProgramWithBinary
387 for (const cl::DevicePtr &device : mProgram.getDevices())
388 {
389 const unsigned char *binaryHandle = *binaries++;
390 size_t binarySize = *lengths++;
391
392 // Check for header
393 if (binarySize < sizeof(ProgramBinaryOutputHeader))
394 {
395 if (binaryStatus)
396 {
397 *binaryStatus++ = CL_INVALID_BINARY;
398 }
399 ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY);
400 }
401 binarySize -= sizeof(ProgramBinaryOutputHeader);
402
403 // Check for valid binary version from header
404 const ProgramBinaryOutputHeader *binaryHeader =
405 reinterpret_cast<const ProgramBinaryOutputHeader *>(binaryHandle);
406 if (binaryHeader == nullptr)
407 {
408 ERR() << "NULL binary header!";
409 if (binaryStatus)
410 {
411 *binaryStatus++ = CL_INVALID_BINARY;
412 }
413 ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY);
414 }
415 else if (binaryHeader->headerVersion < kBinaryVersion)
416 {
417 ERR() << "Binary version not compatible with runtime!";
418 if (binaryStatus)
419 {
420 *binaryStatus++ = CL_INVALID_BINARY;
421 }
422 ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY);
423 }
424 binaryHandle += sizeof(ProgramBinaryOutputHeader);
425
426 // See what kind of binary we have (i.e. SPIR-V or LLVM Bitcode)
427 // https://llvm.org/docs/BitCodeFormat.html#llvm-ir-magic-number
428 // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_magic_number
429 constexpr uint32_t LLVM_BC_MAGIC = 0xDEC04342;
430 constexpr uint32_t SPIRV_MAGIC = 0x07230203;
431 const uint32_t &firstWord = reinterpret_cast<const uint32_t *>(binaryHandle)[0];
432 bool isBC = firstWord == LLVM_BC_MAGIC;
433 bool isSPV = firstWord == SPIRV_MAGIC;
434 if (!isBC && !isSPV)
435 {
436 ERR() << "Binary is neither SPIR-V nor LLVM Bitcode!";
437 if (binaryStatus)
438 {
439 *binaryStatus++ = CL_INVALID_BINARY;
440 }
441 ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY);
442 }
443
444 // Add device binary to program
445 DeviceProgramData deviceBinary;
446 deviceBinary.binaryType = binaryHeader->binaryType;
447 deviceBinary.buildStatus = binaryHeader->buildStatus;
448 switch (deviceBinary.binaryType)
449 {
450 case CL_PROGRAM_BINARY_TYPE_EXECUTABLE:
451 deviceBinary.binary.assign(binarySize / sizeof(uint32_t), 0);
452 std::memcpy(deviceBinary.binary.data(), binaryHandle, binarySize);
453 break;
454 case CL_PROGRAM_BINARY_TYPE_LIBRARY:
455 case CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT:
456 deviceBinary.IR.assign(binarySize, 0);
457 std::memcpy(deviceBinary.IR.data(), binaryHandle, binarySize);
458 break;
459 default:
460 UNREACHABLE();
461 ERR() << "Invalid binary type!";
462 if (binaryStatus)
463 {
464 *binaryStatus++ = CL_INVALID_BINARY;
465 }
466 ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY);
467 }
468 mAssociatedDevicePrograms[device->getNative()] = std::move(deviceBinary);
469 if (binaryStatus)
470 {
471 *binaryStatus++ = CL_SUCCESS;
472 }
473 }
474
475 return angle::Result::Continue;
476 }
477
~CLProgramVk()478 CLProgramVk::~CLProgramVk()
479 {
480 for (vk::DynamicDescriptorPoolPointer &pool : mDynamicDescriptorPools)
481 {
482 pool.reset();
483 }
484 for (DescriptorSetIndex index : angle::AllEnums<DescriptorSetIndex>())
485 {
486 mMetaDescriptorPools[index].destroy(mContext->getRenderer());
487 }
488 }
489
build(const cl::DevicePtrs & devices,const char * options,cl::Program * notify)490 angle::Result CLProgramVk::build(const cl::DevicePtrs &devices,
491 const char *options,
492 cl::Program *notify)
493 {
494 BuildType buildType = !mProgram.getSource().empty() ? BuildType::BUILD : BuildType::BINARY;
495 const cl::DevicePtrs &devicePtrs = !devices.empty() ? devices : mProgram.getDevices();
496
497 setBuildStatus(devicePtrs, CL_BUILD_IN_PROGRESS);
498
499 if (notify)
500 {
501 mAsyncBuildEvent =
502 getPlatform()->postMultiThreadWorkerTask(std::make_shared<CLAsyncBuildTask>(
503 this, devicePtrs, std::string(options ? options : ""), "", buildType,
504 LinkProgramsList{}, notify));
505 ASSERT(mAsyncBuildEvent != nullptr);
506 }
507 else
508 {
509 if (!buildInternal(devicePtrs, std::string(options ? options : ""), "", buildType,
510 LinkProgramsList{}))
511 {
512 ANGLE_CL_RETURN_ERROR(CL_BUILD_PROGRAM_FAILURE);
513 }
514 }
515 return angle::Result::Continue;
516 }
517
compile(const cl::DevicePtrs & devices,const char * options,const cl::ProgramPtrs & inputHeaders,const char ** headerIncludeNames,cl::Program * notify)518 angle::Result CLProgramVk::compile(const cl::DevicePtrs &devices,
519 const char *options,
520 const cl::ProgramPtrs &inputHeaders,
521 const char **headerIncludeNames,
522 cl::Program *notify)
523 {
524 const cl::DevicePtrs &devicePtrs = !devices.empty() ? devices : mProgram.getDevices();
525
526 // Ensure OS temp dir is available
527 std::string internalCompileOpts;
528 Optional<std::string> tmpDir = angle::GetTempDirectory();
529 if (!tmpDir.valid())
530 {
531 ERR() << "Failed to open OS temp dir";
532 ANGLE_CL_RETURN_ERROR(CL_INVALID_OPERATION);
533 }
534 internalCompileOpts += inputHeaders.empty() ? "" : " -I" + tmpDir.value();
535
536 // Dump input headers to OS temp directory
537 for (size_t i = 0; i < inputHeaders.size(); ++i)
538 {
539 const std::string &inputHeaderSrc =
540 inputHeaders.at(i)->getImpl<CLProgramVk>().mProgram.getSource();
541 std::string headerFilePath(angle::ConcatenatePath(tmpDir.value(), headerIncludeNames[i]));
542
543 // Sanitize path so we can use "/" as universal path separator
544 angle::MakeForwardSlashThePathSeparator(headerFilePath);
545 size_t baseDirPos = headerFilePath.find_last_of("/");
546
547 // Ensure parent dir(s) exists
548 if (!angle::CreateDirectories(headerFilePath.substr(0, baseDirPos)))
549 {
550 ERR() << "Failed to create output path(s) for header(s)!";
551 ANGLE_CL_RETURN_ERROR(CL_INVALID_OPERATION);
552 }
553 writeFile(headerFilePath.c_str(), inputHeaderSrc.data(), inputHeaderSrc.size());
554 }
555
556 setBuildStatus(devicePtrs, CL_BUILD_IN_PROGRESS);
557
558 // Perform compile
559 if (notify)
560 {
561 mAsyncBuildEvent = mProgram.getContext().getPlatform().getMultiThreadPool()->postWorkerTask(
562 std::make_shared<CLAsyncBuildTask>(
563 this, devicePtrs, std::string(options ? options : ""), internalCompileOpts,
564 BuildType::COMPILE, LinkProgramsList{}, notify));
565 ASSERT(mAsyncBuildEvent != nullptr);
566 }
567 else
568 {
569 mAsyncBuildEvent = std::make_shared<angle::WaitableEventDone>();
570 if (!buildInternal(devicePtrs, std::string(options ? options : ""), internalCompileOpts,
571 BuildType::COMPILE, LinkProgramsList{}))
572 {
573 ANGLE_CL_RETURN_ERROR(CL_COMPILE_PROGRAM_FAILURE);
574 }
575 }
576
577 return angle::Result::Continue;
578 }
579
getInfo(cl::ProgramInfo name,size_t valueSize,void * value,size_t * valueSizeRet) const580 angle::Result CLProgramVk::getInfo(cl::ProgramInfo name,
581 size_t valueSize,
582 void *value,
583 size_t *valueSizeRet) const
584 {
585 cl_uint valUInt = 0u;
586 cl_bool valBool = CL_FALSE;
587 void *valPointer = nullptr;
588 const void *copyValue = nullptr;
589 size_t copySize = 0u;
590 unsigned char **outputBins = reinterpret_cast<unsigned char **>(value);
591 std::string kernelNamesList;
592 std::vector<size_t> vBinarySizes;
593
594 switch (name)
595 {
596 case cl::ProgramInfo::NumKernels:
597 for (const auto &deviceProgram : mAssociatedDevicePrograms)
598 {
599 valUInt += static_cast<decltype(valUInt)>(deviceProgram.second.numKernels());
600 }
601 copyValue = &valUInt;
602 copySize = sizeof(valUInt);
603 break;
604 case cl::ProgramInfo::BinarySizes:
605 {
606 for (const auto &deviceProgram : mAssociatedDevicePrograms)
607 {
608 vBinarySizes.push_back(
609 sizeof(ProgramBinaryOutputHeader) +
610 (deviceProgram.second.binaryType == CL_PROGRAM_BINARY_TYPE_EXECUTABLE
611 ? deviceProgram.second.binary.size() * sizeof(uint32_t)
612 : deviceProgram.second.IR.size()));
613 }
614 valPointer = vBinarySizes.data();
615 copyValue = valPointer;
616 copySize = vBinarySizes.size() * sizeof(size_t);
617 break;
618 }
619 case cl::ProgramInfo::Binaries:
620 for (const auto &deviceProgram : mAssociatedDevicePrograms)
621 {
622 const void *bin =
623 deviceProgram.second.binaryType == CL_PROGRAM_BINARY_TYPE_EXECUTABLE
624 ? reinterpret_cast<const void *>(deviceProgram.second.binary.data())
625 : reinterpret_cast<const void *>(deviceProgram.second.IR.data());
626 size_t binSize =
627 deviceProgram.second.binaryType == CL_PROGRAM_BINARY_TYPE_EXECUTABLE
628 ? deviceProgram.second.binary.size() * sizeof(uint32_t)
629 : deviceProgram.second.IR.size();
630 ProgramBinaryOutputHeader header{.headerVersion = kBinaryVersion,
631 .binaryType = deviceProgram.second.binaryType,
632 .buildStatus = deviceProgram.second.buildStatus};
633
634 if (outputBins != nullptr)
635 {
636 if (*outputBins != nullptr)
637 {
638 std::memcpy(*outputBins, &header, sizeof(ProgramBinaryOutputHeader));
639 std::memcpy((*outputBins) + sizeof(ProgramBinaryOutputHeader), bin,
640 binSize);
641 }
642 outputBins++;
643 }
644
645 // Spec just wants pointer size here
646 copySize += sizeof(unsigned char *);
647 }
648 // We already copied the (headers + binaries) over - nothing else left to copy
649 copyValue = nullptr;
650 break;
651 case cl::ProgramInfo::KernelNames:
652 for (const auto &deviceProgram : mAssociatedDevicePrograms)
653 {
654 kernelNamesList = deviceProgram.second.getKernelNames();
655 }
656 valPointer = kernelNamesList.data();
657 copyValue = valPointer;
658 copySize = kernelNamesList.size() + 1;
659 break;
660 case cl::ProgramInfo::ScopeGlobalCtorsPresent:
661 case cl::ProgramInfo::ScopeGlobalDtorsPresent:
662 // These are deprecated by version 3.0 and are currently not supported
663 copyValue = &valBool;
664 copySize = sizeof(cl_bool);
665 break;
666 default:
667 UNREACHABLE();
668 }
669
670 if ((value != nullptr) && (copyValue != nullptr))
671 {
672 std::memcpy(value, copyValue, copySize);
673 }
674
675 if (valueSizeRet != nullptr)
676 {
677 *valueSizeRet = copySize;
678 }
679
680 return angle::Result::Continue;
681 }
682
getBuildInfo(const cl::Device & device,cl::ProgramBuildInfo name,size_t valueSize,void * value,size_t * valueSizeRet) const683 angle::Result CLProgramVk::getBuildInfo(const cl::Device &device,
684 cl::ProgramBuildInfo name,
685 size_t valueSize,
686 void *value,
687 size_t *valueSizeRet) const
688 {
689 cl_uint valUInt = 0;
690 cl_build_status valStatus = 0;
691 const void *copyValue = nullptr;
692 size_t copySize = 0;
693 const DeviceProgramData *deviceProgramData = getDeviceProgramData(device.getNative());
694
695 switch (name)
696 {
697 case cl::ProgramBuildInfo::Status:
698 valStatus = deviceProgramData->buildStatus;
699 copyValue = &valStatus;
700 copySize = sizeof(valStatus);
701 break;
702 case cl::ProgramBuildInfo::Log:
703 copyValue = deviceProgramData->buildLog.c_str();
704 copySize = deviceProgramData->buildLog.size() + 1;
705 break;
706 case cl::ProgramBuildInfo::Options:
707 copyValue = mProgramOpts.c_str();
708 copySize = mProgramOpts.size() + 1;
709 break;
710 case cl::ProgramBuildInfo::BinaryType:
711 valUInt = deviceProgramData->binaryType;
712 copyValue = &valUInt;
713 copySize = sizeof(valUInt);
714 break;
715 case cl::ProgramBuildInfo::GlobalVariableTotalSize:
716 // Returns 0 if device does not support program scope global variables.
717 valUInt = 0;
718 copyValue = &valUInt;
719 copySize = sizeof(valUInt);
720 break;
721 default:
722 UNREACHABLE();
723 }
724
725 if ((value != nullptr) && (copyValue != nullptr))
726 {
727 memcpy(value, copyValue, std::min(valueSize, copySize));
728 }
729
730 if (valueSizeRet != nullptr)
731 {
732 *valueSizeRet = copySize;
733 }
734
735 return angle::Result::Continue;
736 }
737
createKernel(const cl::Kernel & kernel,const char * name,CLKernelImpl::Ptr * kernelOut)738 angle::Result CLProgramVk::createKernel(const cl::Kernel &kernel,
739 const char *name,
740 CLKernelImpl::Ptr *kernelOut)
741 {
742 // Wait for the compile to finish
743 mAsyncBuildEvent->wait();
744
745 std::scoped_lock<angle::SimpleMutex> sl(mProgramMutex);
746 const auto devProgram = getDeviceProgramData(name);
747 ASSERT(devProgram != nullptr);
748
749 // Create kernel
750 CLKernelArguments kernelArgs = devProgram->getKernelArguments(name);
751 std::string kernelAttributes = devProgram->getKernelAttributes(name);
752 std::string kernelName = std::string(name ? name : "");
753 CLKernelVk::Ptr kernelImpl = CLKernelVk::Ptr(
754 new (std::nothrow) CLKernelVk(kernel, kernelName, kernelAttributes, kernelArgs));
755 if (kernelImpl == nullptr)
756 {
757 ERR() << "Could not create kernel obj!";
758 ANGLE_CL_RETURN_ERROR(CL_OUT_OF_HOST_MEMORY);
759 }
760
761 ANGLE_TRY(kernelImpl->init());
762 *kernelOut = std::move(kernelImpl);
763
764 return angle::Result::Continue;
765 }
766
createKernels(cl_uint numKernels,CLKernelImpl::CreateFuncs & createFuncs,cl_uint * numKernelsRet)767 angle::Result CLProgramVk::createKernels(cl_uint numKernels,
768 CLKernelImpl::CreateFuncs &createFuncs,
769 cl_uint *numKernelsRet)
770 {
771 size_t numDevKernels = 0;
772 for (const auto &dev : mAssociatedDevicePrograms)
773 {
774 numDevKernels += dev.second.numKernels();
775 }
776 if (numKernelsRet != nullptr)
777 {
778 *numKernelsRet = static_cast<cl_uint>(numDevKernels);
779 }
780
781 if (numKernels != 0)
782 {
783 for (const auto &dev : mAssociatedDevicePrograms)
784 {
785 for (const auto &kernArgMap : dev.second.getKernelArgsMap())
786 {
787 createFuncs.emplace_back([this, &kernArgMap](const cl::Kernel &kern) {
788 CLKernelImpl::Ptr implPtr = nullptr;
789 ANGLE_CL_IMPL_TRY(this->createKernel(kern, kernArgMap.first.c_str(), &implPtr));
790 return CLKernelImpl::Ptr(std::move(implPtr));
791 });
792 }
793 }
794 }
795 return angle::Result::Continue;
796 }
797
getDeviceProgramData(const _cl_device_id * device) const798 const CLProgramVk::DeviceProgramData *CLProgramVk::getDeviceProgramData(
799 const _cl_device_id *device) const
800 {
801 if (!mAssociatedDevicePrograms.contains(device))
802 {
803 WARN() << "Device (" << device << ") is not associated with program (" << this << ") !";
804 return nullptr;
805 }
806 return &mAssociatedDevicePrograms.at(device);
807 }
808
getDeviceProgramData(const char * kernelName) const809 const CLProgramVk::DeviceProgramData *CLProgramVk::getDeviceProgramData(
810 const char *kernelName) const
811 {
812 for (const auto &deviceProgram : mAssociatedDevicePrograms)
813 {
814 if (deviceProgram.second.containsKernel(kernelName))
815 {
816 return &deviceProgram.second;
817 }
818 }
819 WARN() << "Kernel name (" << kernelName << ") is not associated with program (" << this
820 << ") !";
821 return nullptr;
822 }
823
buildInternal(const cl::DevicePtrs & devices,std::string options,std::string internalOptions,BuildType buildType,const LinkProgramsList & LinkProgramsList)824 bool CLProgramVk::buildInternal(const cl::DevicePtrs &devices,
825 std::string options,
826 std::string internalOptions,
827 BuildType buildType,
828 const LinkProgramsList &LinkProgramsList)
829 {
830 std::scoped_lock<angle::SimpleMutex> sl(mProgramMutex);
831
832 // Cache original options string
833 mProgramOpts = options;
834
835 // Process options and append any other internal (required) options for clspv
836 std::vector<std::string> optionTokens;
837 angle::SplitStringAlongWhitespace(options + " " + internalOptions, &optionTokens);
838 const bool createLibrary = std::find(optionTokens.begin(), optionTokens.end(),
839 "-create-library") != optionTokens.end();
840 std::string processedOptions = ProcessBuildOptions(optionTokens, buildType);
841
842 // Build for each associated device
843 for (size_t i = 0; i < devices.size(); ++i)
844 {
845 const cl::RefPointer<cl::Device> &device = devices.at(i);
846 DeviceProgramData &deviceProgramData = mAssociatedDevicePrograms[device->getNative()];
847
848 // add clspv compiler options based on device features
849 processedOptions += ClspvGetCompilerOptions(&device->getImpl<CLDeviceVk>());
850
851 if (buildType != BuildType::BINARY)
852 {
853 // Invoke clspv
854 switch (buildType)
855 {
856 case BuildType::BUILD:
857 case BuildType::COMPILE:
858 {
859 ScopedClspvContext clspvCtx;
860 const char *clSrc = mProgram.getSource().c_str();
861 ClspvError clspvRet = clspvCompileFromSourcesString(
862 1, NULL, static_cast<const char **>(&clSrc), processedOptions.c_str(),
863 &clspvCtx.mOutputBin, &clspvCtx.mOutputBinSize, &clspvCtx.mOutputBuildLog);
864 deviceProgramData.buildLog =
865 clspvCtx.mOutputBuildLog != nullptr ? clspvCtx.mOutputBuildLog : "";
866 if (clspvRet != CLSPV_SUCCESS)
867 {
868 ERR() << "OpenCL build failed with: ClspvError(" << clspvRet << ")!";
869 deviceProgramData.buildStatus = CL_BUILD_ERROR;
870 return false;
871 }
872
873 if (buildType == BuildType::COMPILE)
874 {
875 deviceProgramData.IR.assign(clspvCtx.mOutputBinSize, 0);
876 std::memcpy(deviceProgramData.IR.data(), clspvCtx.mOutputBin,
877 clspvCtx.mOutputBinSize);
878 deviceProgramData.binaryType = CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT;
879 }
880 else
881 {
882 deviceProgramData.binary.assign(clspvCtx.mOutputBinSize / sizeof(uint32_t),
883 0);
884 std::memcpy(deviceProgramData.binary.data(), clspvCtx.mOutputBin,
885 clspvCtx.mOutputBinSize);
886 deviceProgramData.binaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE;
887 }
888 break;
889 }
890 case BuildType::LINK:
891 {
892 ScopedClspvContext clspvCtx;
893 std::vector<size_t> vSizes;
894 std::vector<const char *> vBins;
895 const LinkPrograms &linkPrograms = LinkProgramsList.at(i);
896 for (const CLProgramVk::DeviceProgramData *linkProgramData : linkPrograms)
897 {
898 vSizes.push_back(linkProgramData->IR.size());
899 vBins.push_back(linkProgramData->IR.data());
900 }
901 ClspvError clspvRet = clspvCompileFromSourcesString(
902 linkPrograms.size(), vSizes.data(), vBins.data(), processedOptions.c_str(),
903 &clspvCtx.mOutputBin, &clspvCtx.mOutputBinSize, &clspvCtx.mOutputBuildLog);
904 deviceProgramData.buildLog =
905 clspvCtx.mOutputBuildLog != nullptr ? clspvCtx.mOutputBuildLog : "";
906 if (clspvRet != CLSPV_SUCCESS)
907 {
908 ERR() << "OpenCL build failed with: ClspvError(" << clspvRet << ")!";
909 deviceProgramData.buildStatus = CL_BUILD_ERROR;
910 return false;
911 }
912
913 if (createLibrary)
914 {
915 deviceProgramData.IR.assign(clspvCtx.mOutputBinSize, 0);
916 std::memcpy(deviceProgramData.IR.data(), clspvCtx.mOutputBin,
917 clspvCtx.mOutputBinSize);
918 deviceProgramData.binaryType = CL_PROGRAM_BINARY_TYPE_LIBRARY;
919 }
920 else
921 {
922 deviceProgramData.binary.assign(clspvCtx.mOutputBinSize / sizeof(uint32_t),
923 0);
924 std::memcpy(deviceProgramData.binary.data(),
925 reinterpret_cast<char *>(clspvCtx.mOutputBin),
926 clspvCtx.mOutputBinSize);
927 deviceProgramData.binaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE;
928 }
929 break;
930 }
931 default:
932 UNREACHABLE();
933 return false;
934 }
935 }
936
937 // Extract reflection info from spv binary and populate reflection data, as well as create
938 // the shader module
939 if (deviceProgramData.binaryType == CL_PROGRAM_BINARY_TYPE_EXECUTABLE)
940 {
941 spvtools::SpirvTools spvTool(SPV_ENV_UNIVERSAL_1_5);
942 bool parseRet = spvTool.Parse(
943 deviceProgramData.binary,
944 [](const spv_endianness_t endianess, const spv_parsed_header_t &instruction) {
945 return SPV_SUCCESS;
946 },
947 [&deviceProgramData](const spv_parsed_instruction_t &instruction) {
948 return ParseReflection(deviceProgramData.reflectionData, instruction);
949 });
950 if (!parseRet)
951 {
952 ERR() << "Failed to parse reflection info from SPIR-V!";
953 deviceProgramData.buildStatus = CL_BUILD_ERROR;
954 return false;
955 }
956
957 if (mShader)
958 {
959 mShader.reset();
960 }
961 // Strip SPIR-V binary if Vk implementation does not support non-semantic info
962 angle::spirv::Blob spvBlob =
963 !mContext->getFeatures().supportsShaderNonSemanticInfo.enabled
964 ? stripReflection(&deviceProgramData)
965 : deviceProgramData.binary;
966 ASSERT(!spvBlob.empty());
967 if (IsError(vk::InitShaderModule(mContext, &mShader, spvBlob.data(),
968 spvBlob.size() * sizeof(uint32_t))))
969 {
970 ERR() << "Failed to init Vulkan Shader Module!";
971 deviceProgramData.buildStatus = CL_BUILD_ERROR;
972 return false;
973 }
974
975 // Setup inital push constant range
976 uint32_t pushConstantMinOffet = UINT32_MAX, pushConstantMaxOffset = 0,
977 pushConstantMaxSize = 0;
978 for (const auto &pushConstant : deviceProgramData.reflectionData.pushConstants)
979 {
980 pushConstantMinOffet = pushConstant.second.offset < pushConstantMinOffet
981 ? pushConstant.second.offset
982 : pushConstantMinOffet;
983 if (pushConstant.second.offset >= pushConstantMaxOffset)
984 {
985 pushConstantMaxOffset = pushConstant.second.offset;
986 pushConstantMaxSize = pushConstant.second.size;
987 }
988 }
989 for (const auto &pushConstant : deviceProgramData.reflectionData.imagePushConstants)
990 {
991 for (const auto imageConstant : pushConstant.second)
992 {
993 pushConstantMinOffet = imageConstant.pcRange.offset < pushConstantMinOffet
994 ? imageConstant.pcRange.offset
995 : pushConstantMinOffet;
996 if (imageConstant.pcRange.offset >= pushConstantMaxOffset)
997 {
998 pushConstantMaxOffset = imageConstant.pcRange.offset;
999 pushConstantMaxSize = imageConstant.pcRange.size;
1000 }
1001 }
1002 }
1003 deviceProgramData.pushConstRange.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
1004 deviceProgramData.pushConstRange.offset =
1005 pushConstantMinOffet == UINT32_MAX ? 0 : pushConstantMinOffet;
1006 deviceProgramData.pushConstRange.size = pushConstantMaxOffset + pushConstantMaxSize;
1007
1008 if (kAngleDebug)
1009 {
1010 if (mContext->getFeatures().clDumpVkSpirv.enabled)
1011 {
1012 angle::spirv::Print(deviceProgramData.binary);
1013 }
1014 }
1015 }
1016 deviceProgramData.buildStatus = CL_BUILD_SUCCESS;
1017 }
1018 return true;
1019 }
1020
stripReflection(const DeviceProgramData * deviceProgramData)1021 angle::spirv::Blob CLProgramVk::stripReflection(const DeviceProgramData *deviceProgramData)
1022 {
1023 angle::spirv::Blob binaryStripped;
1024 spvtools::Optimizer opt(SPV_ENV_UNIVERSAL_1_5);
1025 opt.RegisterPass(spvtools::CreateStripReflectInfoPass());
1026 spvtools::OptimizerOptions optOptions;
1027 optOptions.set_run_validator(false);
1028 if (!opt.Run(deviceProgramData->binary.data(), deviceProgramData->binary.size(),
1029 &binaryStripped, optOptions))
1030 {
1031 ERR() << "Could not strip reflection data from binary!";
1032 }
1033 return binaryStripped;
1034 }
1035
allocateDescriptorSet(const DescriptorSetIndex setIndex,const vk::DescriptorSetLayout & descriptorSetLayout,vk::CommandBufferHelperCommon * commandBuffer,vk::DescriptorSetPointer * descriptorSetOut)1036 angle::Result CLProgramVk::allocateDescriptorSet(const DescriptorSetIndex setIndex,
1037 const vk::DescriptorSetLayout &descriptorSetLayout,
1038 vk::CommandBufferHelperCommon *commandBuffer,
1039 vk::DescriptorSetPointer *descriptorSetOut)
1040 {
1041 if (mDynamicDescriptorPools[setIndex])
1042 {
1043 ANGLE_CL_IMPL_TRY_ERROR(mDynamicDescriptorPools[setIndex]->allocateDescriptorSet(
1044 mContext, descriptorSetLayout, descriptorSetOut),
1045 CL_INVALID_OPERATION);
1046 commandBuffer->retainResource(descriptorSetOut->get());
1047 }
1048 return angle::Result::Continue;
1049 }
1050
setBuildStatus(const cl::DevicePtrs & devices,cl_build_status status)1051 void CLProgramVk::setBuildStatus(const cl::DevicePtrs &devices, cl_build_status status)
1052 {
1053 std::scoped_lock<angle::SimpleMutex> sl(mProgramMutex);
1054
1055 for (const auto &device : devices)
1056 {
1057 ASSERT(mAssociatedDevicePrograms.contains(device->getNative()));
1058 DeviceProgramData &deviceProgram = mAssociatedDevicePrograms.at(device->getNative());
1059 deviceProgram.buildStatus = status;
1060 }
1061 }
1062
getPrintfDescriptors(const std::string & kernelName) const1063 const angle::HashMap<uint32_t, ClspvPrintfInfo> *CLProgramVk::getPrintfDescriptors(
1064 const std::string &kernelName) const
1065 {
1066 const DeviceProgramData *deviceProgram = getDeviceProgramData(kernelName.c_str());
1067 if (deviceProgram)
1068 {
1069 return &deviceProgram->reflectionData.printfInfoMap;
1070 }
1071 return nullptr;
1072 }
1073
1074 } // namespace rx
1075