1 //
2 // Copyright 2021 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 // CLDeviceVk.cpp: Implements the class methods for CLDeviceVk.
7
8 #include "libANGLE/renderer/vulkan/CLDeviceVk.h"
9 #include "libANGLE/renderer/vulkan/CLPlatformVk.h"
10 #include "libANGLE/renderer/vulkan/vk_renderer.h"
11
12 #include "libANGLE/renderer/cl_types.h"
13
14 #include "libANGLE/Display.h"
15 #include "libANGLE/cl_utils.h"
16
17 namespace rx
18 {
19
CLDeviceVk(const cl::Device & device,vk::Renderer * renderer)20 CLDeviceVk::CLDeviceVk(const cl::Device &device, vk::Renderer *renderer)
21 : CLDeviceImpl(device), mRenderer(renderer)
22 {
23 const VkPhysicalDeviceProperties &props = mRenderer->getPhysicalDeviceProperties();
24
25 // Setup initial device mInfo fields
26 // TODO(aannestrand) Create cl::Caps and use for device creation
27 // http://anglebug.com/42266954
28 mInfoString = {
29 {cl::DeviceInfo::Name, std::string(props.deviceName)},
30 {cl::DeviceInfo::Vendor, mRenderer->getVendorString()},
31 {cl::DeviceInfo::DriverVersion, mRenderer->getVersionString(true)},
32 {cl::DeviceInfo::Version, std::string("OpenCL 3.0 " + mRenderer->getVersionString(true))},
33 {cl::DeviceInfo::Profile, std::string("FULL_PROFILE")},
34 {cl::DeviceInfo::OpenCL_C_Version, std::string("OpenCL C 1.2 ")},
35 {cl::DeviceInfo::LatestConformanceVersionPassed, std::string("FIXME")}};
36 mInfoSizeT = {
37 {cl::DeviceInfo::MaxWorkGroupSize, props.limits.maxComputeWorkGroupInvocations},
38 {cl::DeviceInfo::MaxGlobalVariableSize, 0},
39 {cl::DeviceInfo::GlobalVariablePreferredTotalSize, 0},
40
41 // TODO(aannestrand) Update these hardcoded platform/device queries
42 // http://anglebug.com/42266935
43 {cl::DeviceInfo::MaxParameterSize, 1024},
44 {cl::DeviceInfo::ProfilingTimerResolution, 1},
45 {cl::DeviceInfo::PrintfBufferSize, 1024 * 1024},
46 {cl::DeviceInfo::PreferredWorkGroupSizeMultiple, 16},
47 };
48 mInfoULong = {
49 {cl::DeviceInfo::LocalMemSize, props.limits.maxComputeSharedMemorySize},
50 {cl::DeviceInfo::SVM_Capabilities, 0},
51 {cl::DeviceInfo::QueueOnDeviceProperties, 0},
52 {cl::DeviceInfo::PartitionAffinityDomain, 0},
53 {cl::DeviceInfo::DeviceEnqueueCapabilities, 0},
54 {cl::DeviceInfo::QueueOnHostProperties, CL_QUEUE_PROFILING_ENABLE},
55
56 // TODO(aannestrand) Update these hardcoded platform/device queries
57 // http://anglebug.com/42266935
58 {cl::DeviceInfo::HalfFpConfig, 0},
59 {cl::DeviceInfo::DoubleFpConfig, 0},
60 {cl::DeviceInfo::GlobalMemCacheSize, 0},
61 {cl::DeviceInfo::GlobalMemSize, 1024 * 1024 * 1024},
62 {cl::DeviceInfo::MaxConstantBufferSize, 64 * 1024},
63 {cl::DeviceInfo::SingleFpConfig, CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN | CL_FP_FMA},
64 {cl::DeviceInfo::AtomicMemoryCapabilities,
65 CL_DEVICE_ATOMIC_ORDER_RELAXED | CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP},
66 // TODO (http://anglebug.com/379669750) Add these based on the Vulkan features query
67 {cl::DeviceInfo::AtomicFenceCapabilities, CL_DEVICE_ATOMIC_ORDER_RELAXED |
68 CL_DEVICE_ATOMIC_ORDER_ACQ_REL |
69 CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP |
70 // non-mandatory
71 CL_DEVICE_ATOMIC_SCOPE_WORK_ITEM},
72 };
73 mInfoUInt = {
74 {cl::DeviceInfo::VendorID, props.vendorID},
75 {cl::DeviceInfo::MaxReadImageArgs, props.limits.maxPerStageDescriptorSampledImages},
76 {cl::DeviceInfo::MaxWriteImageArgs, props.limits.maxPerStageDescriptorStorageImages},
77 {cl::DeviceInfo::MaxReadWriteImageArgs, props.limits.maxPerStageDescriptorStorageImages},
78 {cl::DeviceInfo::GlobalMemCachelineSize,
79 static_cast<cl_uint>(props.limits.nonCoherentAtomSize)},
80 {cl::DeviceInfo::Available, CL_TRUE},
81 {cl::DeviceInfo::LinkerAvailable, CL_TRUE},
82 {cl::DeviceInfo::CompilerAvailable, CL_TRUE},
83 {cl::DeviceInfo::MaxOnDeviceQueues, 0},
84 {cl::DeviceInfo::MaxOnDeviceEvents, 0},
85 {cl::DeviceInfo::QueueOnDeviceMaxSize, 0},
86 {cl::DeviceInfo::QueueOnDevicePreferredSize, 0},
87 {cl::DeviceInfo::MaxPipeArgs, 0},
88 {cl::DeviceInfo::PipeMaxPacketSize, 0},
89 {cl::DeviceInfo::PipeSupport, CL_FALSE},
90 {cl::DeviceInfo::PipeMaxActiveReservations, 0},
91 {cl::DeviceInfo::ErrorCorrectionSupport, CL_FALSE},
92 {cl::DeviceInfo::PreferredInteropUserSync, CL_TRUE},
93 {cl::DeviceInfo::ExecutionCapabilities, CL_EXEC_KERNEL},
94
95 // TODO(aannestrand) Update these hardcoded platform/device queries
96 // http://anglebug.com/42266935
97 {cl::DeviceInfo::AddressBits, 32},
98 {cl::DeviceInfo::EndianLittle, CL_TRUE},
99 {cl::DeviceInfo::LocalMemType, CL_LOCAL},
100 // TODO (http://anglebug.com/379669750) Vulkan reports a big sampler count number, we dont
101 // need that many and set it to minimum req for now.
102 {cl::DeviceInfo::MaxSamplers, 16u},
103 {cl::DeviceInfo::MaxConstantArgs, 8},
104 {cl::DeviceInfo::MaxNumSubGroups, 0},
105 {cl::DeviceInfo::MaxComputeUnits, 4},
106 {cl::DeviceInfo::MaxClockFrequency, 555},
107 {cl::DeviceInfo::MaxWorkItemDimensions, 3},
108 {cl::DeviceInfo::MinDataTypeAlignSize, 128},
109 {cl::DeviceInfo::GlobalMemCacheType, CL_NONE},
110 {cl::DeviceInfo::HostUnifiedMemory, CL_TRUE},
111 {cl::DeviceInfo::NativeVectorWidthChar, 4},
112 {cl::DeviceInfo::NativeVectorWidthShort, 2},
113 {cl::DeviceInfo::NativeVectorWidthInt, 1},
114 {cl::DeviceInfo::NativeVectorWidthLong, 1},
115 {cl::DeviceInfo::NativeVectorWidthFloat, 1},
116 {cl::DeviceInfo::NativeVectorWidthDouble, 1},
117 {cl::DeviceInfo::NativeVectorWidthHalf, 0},
118 {cl::DeviceInfo::PartitionMaxSubDevices, 0},
119 {cl::DeviceInfo::PreferredVectorWidthInt, 1},
120 {cl::DeviceInfo::PreferredVectorWidthLong, 1},
121 {cl::DeviceInfo::PreferredVectorWidthChar, 4},
122 {cl::DeviceInfo::PreferredVectorWidthHalf, 0},
123 {cl::DeviceInfo::PreferredVectorWidthShort, 2},
124 {cl::DeviceInfo::PreferredVectorWidthFloat, 1},
125 {cl::DeviceInfo::PreferredVectorWidthDouble, 0},
126 {cl::DeviceInfo::PreferredLocalAtomicAlignment, 0},
127 {cl::DeviceInfo::PreferredGlobalAtomicAlignment, 0},
128 {cl::DeviceInfo::PreferredPlatformAtomicAlignment, 0},
129 {cl::DeviceInfo::NonUniformWorkGroupSupport, CL_FALSE},
130 {cl::DeviceInfo::GenericAddressSpaceSupport, CL_FALSE},
131 {cl::DeviceInfo::SubGroupIndependentForwardProgress, CL_FALSE},
132 {cl::DeviceInfo::WorkGroupCollectiveFunctionsSupport, CL_FALSE},
133 };
134 }
135
136 CLDeviceVk::~CLDeviceVk() = default;
137
createInfo(cl::DeviceType type) const138 CLDeviceImpl::Info CLDeviceVk::createInfo(cl::DeviceType type) const
139 {
140 Info info(type);
141
142 const VkPhysicalDeviceProperties &properties = mRenderer->getPhysicalDeviceProperties();
143
144 info.maxWorkItemSizes.push_back(properties.limits.maxComputeWorkGroupSize[0]);
145 info.maxWorkItemSizes.push_back(properties.limits.maxComputeWorkGroupSize[1]);
146 info.maxWorkItemSizes.push_back(properties.limits.maxComputeWorkGroupSize[2]);
147
148 // TODO(aannestrand) Update these hardcoded platform/device queries
149 // http://anglebug.com/42266935
150 info.maxMemAllocSize = 1 << 30;
151 info.memBaseAddrAlign = 1024;
152
153 info.imageSupport = CL_TRUE;
154
155 info.image2D_MaxWidth = properties.limits.maxImageDimension2D;
156 info.image2D_MaxHeight = properties.limits.maxImageDimension2D;
157 info.image3D_MaxWidth = properties.limits.maxImageDimension3D;
158 info.image3D_MaxHeight = properties.limits.maxImageDimension3D;
159 info.image3D_MaxDepth = properties.limits.maxImageDimension3D;
160 // TODO (http://anglebug.com/379669750) For now set it minimum requirement.
161 info.imageMaxBufferSize = 65536;
162 info.imageMaxArraySize = properties.limits.maxImageArrayLayers;
163 info.imagePitchAlignment = 0u;
164 info.imageBaseAddressAlignment = 0u;
165
166 info.execCapabilities = CL_EXEC_KERNEL;
167 info.queueOnDeviceMaxSize = 0u;
168 info.builtInKernels = "";
169 info.version = CL_MAKE_VERSION(3, 0, 0);
170 info.versionStr = "OpenCL 3.0 " + mRenderer->getVersionString(true);
171 info.OpenCL_C_AllVersions = {{CL_MAKE_VERSION(1, 0, 0), "OpenCL C"},
172 {CL_MAKE_VERSION(1, 1, 0), "OpenCL C"},
173 {CL_MAKE_VERSION(1, 2, 0), "OpenCL C"},
174 {CL_MAKE_VERSION(3, 0, 0), "OpenCL C"}};
175
176 info.OpenCL_C_Features = {};
177 info.ILsWithVersion = {};
178 info.builtInKernelsWithVersion = {};
179 info.partitionProperties = {};
180 info.partitionType = {};
181 info.IL_Version = "";
182
183 // Below extensions are required as of OpenCL 1.1, add their versioned strings
184 NameVersionVector versionedExtensionList = {
185 // Below extensions are required as of OpenCL 1.1
186 cl_name_version{.version = CL_MAKE_VERSION(1, 0, 0),
187 .name = "cl_khr_byte_addressable_store"},
188 cl_name_version{.version = CL_MAKE_VERSION(1, 0, 0),
189 .name = "cl_khr_global_int32_base_atomics"},
190 cl_name_version{.version = CL_MAKE_VERSION(1, 0, 0),
191 .name = "cl_khr_global_int32_extended_atomics"},
192 cl_name_version{.version = CL_MAKE_VERSION(1, 0, 0),
193 .name = "cl_khr_local_int32_base_atomics"},
194 cl_name_version{.version = CL_MAKE_VERSION(1, 0, 0),
195 .name = "cl_khr_local_int32_extended_atomics"},
196 };
197 info.initializeVersionedExtensions(std::move(versionedExtensionList));
198
199 return info;
200 }
201
getInfoUInt(cl::DeviceInfo name,cl_uint * value) const202 angle::Result CLDeviceVk::getInfoUInt(cl::DeviceInfo name, cl_uint *value) const
203 {
204 if (mInfoUInt.count(name))
205 {
206 *value = mInfoUInt.at(name);
207 return angle::Result::Continue;
208 }
209 ANGLE_CL_RETURN_ERROR(CL_INVALID_VALUE);
210 }
211
getInfoULong(cl::DeviceInfo name,cl_ulong * value) const212 angle::Result CLDeviceVk::getInfoULong(cl::DeviceInfo name, cl_ulong *value) const
213 {
214 if (mInfoULong.count(name))
215 {
216 *value = mInfoULong.at(name);
217 return angle::Result::Continue;
218 }
219 ANGLE_CL_RETURN_ERROR(CL_INVALID_VALUE);
220 }
221
getInfoSizeT(cl::DeviceInfo name,size_t * value) const222 angle::Result CLDeviceVk::getInfoSizeT(cl::DeviceInfo name, size_t *value) const
223 {
224 if (mInfoSizeT.count(name))
225 {
226 *value = mInfoSizeT.at(name);
227 return angle::Result::Continue;
228 }
229 ANGLE_CL_RETURN_ERROR(CL_INVALID_VALUE);
230 }
231
getInfoStringLength(cl::DeviceInfo name,size_t * value) const232 angle::Result CLDeviceVk::getInfoStringLength(cl::DeviceInfo name, size_t *value) const
233 {
234 if (mInfoString.count(name))
235 {
236 *value = mInfoString.at(name).length() + 1;
237 return angle::Result::Continue;
238 }
239 ANGLE_CL_RETURN_ERROR(CL_INVALID_VALUE);
240 }
241
getInfoString(cl::DeviceInfo name,size_t size,char * value) const242 angle::Result CLDeviceVk::getInfoString(cl::DeviceInfo name, size_t size, char *value) const
243 {
244 if (mInfoString.count(name))
245 {
246 std::strcpy(value, mInfoString.at(name).c_str());
247 return angle::Result::Continue;
248 }
249 ANGLE_CL_RETURN_ERROR(CL_INVALID_VALUE);
250 }
251
createSubDevices(const cl_device_partition_property * properties,cl_uint numDevices,CreateFuncs & subDevices,cl_uint * numDevicesRet)252 angle::Result CLDeviceVk::createSubDevices(const cl_device_partition_property *properties,
253 cl_uint numDevices,
254 CreateFuncs &subDevices,
255 cl_uint *numDevicesRet)
256 {
257 UNIMPLEMENTED();
258 ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES);
259 }
260
// Chooses a local work-group size for the given ND-range.
//
// Starting from {1, 1, 1}, each used dimension is repeatedly doubled, round-robin,
// for as long as all of the following hold:
//   * the doubled size still evenly divides the global work size in that
//     dimension (non-uniform work-groups are not supported yet),
//   * the doubled size does not exceed the Vulkan per-dimension local size limit
//     (maxComputeWorkGroupSize), and
//   * the total number of work-items does not exceed
//     min(MaxWorkGroupSize device info, 64).
//
// Returns the largest size found; dimensions beyond ndrange.workDimensions stay 1.
cl::WorkgroupSize CLDeviceVk::selectWorkGroupSize(const cl::NDRange &ndrange) const
{
    // Limit total work-group size to the Vulkan device's limit
    const VkPhysicalDeviceProperties &props = mRenderer->getPhysicalDeviceProperties();
    uint32_t maxSize = static_cast<uint32_t>(mInfoSizeT.at(cl::DeviceInfo::MaxWorkGroupSize));
    maxSize          = std::min(maxSize, 64u);

    bool keepIncreasing         = false;
    cl::WorkgroupSize localSize = {1, 1, 1};
    do
    {
        keepIncreasing = false;
        for (cl_uint i = 0; i < ndrange.workDimensions; i++)
        {
            cl::WorkgroupSize newLocalSize = localSize;
            newLocalSize[i] *= 2;

            // TODO: Add support for non-uniform WGS
            // http://anglebug.com/42267067
            // The per-dimension bound is the local work-group *size* limit;
            // maxComputeWorkGroupCount limits the number of dispatched groups instead.
            if (ndrange.globalWorkSize[i] % newLocalSize[i] == 0 &&
                newLocalSize[i] <= props.limits.maxComputeWorkGroupSize[i] &&
                newLocalSize[0] * newLocalSize[1] * newLocalSize[2] <= maxSize)
            {
                localSize      = newLocalSize;
                keepIncreasing = true;
            }
        }
    } while (keepIncreasing);

    return localSize;
}
292
293 } // namespace rx
294