xref: /aosp_15_r20/external/angle/src/libANGLE/renderer/vulkan/CLDeviceVk.cpp (revision 8975f5c5ed3d1c378011245431ada316dfb6f244)
1 //
2 // Copyright 2021 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 // CLDeviceVk.cpp: Implements the class methods for CLDeviceVk.
7 
8 #include "libANGLE/renderer/vulkan/CLDeviceVk.h"
9 #include "libANGLE/renderer/vulkan/CLPlatformVk.h"
10 #include "libANGLE/renderer/vulkan/vk_renderer.h"
11 
12 #include "libANGLE/renderer/cl_types.h"
13 
14 #include "libANGLE/Display.h"
15 #include "libANGLE/cl_utils.h"
16 
17 namespace rx
18 {
19 
CLDeviceVk(const cl::Device & device,vk::Renderer * renderer)20 CLDeviceVk::CLDeviceVk(const cl::Device &device, vk::Renderer *renderer)
21     : CLDeviceImpl(device), mRenderer(renderer)
22 {
23     const VkPhysicalDeviceProperties &props = mRenderer->getPhysicalDeviceProperties();
24 
25     // Setup initial device mInfo fields
26     // TODO(aannestrand) Create cl::Caps and use for device creation
27     // http://anglebug.com/42266954
28     mInfoString = {
29         {cl::DeviceInfo::Name, std::string(props.deviceName)},
30         {cl::DeviceInfo::Vendor, mRenderer->getVendorString()},
31         {cl::DeviceInfo::DriverVersion, mRenderer->getVersionString(true)},
32         {cl::DeviceInfo::Version, std::string("OpenCL 3.0 " + mRenderer->getVersionString(true))},
33         {cl::DeviceInfo::Profile, std::string("FULL_PROFILE")},
34         {cl::DeviceInfo::OpenCL_C_Version, std::string("OpenCL C 1.2 ")},
35         {cl::DeviceInfo::LatestConformanceVersionPassed, std::string("FIXME")}};
36     mInfoSizeT = {
37         {cl::DeviceInfo::MaxWorkGroupSize, props.limits.maxComputeWorkGroupInvocations},
38         {cl::DeviceInfo::MaxGlobalVariableSize, 0},
39         {cl::DeviceInfo::GlobalVariablePreferredTotalSize, 0},
40 
41         // TODO(aannestrand) Update these hardcoded platform/device queries
42         // http://anglebug.com/42266935
43         {cl::DeviceInfo::MaxParameterSize, 1024},
44         {cl::DeviceInfo::ProfilingTimerResolution, 1},
45         {cl::DeviceInfo::PrintfBufferSize, 1024 * 1024},
46         {cl::DeviceInfo::PreferredWorkGroupSizeMultiple, 16},
47     };
48     mInfoULong = {
49         {cl::DeviceInfo::LocalMemSize, props.limits.maxComputeSharedMemorySize},
50         {cl::DeviceInfo::SVM_Capabilities, 0},
51         {cl::DeviceInfo::QueueOnDeviceProperties, 0},
52         {cl::DeviceInfo::PartitionAffinityDomain, 0},
53         {cl::DeviceInfo::DeviceEnqueueCapabilities, 0},
54         {cl::DeviceInfo::QueueOnHostProperties, CL_QUEUE_PROFILING_ENABLE},
55 
56         // TODO(aannestrand) Update these hardcoded platform/device queries
57         // http://anglebug.com/42266935
58         {cl::DeviceInfo::HalfFpConfig, 0},
59         {cl::DeviceInfo::DoubleFpConfig, 0},
60         {cl::DeviceInfo::GlobalMemCacheSize, 0},
61         {cl::DeviceInfo::GlobalMemSize, 1024 * 1024 * 1024},
62         {cl::DeviceInfo::MaxConstantBufferSize, 64 * 1024},
63         {cl::DeviceInfo::SingleFpConfig, CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN | CL_FP_FMA},
64         {cl::DeviceInfo::AtomicMemoryCapabilities,
65          CL_DEVICE_ATOMIC_ORDER_RELAXED | CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP},
66         // TODO (http://anglebug.com/379669750) Add these based on the Vulkan features query
67         {cl::DeviceInfo::AtomicFenceCapabilities, CL_DEVICE_ATOMIC_ORDER_RELAXED |
68                                                       CL_DEVICE_ATOMIC_ORDER_ACQ_REL |
69                                                       CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP |
70                                                       // non-mandatory
71                                                       CL_DEVICE_ATOMIC_SCOPE_WORK_ITEM},
72     };
73     mInfoUInt = {
74         {cl::DeviceInfo::VendorID, props.vendorID},
75         {cl::DeviceInfo::MaxReadImageArgs, props.limits.maxPerStageDescriptorSampledImages},
76         {cl::DeviceInfo::MaxWriteImageArgs, props.limits.maxPerStageDescriptorStorageImages},
77         {cl::DeviceInfo::MaxReadWriteImageArgs, props.limits.maxPerStageDescriptorStorageImages},
78         {cl::DeviceInfo::GlobalMemCachelineSize,
79          static_cast<cl_uint>(props.limits.nonCoherentAtomSize)},
80         {cl::DeviceInfo::Available, CL_TRUE},
81         {cl::DeviceInfo::LinkerAvailable, CL_TRUE},
82         {cl::DeviceInfo::CompilerAvailable, CL_TRUE},
83         {cl::DeviceInfo::MaxOnDeviceQueues, 0},
84         {cl::DeviceInfo::MaxOnDeviceEvents, 0},
85         {cl::DeviceInfo::QueueOnDeviceMaxSize, 0},
86         {cl::DeviceInfo::QueueOnDevicePreferredSize, 0},
87         {cl::DeviceInfo::MaxPipeArgs, 0},
88         {cl::DeviceInfo::PipeMaxPacketSize, 0},
89         {cl::DeviceInfo::PipeSupport, CL_FALSE},
90         {cl::DeviceInfo::PipeMaxActiveReservations, 0},
91         {cl::DeviceInfo::ErrorCorrectionSupport, CL_FALSE},
92         {cl::DeviceInfo::PreferredInteropUserSync, CL_TRUE},
93         {cl::DeviceInfo::ExecutionCapabilities, CL_EXEC_KERNEL},
94 
95         // TODO(aannestrand) Update these hardcoded platform/device queries
96         // http://anglebug.com/42266935
97         {cl::DeviceInfo::AddressBits, 32},
98         {cl::DeviceInfo::EndianLittle, CL_TRUE},
99         {cl::DeviceInfo::LocalMemType, CL_LOCAL},
100         // TODO (http://anglebug.com/379669750) Vulkan reports a big sampler count number, we dont
101         // need that many and set it to minimum req for now.
102         {cl::DeviceInfo::MaxSamplers, 16u},
103         {cl::DeviceInfo::MaxConstantArgs, 8},
104         {cl::DeviceInfo::MaxNumSubGroups, 0},
105         {cl::DeviceInfo::MaxComputeUnits, 4},
106         {cl::DeviceInfo::MaxClockFrequency, 555},
107         {cl::DeviceInfo::MaxWorkItemDimensions, 3},
108         {cl::DeviceInfo::MinDataTypeAlignSize, 128},
109         {cl::DeviceInfo::GlobalMemCacheType, CL_NONE},
110         {cl::DeviceInfo::HostUnifiedMemory, CL_TRUE},
111         {cl::DeviceInfo::NativeVectorWidthChar, 4},
112         {cl::DeviceInfo::NativeVectorWidthShort, 2},
113         {cl::DeviceInfo::NativeVectorWidthInt, 1},
114         {cl::DeviceInfo::NativeVectorWidthLong, 1},
115         {cl::DeviceInfo::NativeVectorWidthFloat, 1},
116         {cl::DeviceInfo::NativeVectorWidthDouble, 1},
117         {cl::DeviceInfo::NativeVectorWidthHalf, 0},
118         {cl::DeviceInfo::PartitionMaxSubDevices, 0},
119         {cl::DeviceInfo::PreferredVectorWidthInt, 1},
120         {cl::DeviceInfo::PreferredVectorWidthLong, 1},
121         {cl::DeviceInfo::PreferredVectorWidthChar, 4},
122         {cl::DeviceInfo::PreferredVectorWidthHalf, 0},
123         {cl::DeviceInfo::PreferredVectorWidthShort, 2},
124         {cl::DeviceInfo::PreferredVectorWidthFloat, 1},
125         {cl::DeviceInfo::PreferredVectorWidthDouble, 0},
126         {cl::DeviceInfo::PreferredLocalAtomicAlignment, 0},
127         {cl::DeviceInfo::PreferredGlobalAtomicAlignment, 0},
128         {cl::DeviceInfo::PreferredPlatformAtomicAlignment, 0},
129         {cl::DeviceInfo::NonUniformWorkGroupSupport, CL_FALSE},
130         {cl::DeviceInfo::GenericAddressSpaceSupport, CL_FALSE},
131         {cl::DeviceInfo::SubGroupIndependentForwardProgress, CL_FALSE},
132         {cl::DeviceInfo::WorkGroupCollectiveFunctionsSupport, CL_FALSE},
133     };
134 }
135 
// Defaulted destructor: this class performs no explicit teardown of its own here.
CLDeviceVk::~CLDeviceVk() = default;
137 
createInfo(cl::DeviceType type) const138 CLDeviceImpl::Info CLDeviceVk::createInfo(cl::DeviceType type) const
139 {
140     Info info(type);
141 
142     const VkPhysicalDeviceProperties &properties = mRenderer->getPhysicalDeviceProperties();
143 
144     info.maxWorkItemSizes.push_back(properties.limits.maxComputeWorkGroupSize[0]);
145     info.maxWorkItemSizes.push_back(properties.limits.maxComputeWorkGroupSize[1]);
146     info.maxWorkItemSizes.push_back(properties.limits.maxComputeWorkGroupSize[2]);
147 
148     // TODO(aannestrand) Update these hardcoded platform/device queries
149     // http://anglebug.com/42266935
150     info.maxMemAllocSize  = 1 << 30;
151     info.memBaseAddrAlign = 1024;
152 
153     info.imageSupport = CL_TRUE;
154 
155     info.image2D_MaxWidth  = properties.limits.maxImageDimension2D;
156     info.image2D_MaxHeight = properties.limits.maxImageDimension2D;
157     info.image3D_MaxWidth  = properties.limits.maxImageDimension3D;
158     info.image3D_MaxHeight = properties.limits.maxImageDimension3D;
159     info.image3D_MaxDepth  = properties.limits.maxImageDimension3D;
160     // TODO (http://anglebug.com/379669750) For now set it minimum requirement.
161     info.imageMaxBufferSize        = 65536;
162     info.imageMaxArraySize         = properties.limits.maxImageArrayLayers;
163     info.imagePitchAlignment       = 0u;
164     info.imageBaseAddressAlignment = 0u;
165 
166     info.execCapabilities     = CL_EXEC_KERNEL;
167     info.queueOnDeviceMaxSize = 0u;
168     info.builtInKernels       = "";
169     info.version              = CL_MAKE_VERSION(3, 0, 0);
170     info.versionStr           = "OpenCL 3.0 " + mRenderer->getVersionString(true);
171     info.OpenCL_C_AllVersions = {{CL_MAKE_VERSION(1, 0, 0), "OpenCL C"},
172                                  {CL_MAKE_VERSION(1, 1, 0), "OpenCL C"},
173                                  {CL_MAKE_VERSION(1, 2, 0), "OpenCL C"},
174                                  {CL_MAKE_VERSION(3, 0, 0), "OpenCL C"}};
175 
176     info.OpenCL_C_Features         = {};
177     info.ILsWithVersion            = {};
178     info.builtInKernelsWithVersion = {};
179     info.partitionProperties       = {};
180     info.partitionType             = {};
181     info.IL_Version                = "";
182 
183     // Below extensions are required as of OpenCL 1.1, add their versioned strings
184     NameVersionVector versionedExtensionList = {
185         // Below extensions are required as of OpenCL 1.1
186         cl_name_version{.version = CL_MAKE_VERSION(1, 0, 0),
187                         .name    = "cl_khr_byte_addressable_store"},
188         cl_name_version{.version = CL_MAKE_VERSION(1, 0, 0),
189                         .name    = "cl_khr_global_int32_base_atomics"},
190         cl_name_version{.version = CL_MAKE_VERSION(1, 0, 0),
191                         .name    = "cl_khr_global_int32_extended_atomics"},
192         cl_name_version{.version = CL_MAKE_VERSION(1, 0, 0),
193                         .name    = "cl_khr_local_int32_base_atomics"},
194         cl_name_version{.version = CL_MAKE_VERSION(1, 0, 0),
195                         .name    = "cl_khr_local_int32_extended_atomics"},
196     };
197     info.initializeVersionedExtensions(std::move(versionedExtensionList));
198 
199     return info;
200 }
201 
getInfoUInt(cl::DeviceInfo name,cl_uint * value) const202 angle::Result CLDeviceVk::getInfoUInt(cl::DeviceInfo name, cl_uint *value) const
203 {
204     if (mInfoUInt.count(name))
205     {
206         *value = mInfoUInt.at(name);
207         return angle::Result::Continue;
208     }
209     ANGLE_CL_RETURN_ERROR(CL_INVALID_VALUE);
210 }
211 
getInfoULong(cl::DeviceInfo name,cl_ulong * value) const212 angle::Result CLDeviceVk::getInfoULong(cl::DeviceInfo name, cl_ulong *value) const
213 {
214     if (mInfoULong.count(name))
215     {
216         *value = mInfoULong.at(name);
217         return angle::Result::Continue;
218     }
219     ANGLE_CL_RETURN_ERROR(CL_INVALID_VALUE);
220 }
221 
getInfoSizeT(cl::DeviceInfo name,size_t * value) const222 angle::Result CLDeviceVk::getInfoSizeT(cl::DeviceInfo name, size_t *value) const
223 {
224     if (mInfoSizeT.count(name))
225     {
226         *value = mInfoSizeT.at(name);
227         return angle::Result::Continue;
228     }
229     ANGLE_CL_RETURN_ERROR(CL_INVALID_VALUE);
230 }
231 
getInfoStringLength(cl::DeviceInfo name,size_t * value) const232 angle::Result CLDeviceVk::getInfoStringLength(cl::DeviceInfo name, size_t *value) const
233 {
234     if (mInfoString.count(name))
235     {
236         *value = mInfoString.at(name).length() + 1;
237         return angle::Result::Continue;
238     }
239     ANGLE_CL_RETURN_ERROR(CL_INVALID_VALUE);
240 }
241 
getInfoString(cl::DeviceInfo name,size_t size,char * value) const242 angle::Result CLDeviceVk::getInfoString(cl::DeviceInfo name, size_t size, char *value) const
243 {
244     if (mInfoString.count(name))
245     {
246         std::strcpy(value, mInfoString.at(name).c_str());
247         return angle::Result::Continue;
248     }
249     ANGLE_CL_RETURN_ERROR(CL_INVALID_VALUE);
250 }
251 
// Sub-device creation is not implemented: none of the arguments are read, UNIMPLEMENTED() is
// invoked and CL_OUT_OF_RESOURCES is reported as the error.
angle::Result CLDeviceVk::createSubDevices(const cl_device_partition_property *properties,
                                           cl_uint numDevices,
                                           CreateFuncs &subDevices,
                                           cl_uint *numDevicesRet)
{
    UNIMPLEMENTED();
    ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES);
}
260 
// Chooses a local work-group size for |ndrange|.
//
// Starting from {1, 1, 1}, each used dimension is doubled in turn for as long as the doubled
// size:
//   - still divides the global work size of that dimension evenly (only uniform work-groups are
//     produced),
//   - stays within the Vulkan per-dimension local work-group size limit, and
//   - keeps the total number of work-items within min(MaxWorkGroupSize, 64).
// Dimensions at or beyond ndrange.workDimensions are left at 1.
cl::WorkgroupSize CLDeviceVk::selectWorkGroupSize(const cl::NDRange &ndrange) const
{
    const VkPhysicalDeviceProperties &props = mRenderer->getPhysicalDeviceProperties();

    // Limit total work-group size to the Vulkan device's limit, and never pick more than 64
    // work-items in total.
    uint32_t maxSize = static_cast<uint32_t>(mInfoSizeT.at(cl::DeviceInfo::MaxWorkGroupSize));
    maxSize          = std::min(maxSize, 64u);

    bool keepIncreasing         = false;
    cl::WorkgroupSize localSize = {1, 1, 1};
    do
    {
        keepIncreasing = false;
        for (cl_uint i = 0; i < ndrange.workDimensions; i++)
        {
            cl::WorkgroupSize newLocalSize = localSize;
            newLocalSize[i] *= 2;

            // The per-dimension bound on the *local* size is maxComputeWorkGroupSize;
            // maxComputeWorkGroupCount limits the number of work-groups in a dispatch instead.
            //
            // TODO: Add support for non-uniform WGS
            // http://anglebug.com/42267067
            if (ndrange.globalWorkSize[i] % newLocalSize[i] == 0 &&
                newLocalSize[i] <= props.limits.maxComputeWorkGroupSize[i] &&
                newLocalSize[0] * newLocalSize[1] * newLocalSize[2] <= maxSize)
            {
                localSize      = newLocalSize;
                keepIncreasing = true;
            }
        }
    } while (keepIncreasing);

    return localSize;
}
292 
293 }  // namespace rx
294