// // Copyright 2021 The ANGLE Project Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // // CLDeviceVk.cpp: Implements the class methods for CLDeviceVk. #include "libANGLE/renderer/vulkan/CLDeviceVk.h" #include "libANGLE/renderer/vulkan/CLPlatformVk.h" #include "libANGLE/renderer/vulkan/vk_renderer.h" #include "libANGLE/renderer/cl_types.h" #include "libANGLE/Display.h" #include "libANGLE/cl_utils.h" namespace rx { CLDeviceVk::CLDeviceVk(const cl::Device &device, vk::Renderer *renderer) : CLDeviceImpl(device), mRenderer(renderer) { const VkPhysicalDeviceProperties &props = mRenderer->getPhysicalDeviceProperties(); // Setup initial device mInfo fields // TODO(aannestrand) Create cl::Caps and use for device creation // http://anglebug.com/42266954 mInfoString = { {cl::DeviceInfo::Name, std::string(props.deviceName)}, {cl::DeviceInfo::Vendor, mRenderer->getVendorString()}, {cl::DeviceInfo::DriverVersion, mRenderer->getVersionString(true)}, {cl::DeviceInfo::Version, std::string("OpenCL 3.0 " + mRenderer->getVersionString(true))}, {cl::DeviceInfo::Profile, std::string("FULL_PROFILE")}, {cl::DeviceInfo::OpenCL_C_Version, std::string("OpenCL C 1.2 ")}, {cl::DeviceInfo::LatestConformanceVersionPassed, std::string("FIXME")}}; mInfoSizeT = { {cl::DeviceInfo::MaxWorkGroupSize, props.limits.maxComputeWorkGroupInvocations}, {cl::DeviceInfo::MaxGlobalVariableSize, 0}, {cl::DeviceInfo::GlobalVariablePreferredTotalSize, 0}, // TODO(aannestrand) Update these hardcoded platform/device queries // http://anglebug.com/42266935 {cl::DeviceInfo::MaxParameterSize, 1024}, {cl::DeviceInfo::ProfilingTimerResolution, 1}, {cl::DeviceInfo::PrintfBufferSize, 1024 * 1024}, {cl::DeviceInfo::PreferredWorkGroupSizeMultiple, 16}, }; mInfoULong = { {cl::DeviceInfo::LocalMemSize, props.limits.maxComputeSharedMemorySize}, {cl::DeviceInfo::SVM_Capabilities, 0}, {cl::DeviceInfo::QueueOnDeviceProperties, 0}, {cl::DeviceInfo::PartitionAffinityDomain, 0}, {cl::DeviceInfo::DeviceEnqueueCapabilities, 0}, {cl::DeviceInfo::QueueOnHostProperties, CL_QUEUE_PROFILING_ENABLE}, // TODO(aannestrand) Update these hardcoded platform/device queries // http://anglebug.com/42266935 {cl::DeviceInfo::HalfFpConfig, 0}, {cl::DeviceInfo::DoubleFpConfig, 0}, {cl::DeviceInfo::GlobalMemCacheSize, 0}, {cl::DeviceInfo::GlobalMemSize, 1024 * 1024 * 1024}, {cl::DeviceInfo::MaxConstantBufferSize, 64 * 1024}, {cl::DeviceInfo::SingleFpConfig, CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN | CL_FP_FMA}, {cl::DeviceInfo::AtomicMemoryCapabilities, CL_DEVICE_ATOMIC_ORDER_RELAXED | CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP}, // TODO (http://anglebug.com/379669750) Add these based on the Vulkan features query {cl::DeviceInfo::AtomicFenceCapabilities, CL_DEVICE_ATOMIC_ORDER_RELAXED | CL_DEVICE_ATOMIC_ORDER_ACQ_REL | CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP | // non-mandatory CL_DEVICE_ATOMIC_SCOPE_WORK_ITEM}, }; mInfoUInt = { {cl::DeviceInfo::VendorID, props.vendorID}, {cl::DeviceInfo::MaxReadImageArgs, props.limits.maxPerStageDescriptorSampledImages}, {cl::DeviceInfo::MaxWriteImageArgs, props.limits.maxPerStageDescriptorStorageImages}, {cl::DeviceInfo::MaxReadWriteImageArgs, props.limits.maxPerStageDescriptorStorageImages}, {cl::DeviceInfo::GlobalMemCachelineSize, static_cast(props.limits.nonCoherentAtomSize)}, {cl::DeviceInfo::Available, CL_TRUE}, {cl::DeviceInfo::LinkerAvailable, CL_TRUE}, {cl::DeviceInfo::CompilerAvailable, CL_TRUE}, {cl::DeviceInfo::MaxOnDeviceQueues, 0}, {cl::DeviceInfo::MaxOnDeviceEvents, 0}, {cl::DeviceInfo::QueueOnDeviceMaxSize, 0}, {cl::DeviceInfo::QueueOnDevicePreferredSize, 0}, {cl::DeviceInfo::MaxPipeArgs, 0}, {cl::DeviceInfo::PipeMaxPacketSize, 0}, {cl::DeviceInfo::PipeSupport, CL_FALSE}, {cl::DeviceInfo::PipeMaxActiveReservations, 0}, {cl::DeviceInfo::ErrorCorrectionSupport, CL_FALSE}, {cl::DeviceInfo::PreferredInteropUserSync, CL_TRUE}, {cl::DeviceInfo::ExecutionCapabilities, CL_EXEC_KERNEL}, // TODO(aannestrand) Update these hardcoded platform/device queries // http://anglebug.com/42266935 {cl::DeviceInfo::AddressBits, 32}, {cl::DeviceInfo::EndianLittle, CL_TRUE}, {cl::DeviceInfo::LocalMemType, CL_LOCAL}, // TODO (http://anglebug.com/379669750) Vulkan reports a big sampler count number, we dont // need that many and set it to minimum req for now. {cl::DeviceInfo::MaxSamplers, 16u}, {cl::DeviceInfo::MaxConstantArgs, 8}, {cl::DeviceInfo::MaxNumSubGroups, 0}, {cl::DeviceInfo::MaxComputeUnits, 4}, {cl::DeviceInfo::MaxClockFrequency, 555}, {cl::DeviceInfo::MaxWorkItemDimensions, 3}, {cl::DeviceInfo::MinDataTypeAlignSize, 128}, {cl::DeviceInfo::GlobalMemCacheType, CL_NONE}, {cl::DeviceInfo::HostUnifiedMemory, CL_TRUE}, {cl::DeviceInfo::NativeVectorWidthChar, 4}, {cl::DeviceInfo::NativeVectorWidthShort, 2}, {cl::DeviceInfo::NativeVectorWidthInt, 1}, {cl::DeviceInfo::NativeVectorWidthLong, 1}, {cl::DeviceInfo::NativeVectorWidthFloat, 1}, {cl::DeviceInfo::NativeVectorWidthDouble, 1}, {cl::DeviceInfo::NativeVectorWidthHalf, 0}, {cl::DeviceInfo::PartitionMaxSubDevices, 0}, {cl::DeviceInfo::PreferredVectorWidthInt, 1}, {cl::DeviceInfo::PreferredVectorWidthLong, 1}, {cl::DeviceInfo::PreferredVectorWidthChar, 4}, {cl::DeviceInfo::PreferredVectorWidthHalf, 0}, {cl::DeviceInfo::PreferredVectorWidthShort, 2}, {cl::DeviceInfo::PreferredVectorWidthFloat, 1}, {cl::DeviceInfo::PreferredVectorWidthDouble, 0}, {cl::DeviceInfo::PreferredLocalAtomicAlignment, 0}, {cl::DeviceInfo::PreferredGlobalAtomicAlignment, 0}, {cl::DeviceInfo::PreferredPlatformAtomicAlignment, 0}, {cl::DeviceInfo::NonUniformWorkGroupSupport, CL_FALSE}, {cl::DeviceInfo::GenericAddressSpaceSupport, CL_FALSE}, {cl::DeviceInfo::SubGroupIndependentForwardProgress, CL_FALSE}, {cl::DeviceInfo::WorkGroupCollectiveFunctionsSupport, CL_FALSE}, }; } CLDeviceVk::~CLDeviceVk() = default; CLDeviceImpl::Info CLDeviceVk::createInfo(cl::DeviceType type) const { Info info(type); const VkPhysicalDeviceProperties &properties = mRenderer->getPhysicalDeviceProperties(); info.maxWorkItemSizes.push_back(properties.limits.maxComputeWorkGroupSize[0]); info.maxWorkItemSizes.push_back(properties.limits.maxComputeWorkGroupSize[1]); info.maxWorkItemSizes.push_back(properties.limits.maxComputeWorkGroupSize[2]); // TODO(aannestrand) Update these hardcoded platform/device queries // http://anglebug.com/42266935 info.maxMemAllocSize = 1 << 30; info.memBaseAddrAlign = 1024; info.imageSupport = CL_TRUE; info.image2D_MaxWidth = properties.limits.maxImageDimension2D; info.image2D_MaxHeight = properties.limits.maxImageDimension2D; info.image3D_MaxWidth = properties.limits.maxImageDimension3D; info.image3D_MaxHeight = properties.limits.maxImageDimension3D; info.image3D_MaxDepth = properties.limits.maxImageDimension3D; // TODO (http://anglebug.com/379669750) For now set it minimum requirement. info.imageMaxBufferSize = 65536; info.imageMaxArraySize = properties.limits.maxImageArrayLayers; info.imagePitchAlignment = 0u; info.imageBaseAddressAlignment = 0u; info.execCapabilities = CL_EXEC_KERNEL; info.queueOnDeviceMaxSize = 0u; info.builtInKernels = ""; info.version = CL_MAKE_VERSION(3, 0, 0); info.versionStr = "OpenCL 3.0 " + mRenderer->getVersionString(true); info.OpenCL_C_AllVersions = {{CL_MAKE_VERSION(1, 0, 0), "OpenCL C"}, {CL_MAKE_VERSION(1, 1, 0), "OpenCL C"}, {CL_MAKE_VERSION(1, 2, 0), "OpenCL C"}, {CL_MAKE_VERSION(3, 0, 0), "OpenCL C"}}; info.OpenCL_C_Features = {}; info.ILsWithVersion = {}; info.builtInKernelsWithVersion = {}; info.partitionProperties = {}; info.partitionType = {}; info.IL_Version = ""; // Below extensions are required as of OpenCL 1.1, add their versioned strings NameVersionVector versionedExtensionList = { // Below extensions are required as of OpenCL 1.1 cl_name_version{.version = CL_MAKE_VERSION(1, 0, 0), .name = "cl_khr_byte_addressable_store"}, cl_name_version{.version = CL_MAKE_VERSION(1, 0, 0), .name = "cl_khr_global_int32_base_atomics"}, cl_name_version{.version = CL_MAKE_VERSION(1, 0, 0), .name = "cl_khr_global_int32_extended_atomics"}, cl_name_version{.version = CL_MAKE_VERSION(1, 0, 0), .name = "cl_khr_local_int32_base_atomics"}, cl_name_version{.version = CL_MAKE_VERSION(1, 0, 0), .name = "cl_khr_local_int32_extended_atomics"}, }; info.initializeVersionedExtensions(std::move(versionedExtensionList)); return info; } angle::Result CLDeviceVk::getInfoUInt(cl::DeviceInfo name, cl_uint *value) const { if (mInfoUInt.count(name)) { *value = mInfoUInt.at(name); return angle::Result::Continue; } ANGLE_CL_RETURN_ERROR(CL_INVALID_VALUE); } angle::Result CLDeviceVk::getInfoULong(cl::DeviceInfo name, cl_ulong *value) const { if (mInfoULong.count(name)) { *value = mInfoULong.at(name); return angle::Result::Continue; } ANGLE_CL_RETURN_ERROR(CL_INVALID_VALUE); } angle::Result CLDeviceVk::getInfoSizeT(cl::DeviceInfo name, size_t *value) const { if (mInfoSizeT.count(name)) { *value = mInfoSizeT.at(name); return angle::Result::Continue; } ANGLE_CL_RETURN_ERROR(CL_INVALID_VALUE); } angle::Result CLDeviceVk::getInfoStringLength(cl::DeviceInfo name, size_t *value) const { if (mInfoString.count(name)) { *value = mInfoString.at(name).length() + 1; return angle::Result::Continue; } ANGLE_CL_RETURN_ERROR(CL_INVALID_VALUE); } angle::Result CLDeviceVk::getInfoString(cl::DeviceInfo name, size_t size, char *value) const { if (mInfoString.count(name)) { std::strcpy(value, mInfoString.at(name).c_str()); return angle::Result::Continue; } ANGLE_CL_RETURN_ERROR(CL_INVALID_VALUE); } angle::Result CLDeviceVk::createSubDevices(const cl_device_partition_property *properties, cl_uint numDevices, CreateFuncs &subDevices, cl_uint *numDevicesRet) { UNIMPLEMENTED(); ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES); } cl::WorkgroupSize CLDeviceVk::selectWorkGroupSize(const cl::NDRange &ndrange) const { // Limit total work-group size to the Vulkan device's limit const VkPhysicalDeviceProperties &props = mRenderer->getPhysicalDeviceProperties(); uint32_t maxSize = static_cast(mInfoSizeT.at(cl::DeviceInfo::MaxWorkGroupSize)); maxSize = std::min(maxSize, 64u); bool keepIncreasing = false; cl::WorkgroupSize localSize = {1, 1, 1}; do { keepIncreasing = false; for (cl_uint i = 0; i < ndrange.workDimensions; i++) { cl::WorkgroupSize newLocalSize = localSize; newLocalSize[i] *= 2; // TODO: Add support for non-uniform WGS // http://anglebug.com/42267067 if (ndrange.globalWorkSize[i] % newLocalSize[i] == 0 && newLocalSize[i] <= props.limits.maxComputeWorkGroupCount[i] && newLocalSize[0] * newLocalSize[1] * newLocalSize[2] <= maxSize) { localSize = newLocalSize; keepIncreasing = true; } } } while (keepIncreasing); return localSize; } } // namespace rx