#include #include #include #include namespace c10::xpu { namespace { /* * Note [Device Management] * * An Intel GPU device qualifies as a type of SYCL device. This classification * allows for the runtime querying of Intel GPU device information through the * SYCL runtime library. * * Device status is managed through a SYCL device pool, with SYCL devices * determined at runtime. There's currently a SYCL device pool that is lazily * created and only initialized once, ensuring thread-local safety. Each device * within the device pool shares the same default context. */ c10::once_flag init_flag; thread_local DeviceIndex curDeviceIndex = 0; struct DevicePool { std::vector> devices; std::unique_ptr context; } gDevicePool; void enumDevices(std::vector>& devices) { auto platform_list = sycl::platform::get_platforms(); // Enumerated GPU devices from the specific platform. for (const auto& platform : platform_list) { if (platform.get_backend() != sycl::backend::ext_oneapi_level_zero) { continue; } auto device_list = platform.get_devices(); for (const auto& device : device_list) { if (device.is_gpu()) { devices.push_back(std::make_unique(device)); } } } } inline void initGlobalDevicePoolState() { // Enumerate all GPU devices and record them. enumDevices(gDevicePool.devices); if (gDevicePool.devices.empty()) { TORCH_WARN("XPU device count is zero!"); return; } #ifdef _WIN32 // default context feature is disabled by default on Windows. std::vector deviceList; for (auto it = gDevicePool.devices.begin(); it != gDevicePool.devices.end(); ++it) { deviceList.push_back(*(*it)); } gDevicePool.context = std::make_unique(deviceList); #else // The default context is utilized for each Intel GPU device, allowing the // retrieval of the context from any GPU device. gDevicePool.context = std::make_unique( gDevicePool.devices[0]->get_platform().ext_oneapi_get_default_context()); #endif } inline void initDevicePoolCallOnce() { c10::call_once(init_flag, initGlobalDevicePoolState); } void initDeviceProperties(DeviceProp* device_prop, int device) { using namespace sycl::info; using namespace sycl::ext; // Get raw sycl device associated with device index. auto& raw_device = *gDevicePool.devices[device]; // Initialize the device properties associated with the specific device. #define ASSIGN_DEVICE_PROP(property) \ device_prop->property = raw_device.get_info(); #define ASSIGN_EXT_DEVICE_PROP(property, default_value) \ device_prop->property = raw_device.has(sycl::aspect::ext_intel_##property) \ ? raw_device.get_info() \ : default_value; #define ASSIGN_DEVICE_ASPECT(member) \ device_prop->has_##member = raw_device.has(sycl::aspect::member); #define ASSIGN_EXP_CL_ASPECT(member) \ device_prop->has_##member = raw_device.ext_oneapi_supports_cl_extension( \ "cl_intel_" #member, &cl_version); AT_FORALL_XPU_DEVICE_PROPERTIES(ASSIGN_DEVICE_PROP); device_prop->platform_name = raw_device.get_info().get_info(); AT_FORALL_XPU_EXT_DEVICE_PROPERTIES(ASSIGN_EXT_DEVICE_PROP); AT_FORALL_XPU_DEVICE_ASPECT(ASSIGN_DEVICE_ASPECT); // TODO: Remove cl_version since it is unnecessary. sycl::ext::oneapi::experimental::cl_version cl_version; AT_FORALL_XPU_EXP_CL_ASPECT(ASSIGN_EXP_CL_ASPECT); return; } inline void check_device(DeviceIndex device) { // TODO: Use c10::Device::MAX_NUM_DEVICES directly. DeviceIndex is a int8_t // value, and the maximum number of GPUs that PyTorch recognizes is 64. So, we // have to check if there is an overflow happen. When DeviceIndex changes to // int16_t and c10::Device::MAX_NUM_DEVICES is provided, we should use it // directly to check if too many XPU devices are detected. TORCH_CHECK( gDevicePool.devices.size() <= std::numeric_limits::max(), "Too many XPU devices, DeviceIndex overflowed"); auto total = static_cast(gDevicePool.devices.size()); TORCH_CHECK( device >= 0 && device < total, "device is out of range, device is ", device, ", total number of device is ", total, "."); } } // anonymous namespace sycl::device& get_raw_device(DeviceIndex device) { initDevicePoolCallOnce(); check_device(device); return *gDevicePool.devices[device]; } sycl::context& get_device_context() { initDevicePoolCallOnce(); TORCH_CHECK( gDevicePool.context, "Device pool initialization failed, you might not have an XPU device.") return *gDevicePool.context; } void get_device_properties(DeviceProp* device_prop, DeviceIndex device) { initDevicePoolCallOnce(); TORCH_CHECK(device_prop, "device_prop is an invalid pointer."); check_device(device); initDeviceProperties(device_prop, device); } DeviceIndex get_device_idx_from_pointer(void* ptr) { initDevicePoolCallOnce(); TORCH_CHECK(ptr, "ptr is an invalid pointer."); auto type = sycl::get_pointer_type(ptr, get_device_context()); TORCH_CHECK( type == sycl::usm::alloc::device, "ptr is not a device type pointer."); sycl::device raw_device = sycl::get_pointer_device(ptr, get_device_context()); auto match_device = [raw_device](const auto& device) -> bool { return raw_device == *device; }; auto it = std::find_if( gDevicePool.devices.begin(), gDevicePool.devices.end(), match_device); TORCH_CHECK( it != gDevicePool.devices.end(), "Can't find the pointer from XPU devices."); return static_cast( std::distance(gDevicePool.devices.begin(), it)); } DeviceIndex device_count() { initDevicePoolCallOnce(); return static_cast(gDevicePool.devices.size()); } DeviceIndex device_count_ensure_non_zero() { auto count = device_count(); // Zero gpus could produce a warning in `device_count` but we fail here. TORCH_CHECK(count, "No XPU devices are available."); return count; } DeviceIndex current_device() { initDevicePoolCallOnce(); return curDeviceIndex; } void set_device(DeviceIndex device) { initDevicePoolCallOnce(); check_device(device); curDeviceIndex = device; } c10::DeviceIndex exchange_device(c10::DeviceIndex to_device) { auto cur_device = current_device(); if (to_device == cur_device) { return cur_device; } set_device(to_device); return cur_device; } c10::DeviceIndex maybe_exchange_device(c10::DeviceIndex to_device) { return exchange_device(to_device); } } // namespace c10::xpu