1 #pragma once 2 3 #include <c10/xpu/XPUMacros.h> 4 #include <sycl/sycl.hpp> 5 6 namespace c10::xpu { 7 8 #define AT_FORALL_XPU_DEVICE_PROPERTIES(_) \ 9 /* the device name of this SYCL device. */ \ 10 _(name) \ 11 \ 12 /* the device type associated with the device. */ \ 13 _(device_type) \ 14 \ 15 /* the vendor of this SYCL device. */ \ 16 _(vendor) \ 17 \ 18 /* a backend-defined driver version as a std::string. */ \ 19 _(driver_version) \ 20 \ 21 /* the SYCL version as a std::string in the form <major>.<minor> */ \ 22 _(version) \ 23 \ 24 /* true if the SYCL device is available. Otherwise, return false. */ \ 25 _(is_available) \ 26 \ 27 /* the maximum size in bytes of the arguments that can be passed to a \ 28 * kernel. */ \ 29 _(max_parameter_size) \ 30 \ 31 /* the number of parallel compute units available to the device. */ \ 32 _(max_compute_units) \ 33 \ 34 /* the maximum dimensions that specify the global and local work-item IDs \ 35 * used by the data parallel execution model. */ \ 36 _(max_work_item_dimensions) \ 37 \ 38 /* the maximum number of workitems that are permitted in a work-group \ 39 * executing a kernel on a single compute unit. */ \ 40 _(max_work_group_size) \ 41 \ 42 /* the maximum number of subgroups in a work-group for any kernel executed \ 43 * on the device. */ \ 44 _(max_num_sub_groups) \ 45 \ 46 /* a std::vector of size_t containing the set of sub-group sizes supported \ 47 * by the device. */ \ 48 _(sub_group_sizes) \ 49 \ 50 /* the maximum configured clock frequency of this SYCL device in MHz. */ \ 51 _(max_clock_frequency) \ 52 \ 53 /* the default compute device address space size specified as an unsigned \ 54 * integer value in bits. Must return either 32 or 64. */ \ 55 _(address_bits) \ 56 \ 57 /* the maximum size of memory object allocation in bytes. */ \ 58 _(max_mem_alloc_size) \ 59 \ 60 /* the minimum value in bits of the largest supported SYCL built-in data \ 61 * type if this SYCL device is not of device type \ 62 * sycl::info::device_type::custom. */ \ 63 _(mem_base_addr_align) \ 64 \ 65 /* a std::vector of info::fp_config describing the half/single/double \ 66 * precision floating-point capability of this SYCL device. */ \ 67 _(half_fp_config) \ 68 _(single_fp_config) \ 69 _(double_fp_config) \ 70 \ 71 /* the size of global device memory in bytes. */ \ 72 _(global_mem_size) \ 73 \ 74 /* the type of global memory cache supported. */ \ 75 _(global_mem_cache_type) \ 76 \ 77 /* the size of global memory cache in bytes. */ \ 78 _(global_mem_cache_size) \ 79 \ 80 /* the size of global memory cache line in bytes. */ \ 81 _(global_mem_cache_line_size) \ 82 \ 83 /* the type of local memory supported. */ \ 84 _(local_mem_type) \ 85 \ 86 /* the size of local memory arena in bytes. */ \ 87 _(local_mem_size) \ 88 \ 89 /* the maximum number of sub-devices that can be created when this device is \ 90 * partitioned. */ \ 91 _(partition_max_sub_devices) \ 92 \ 93 /* the resolution of device timer in nanoseconds. */ \ 94 _(profiling_timer_resolution) \ 95 \ 96 /* the preferred native vector width size for built-in scalar types that can \ 97 * be put into vectors. */ \ 98 _(preferred_vector_width_char) \ 99 _(preferred_vector_width_short) \ 100 _(preferred_vector_width_int) \ 101 _(preferred_vector_width_long) \ 102 _(preferred_vector_width_float) \ 103 _(preferred_vector_width_double) \ 104 _(preferred_vector_width_half) \ 105 \ 106 /* the native ISA vector width. The vector width is defined as the number of \ 107 * scalar elements that can be stored in the vector. */ \ 108 _(native_vector_width_char) \ 109 _(native_vector_width_short) \ 110 _(native_vector_width_int) \ 111 _(native_vector_width_long) \ 112 _(native_vector_width_float) \ 113 _(native_vector_width_double) \ 114 _(native_vector_width_half) 115 116 #define AT_FORALL_XPU_EXT_DEVICE_PROPERTIES(_) \ 117 /* the number of EUs associated with the Intel GPU. */ \ 118 _(gpu_eu_count, 512) \ 119 \ 120 /* the number of EUs in a subslice. */ \ 121 _(gpu_eu_count_per_subslice, 8) \ 122 \ 123 /* the simd width of EU of GPU. */ \ 124 _(gpu_eu_simd_width, 8) \ 125 \ 126 /* the number of hardware threads per EU of GPU. */ \ 127 _(gpu_hw_threads_per_eu, 8) 128 129 #define AT_FORALL_XPU_DEVICE_ASPECT(_) \ 130 /* sycl::half is supported on device. */ \ 131 _(fp16) \ 132 \ 133 /* double is supported on device. */ \ 134 _(fp64) \ 135 \ 136 /* 64-bit atomic operation is supported on device. */ \ 137 _(atomic64) 138 139 #define AT_FORALL_XPU_EXP_CL_ASPECT(_) \ 140 /* conversion between single-precision 32-bit floating-point values and \ 141 * 16-bit bfloat16 values is supported on device. */ \ 142 _(bfloat16_conversions) \ 143 \ 144 /* specialized hardware to compute MMA is supported on device. */ \ 145 _(subgroup_matrix_multiply_accumulate) \ 146 \ 147 /* specialized hardware to compute MMA for 32-bit floating-point is \ 148 * supported on device. */ \ 149 _(subgroup_matrix_multiply_accumulate_tensor_float32) \ 150 \ 151 /* block read operations for efficient matrix multiplication is supported on \ 152 * device. */ \ 153 _(subgroup_2d_block_io) 154 155 #define _DEFINE_SYCL_PROP(ns, property, member) \ 156 ns::property::return_type member; 157 158 #define DEFINE_DEVICE_PROP(property) \ 159 _DEFINE_SYCL_PROP(sycl::info::device, property, property) 160 161 #define DEFINE_PLATFORM_PROP(property, member) \ 162 _DEFINE_SYCL_PROP(sycl::info::platform, property, member) 163 164 #define DEFINE_EXT_DEVICE_PROP(property, ...) \ 165 _DEFINE_SYCL_PROP(sycl::ext::intel::info::device, property, property) 166 167 #define DEFINE_DEVICE_ASPECT(member) bool has_##member; 168 169 struct C10_XPU_API DeviceProp { 170 AT_FORALL_XPU_DEVICE_PROPERTIES(DEFINE_DEVICE_PROP); 171 172 // the platform name. 173 DEFINE_PLATFORM_PROP(name, platform_name); 174 175 AT_FORALL_XPU_EXT_DEVICE_PROPERTIES(DEFINE_EXT_DEVICE_PROP); 176 177 AT_FORALL_XPU_DEVICE_ASPECT(DEFINE_DEVICE_ASPECT); 178 179 AT_FORALL_XPU_EXP_CL_ASPECT(DEFINE_DEVICE_ASPECT); 180 }; 181 182 #undef _DEFINE_SYCL_PROP 183 #undef DEFINE_DEVICE_PROP 184 #undef DEFINE_PLATFORM_PROP 185 #undef DEFINE_EXT_DEVICE_PROP 186 #undef DEFINE_DEVICE_ASPECT 187 188 } // namespace c10::xpu 189