xref: /aosp_15_r20/external/pytorch/c10/xpu/XPUDeviceProp.h (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1 #pragma once
2 
3 #include <c10/xpu/XPUMacros.h>
4 #include <sycl/sycl.hpp>
5 
6 namespace c10::xpu {
7 
// X-macro over standard SYCL device information descriptors. The argument `_`
// is invoked once per descriptor name, in the order listed below. In this
// header each name becomes a DeviceProp data member of type
// sycl::info::device::<name>::return_type (see DEFINE_DEVICE_PROP), so the
// order of the entries is also the order of those members in the struct.
8 #define AT_FORALL_XPU_DEVICE_PROPERTIES(_)                                     \
9   /* the device name of this SYCL device. */                                   \
10   _(name)                                                                      \
11                                                                                \
12   /* the device type associated with the device. */                            \
13   _(device_type)                                                               \
14                                                                                \
15   /* the vendor of this SYCL device. */                                        \
16   _(vendor)                                                                    \
17                                                                                \
18   /* a backend-defined driver version as a std::string. */                     \
19   _(driver_version)                                                            \
20                                                                                \
21   /* the SYCL version as a std::string in the form <major>.<minor> */          \
22   _(version)                                                                   \
23                                                                                \
24   /* true if the SYCL device is available. Otherwise, return false. */         \
25   _(is_available)                                                              \
26                                                                                \
27   /* the maximum size in bytes of the arguments that can be passed to a        \
28    * kernel. */                                                                \
29   _(max_parameter_size)                                                        \
30                                                                                \
31   /* the number of parallel compute units available to the device. */          \
32   _(max_compute_units)                                                         \
33                                                                                \
34   /* the maximum dimensions that specify the global and local work-item IDs    \
35    * used by the data parallel execution model. */                             \
36   _(max_work_item_dimensions)                                                  \
37                                                                                \
38   /* the maximum number of workitems that are permitted in a work-group        \
39    * executing a kernel on a single compute unit. */                           \
40   _(max_work_group_size)                                                       \
41                                                                                \
42   /* the maximum number of subgroups in a work-group for any kernel executed   \
43    * on the device. */                                                         \
44   _(max_num_sub_groups)                                                        \
45                                                                                \
46   /* a std::vector of size_t containing the set of sub-group sizes supported   \
47    * by the device. */                                                         \
48   _(sub_group_sizes)                                                           \
49                                                                                \
50   /* the maximum configured clock frequency of this SYCL device in MHz. */     \
51   _(max_clock_frequency)                                                       \
52                                                                                \
53   /* the default compute device address space size specified as an unsigned    \
54    * integer value in bits. Must return either 32 or 64. */                    \
55   _(address_bits)                                                              \
56                                                                                \
57   /* the maximum size of memory object allocation in bytes. */                 \
58   _(max_mem_alloc_size)                                                        \
59                                                                                \
60   /* the minimum value in bits of the largest supported SYCL built-in data     \
61    * type if this SYCL device is not of device type                            \
62    * sycl::info::device_type::custom. */                                       \
63   _(mem_base_addr_align)                                                       \
64                                                                                \
65   /* a std::vector of info::fp_config describing the half/single/double        \
66    * precision floating-point capability of this SYCL device. */               \
67   _(half_fp_config)                                                            \
68   _(single_fp_config)                                                          \
69   _(double_fp_config)                                                          \
70                                                                                \
71   /* the size of global device memory in bytes. */                             \
72   _(global_mem_size)                                                           \
73                                                                                \
74   /* the type of global memory cache supported. */                             \
75   _(global_mem_cache_type)                                                     \
76                                                                                \
77   /* the size of global memory cache in bytes. */                              \
78   _(global_mem_cache_size)                                                     \
79                                                                                \
80   /* the size of global memory cache line in bytes. */                         \
81   _(global_mem_cache_line_size)                                                \
82                                                                                \
83   /* the type of local memory supported. */                                    \
84   _(local_mem_type)                                                            \
85                                                                                \
86   /* the size of local memory arena in bytes. */                               \
87   _(local_mem_size)                                                            \
88                                                                                \
89   /* the maximum number of sub-devices that can be created when this device is \
90    * partitioned. */                                                           \
91   _(partition_max_sub_devices)                                                 \
92                                                                                \
93   /* the resolution of device timer in nanoseconds. */                         \
94   _(profiling_timer_resolution)                                                \
95                                                                                \
96   /* the preferred native vector width size for built-in scalar types that can \
97    * be put into vectors. */                                                   \
98   _(preferred_vector_width_char)                                               \
99   _(preferred_vector_width_short)                                              \
100   _(preferred_vector_width_int)                                                \
101   _(preferred_vector_width_long)                                               \
102   _(preferred_vector_width_float)                                              \
103   _(preferred_vector_width_double)                                             \
104   _(preferred_vector_width_half)                                               \
105                                                                                \
106   /* the native ISA vector width. The vector width is defined as the number of \
107    * scalar elements that can be stored in the vector. */                      \
108   _(native_vector_width_char)                                                  \
109   _(native_vector_width_short)                                                 \
110   _(native_vector_width_int)                                                   \
111   _(native_vector_width_long)                                                  \
112   _(native_vector_width_float)                                                 \
113   _(native_vector_width_double)                                                \
114   _(native_vector_width_half)
115 
// X-macro over Intel extension device information descriptors. The argument
// `_` is invoked as _(name, value) for each entry. In this header only `name`
// is used: it becomes a DeviceProp data member of type
// sycl::ext::intel::info::device::<name>::return_type, and the second
// argument is discarded (see DEFINE_EXT_DEVICE_PROP).
// NOTE(review): the second argument looks like a fallback value for devices
// that cannot report the descriptor -- confirm against the code that fills
// in DeviceProp.
116 #define AT_FORALL_XPU_EXT_DEVICE_PROPERTIES(_)           \
117   /* the number of EUs associated with the Intel GPU. */ \
118   _(gpu_eu_count, 512)                                   \
119                                                          \
120   /* the number of EUs in a subslice. */                 \
121   _(gpu_eu_count_per_subslice, 8)                        \
122                                                          \
123   /* the simd width of EU of GPU. */                     \
124   _(gpu_eu_simd_width, 8)                                \
125                                                          \
126   /* the number of hardware threads per EU of GPU. */    \
127   _(gpu_hw_threads_per_eu, 8)
128 
// X-macro over device aspects. The argument `_` is invoked once per aspect
// name; in this header each name becomes a `bool has_<name>` data member of
// DeviceProp (see DEFINE_DEVICE_ASPECT).
129 #define AT_FORALL_XPU_DEVICE_ASPECT(_)                  \
130   /* sycl::half is supported on device. */              \
131   _(fp16)                                               \
132                                                         \
133   /* double is supported on device. */                  \
134   _(fp64)                                               \
135                                                         \
136   /* 64-bit atomic operation is supported on device. */ \
137   _(atomic64)
138 
// X-macro over a second group of device capabilities, listed separately from
// AT_FORALL_XPU_DEVICE_ASPECT. The argument `_` is invoked once per name; in
// this header each name likewise becomes a `bool has_<name>` data member of
// DeviceProp (see DEFINE_DEVICE_ASPECT).
// NOTE(review): "EXP_CL" presumably means these are detected through a
// different (experimental / OpenCL-extension) query than the aspects above --
// confirm against the code that fills in DeviceProp.
139 #define AT_FORALL_XPU_EXP_CL_ASPECT(_)                                         \
140   /* conversion between single-precision 32-bit floating-point values and      \
141    * 16-bit bfloat16 values is supported on device. */                         \
142   _(bfloat16_conversions)                                                      \
143                                                                                \
144   /* specialized hardware to compute MMA is supported on device. */            \
145   _(subgroup_matrix_multiply_accumulate)                                       \
146                                                                                \
147   /* specialized hardware to compute MMA for 32-bit floating-point is          \
148    * supported on device. */                                                   \
149   _(subgroup_matrix_multiply_accumulate_tensor_float32)                        \
150                                                                                \
151   /* block read operations for efficient matrix multiplication are supported   \
152    * on device. */                                                             \
153   _(subgroup_2d_block_io)
154 
// Declares one data member named `member` whose type is the return type of
// the SYCL information descriptor `ns::property`.
// (Named without a leading underscore: identifiers that begin with an
// underscore followed by an uppercase letter are reserved in C++.)
155 #define DEFINE_SYCL_PROP(ns, property, member) \
156   ns::property::return_type member;
157 
// Member for a sycl::info::device descriptor; the member reuses the
// descriptor's own name.
158 #define DEFINE_DEVICE_PROP(property) \
159   DEFINE_SYCL_PROP(sycl::info::device, property, property)
160 
// Member for a sycl::info::platform descriptor, stored under an explicit
// member name so it can differ from the descriptor name.
161 #define DEFINE_PLATFORM_PROP(property, member) \
162   DEFINE_SYCL_PROP(sycl::info::platform, property, member)
163 
// Member for a sycl::ext::intel::info::device descriptor. Any extra arguments
// supplied by the list entry are accepted and ignored here.
164 #define DEFINE_EXT_DEVICE_PROP(property, ...) \
165   DEFINE_SYCL_PROP(sycl::ext::intel::info::device, property, property)
166 
// Boolean flag member named has_<member>.
167 #define DEFINE_DEVICE_ASPECT(member) bool has_##member;
168 
// Per-device property record. Every data member is generated from one of the
// AT_FORALL_XPU_* lists, plus `platform_name`; members appear in the order
// the lists are expanded below.
169 struct C10_XPU_API DeviceProp {
170   AT_FORALL_XPU_DEVICE_PROPERTIES(DEFINE_DEVICE_PROP);
171 
172   // the platform name.
173   DEFINE_PLATFORM_PROP(name, platform_name);
174 
175   AT_FORALL_XPU_EXT_DEVICE_PROPERTIES(DEFINE_EXT_DEVICE_PROP);
176 
177   AT_FORALL_XPU_DEVICE_ASPECT(DEFINE_DEVICE_ASPECT);
178 
179   AT_FORALL_XPU_EXP_CL_ASPECT(DEFINE_DEVICE_ASPECT);
180 };
181 
// The helper macros exist only to build DeviceProp; undefine them so their
// generic names do not leak into files that include this header.
182 #undef DEFINE_SYCL_PROP
183 #undef DEFINE_DEVICE_PROP
184 #undef DEFINE_PLATFORM_PROP
185 #undef DEFINE_EXT_DEVICE_PROP
186 #undef DEFINE_DEVICE_ASPECT
187 
188 } // namespace c10::xpu
189