/*
 * Copyright © 2023 Collabora, Ltd.
 *
 * SPDX-License-Identifier: MIT
 */

#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <xf86drm.h>

#include "util/hash_table.h"
#include "util/macros.h"
#include "util/simple_mtx.h"

#include "drm-uapi/panfrost_drm.h"

#include "pan_kmod_backend.h"

#include "pan_props.h"

const struct pan_kmod_ops panfrost_kmod_ops;

struct panfrost_kmod_vm {
   struct pan_kmod_vm base;
};

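/* Panfrost only exposes a single kernel-managed VM per device (see
 * panfrost_kmod_vm_create()), so we keep a pointer to it here to reject any
 * attempt to create a second one.
 */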
struct panfrost_kmod_dev {
   struct pan_kmod_dev base;
   struct panfrost_kmod_vm *vm;
};

struct panfrost_kmod_bo {
   struct pan_kmod_bo base;

   /* This is actually the VA assigned to the BO at creation/import time.
    * We don't control it; the kernel driver assigns it automatically.
    */
   uint64_t offset;
};

static struct pan_kmod_dev *
panfrost_kmod_dev_create(int fd, uint32_t flags, drmVersionPtr version,
                         const struct pan_kmod_allocator *allocator)
{
   if (version->version_major < 1 ||
       (version->version_major == 1 && version->version_minor < 1)) {
      mesa_loge("kernel driver is too old (requires at least 1.1, found %d.%d)",
                version->version_major, version->version_minor);
      return NULL;
   }

   struct panfrost_kmod_dev *panfrost_dev =
      pan_kmod_alloc(allocator, sizeof(*panfrost_dev));
   if (!panfrost_dev) {
      mesa_loge("failed to allocate a panfrost_kmod_dev object");
      return NULL;
   }

   pan_kmod_dev_init(&panfrost_dev->base, fd, flags, version,
                     &panfrost_kmod_ops, allocator);
   return &panfrost_dev->base;
}

static void
panfrost_kmod_dev_destroy(struct pan_kmod_dev *dev)
{
   struct panfrost_kmod_dev *panfrost_dev =
      container_of(dev, struct panfrost_kmod_dev, base);

   pan_kmod_dev_cleanup(dev);
   pan_kmod_free(dev->allocator, panfrost_dev);
}

/* Abstraction over the raw drm_panfrost_get_param ioctl for fetching
 * information about devices.
 */
static __u64
panfrost_query_raw(int fd, enum drm_panfrost_param param, bool required,
                   unsigned default_value)
{
   struct drm_panfrost_get_param get_param = {};
   ASSERTED int ret;

   get_param.param = param;
   ret = drmIoctl(fd, DRM_IOCTL_PANFROST_GET_PARAM, &get_param);

   if (ret) {
      assert(!required);
      return default_value;
   }

   return get_param.value;
}

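/* Query thread-related properties. When the kernel reports a zero value for
 * one of these parameters, we fall back to a per-architecture default below.
 */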
static void
panfrost_dev_query_thread_props(const struct pan_kmod_dev *dev,
                                struct pan_kmod_dev_props *props)
{
   int fd = dev->fd;

   props->max_threads_per_core =
      panfrost_query_raw(fd, DRM_PANFROST_PARAM_MAX_THREADS, true, 0);
   if (!props->max_threads_per_core) {
      switch (pan_arch(props->gpu_prod_id)) {
      case 4:
      case 5:
         props->max_threads_per_core = 256;
         break;

      case 6:
         /* Bifrost, first generation */
         props->max_threads_per_core = 384;
         break;

      case 7:
         /* Bifrost, second generation (G31 is 512 but it doesn't matter) */
         props->max_threads_per_core = 768;
         break;

      case 9:
         /* Valhall, first generation. */
         props->max_threads_per_core = 512;
         break;

      default:
         assert(!"Unsupported arch");
      }
   }

   props->max_threads_per_wg = panfrost_query_raw(
      fd, DRM_PANFROST_PARAM_THREAD_MAX_WORKGROUP_SZ, true, 0);
   if (!props->max_threads_per_wg)
      props->max_threads_per_wg = props->max_threads_per_core;

   uint32_t thread_features =
      panfrost_query_raw(fd, DRM_PANFROST_PARAM_THREAD_FEATURES, true, 0);
   props->num_registers_per_core = thread_features & 0xffff;
   if (!props->num_registers_per_core) {
      switch (pan_arch(props->gpu_prod_id)) {
      case 4:
      case 5:
         /* Assume we can always schedule max_threads_per_core when using 4
          * registers per shader or fewer.
          */
         props->num_registers_per_core = props->max_threads_per_core * 4;
         break;

      case 6:
         /* Assume we can always schedule max_threads_per_core for shaders
          * using the full per-shader register file (64 regs).
          */
         props->num_registers_per_core = props->max_threads_per_core * 64;
         break;

      case 7:
      case 9:
         /* Assume we can always schedule max_threads_per_core for shaders
          * using half the per-shader register file (32 regs).
          */
         props->num_registers_per_core = props->max_threads_per_core * 32;
         break;

      default:
         assert(!"Unsupported arch");
      }
   }

   props->max_tls_instance_per_core =
      panfrost_query_raw(fd, DRM_PANFROST_PARAM_THREAD_TLS_ALLOC, true, 0);
   if (!props->max_tls_instance_per_core)
      props->max_tls_instance_per_core = props->max_threads_per_core;
}

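/* Query the device properties exposed through DRM_IOCTL_PANFROST_GET_PARAM
 * and fill the generic pan_kmod_dev_props structure.
 */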
static void
panfrost_dev_query_props(const struct pan_kmod_dev *dev,
                         struct pan_kmod_dev_props *props)
{
   int fd = dev->fd;

   memset(props, 0, sizeof(*props));
   props->gpu_prod_id =
      panfrost_query_raw(fd, DRM_PANFROST_PARAM_GPU_PROD_ID, true, 0);
   props->gpu_revision =
      panfrost_query_raw(fd, DRM_PANFROST_PARAM_GPU_REVISION, true, 0);
   props->shader_present =
      panfrost_query_raw(fd, DRM_PANFROST_PARAM_SHADER_PRESENT, true, 0);
   props->tiler_features =
      panfrost_query_raw(fd, DRM_PANFROST_PARAM_TILER_FEATURES, true, 0);
   props->mem_features =
      panfrost_query_raw(fd, DRM_PANFROST_PARAM_MEM_FEATURES, true, 0);
   props->mmu_features =
      panfrost_query_raw(fd, DRM_PANFROST_PARAM_MMU_FEATURES, true, 0);

   for (unsigned i = 0; i < ARRAY_SIZE(props->texture_features); i++) {
      props->texture_features[i] = panfrost_query_raw(
         fd, DRM_PANFROST_PARAM_TEXTURE_FEATURES0 + i, true, 0);
   }

   props->afbc_features =
      panfrost_query_raw(fd, DRM_PANFROST_PARAM_AFBC_FEATURES, true, 0);

   panfrost_dev_query_thread_props(dev, props);

   if (dev->driver.version.major > 1 || dev->driver.version.minor >= 3) {
      props->gpu_can_query_timestamp = true;
      props->timestamp_frequency = panfrost_query_raw(
         fd, DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP_FREQUENCY, true, 0);
   }
}

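/* Translate generic pan_kmod BO flags into Panfrost UAPI flags. BO flags are
 * only passed on kernel driver 1.1+, where PANFROST_BO_HEAP and
 * PANFROST_BO_NOEXEC are available.
 */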
static uint32_t
to_panfrost_bo_flags(struct pan_kmod_dev *dev, uint32_t flags)
{
   uint32_t panfrost_flags = 0;

   if (dev->driver.version.major > 1 || dev->driver.version.minor >= 1) {
      /* The alloc-on-fault feature is only used for the tiler HEAP object,
       * hence the name of the flag on panfrost.
       */
      if (flags & PAN_KMOD_BO_FLAG_ALLOC_ON_FAULT)
         panfrost_flags |= PANFROST_BO_HEAP;

      if (!(flags & PAN_KMOD_BO_FLAG_EXECUTABLE))
         panfrost_flags |= PANFROST_BO_NOEXEC;
   }

   return panfrost_flags;
}

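/* Allocate a BO. The kernel assigns the GPU VA at creation time and returns
 * it in req.offset; we stash it so panfrost_kmod_vm_bind() can report it back
 * on MAP operations.
 */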
static struct pan_kmod_bo *
panfrost_kmod_bo_alloc(struct pan_kmod_dev *dev,
                       struct pan_kmod_vm *exclusive_vm, size_t size,
                       uint32_t flags)
{
   /* We can't map GPU uncached. */
   if (flags & PAN_KMOD_BO_FLAG_GPU_UNCACHED)
      return NULL;

   struct panfrost_kmod_bo *bo = pan_kmod_dev_alloc(dev, sizeof(*bo));
   if (!bo)
      return NULL;

   struct drm_panfrost_create_bo req = {
      .size = size,
      .flags = to_panfrost_bo_flags(dev, flags),
   };

   int ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_CREATE_BO, &req);
   if (ret) {
      mesa_loge("DRM_IOCTL_PANFROST_CREATE_BO failed (err=%d)", errno);
      goto err_free_bo;
   }

   pan_kmod_bo_init(&bo->base, dev, exclusive_vm, req.size, flags, req.handle);
   bo->offset = req.offset;
   return &bo->base;

err_free_bo:
   pan_kmod_dev_free(dev, bo);
   return NULL;
}

static void
panfrost_kmod_bo_free(struct pan_kmod_bo *bo)
{
   drmCloseBufferHandle(bo->dev->fd, bo->handle);
   pan_kmod_dev_free(bo->dev, bo);
}

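/* Import a BO from a GEM handle. The GPU VA was already assigned by the
 * kernel when the handle was created, so query it with GET_BO_OFFSET and
 * stash it for panfrost_kmod_vm_bind().
 */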
static struct pan_kmod_bo *
panfrost_kmod_bo_import(struct pan_kmod_dev *dev, uint32_t handle, size_t size,
                        uint32_t flags)
{
   struct panfrost_kmod_bo *panfrost_bo =
      pan_kmod_dev_alloc(dev, sizeof(*panfrost_bo));
   if (!panfrost_bo) {
      mesa_loge("failed to allocate a panfrost_kmod_bo object");
      return NULL;
   }

   struct drm_panfrost_get_bo_offset get_bo_offset = {.handle = handle, 0};
   int ret =
      drmIoctl(dev->fd, DRM_IOCTL_PANFROST_GET_BO_OFFSET, &get_bo_offset);
   if (ret) {
      mesa_loge("DRM_IOCTL_PANFROST_GET_BO_OFFSET failed (err=%d)", errno);
      goto err_free_bo;
   }

   panfrost_bo->offset = get_bo_offset.offset;

   pan_kmod_bo_init(&panfrost_bo->base, dev, NULL, size,
                    flags | PAN_KMOD_BO_FLAG_IMPORTED, handle);
   return &panfrost_bo->base;

err_free_bo:
   pan_kmod_dev_free(dev, panfrost_bo);
   return NULL;
}

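/* DRM_IOCTL_PANFROST_MMAP_BO returns a fake offset that is meant to be passed
 * to mmap() on the DRM FD to map the BO in the CPU address space.
 */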
static off_t
panfrost_kmod_bo_get_mmap_offset(struct pan_kmod_bo *bo)
{
   struct drm_panfrost_mmap_bo mmap_bo = {.handle = bo->handle};
   int ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo);
   if (ret) {
      fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %m\n");
      assert(0);
   }

   return mmap_bo.offset;
}

static bool
panfrost_kmod_bo_wait(struct pan_kmod_bo *bo, int64_t timeout_ns,
                      bool for_read_only_access)
{
   struct drm_panfrost_wait_bo req = {
      .handle = bo->handle,
      .timeout_ns = timeout_ns,
   };

   /* The ioctl returns a value >= 0 when the BO we are waiting for is ready,
    * and -1 otherwise.
    */
   if (drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_WAIT_BO, &req) != -1)
      return true;

   assert(errno == ETIMEDOUT || errno == EBUSY);
   return false;
}

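/* Mark the BO as PANFROST_MADV_DONTNEED, allowing the kernel to reclaim its
 * pages under memory pressure.
 */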
static void
panfrost_kmod_bo_make_evictable(struct pan_kmod_bo *bo)
{
   struct drm_panfrost_madvise req = {
      .handle = bo->handle,
      .madv = PANFROST_MADV_DONTNEED,
   };

   drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_MADVISE, &req);
}

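/* Mark the BO as PANFROST_MADV_WILLNEED again. If the kernel reports
 * retained == 0, the BO was purged in the meantime and its content is gone,
 * so we return false to let the caller know.
 */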
static bool
panfrost_kmod_bo_make_unevictable(struct pan_kmod_bo *bo)
{
   struct drm_panfrost_madvise req = {
      .handle = bo->handle,
      .madv = PANFROST_MADV_WILLNEED,
   };

   if (drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_MADVISE, &req) == 0 &&
       req.retained == 0)
      return false;

   return true;
}

/* The VA range is restricted by the kernel driver. The lower 32 MB are
 * reserved, and the address space is limited to 32 bits.
 */
#define PANFROST_KMOD_VA_START 0x2000000ull
#define PANFROST_KMOD_VA_END   (1ull << 32)

static struct pan_kmod_va_range
panfrost_kmod_dev_query_user_va_range(const struct pan_kmod_dev *dev)
{
   return (struct pan_kmod_va_range){
      .start = PANFROST_KMOD_VA_START,
      .size = PANFROST_KMOD_VA_END - PANFROST_KMOD_VA_START,
   };
}

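/* Only one VM can exist per device, and the kernel fully manages the GPU VA
 * space, so the VM must be created with PAN_KMOD_VM_FLAG_AUTO_VA.
 */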
static struct pan_kmod_vm *
panfrost_kmod_vm_create(struct pan_kmod_dev *dev, uint32_t flags,
                        uint64_t va_start, uint64_t va_range)
{
   struct panfrost_kmod_dev *panfrost_dev =
      container_of(dev, struct panfrost_kmod_dev, base);

   /* Only one VM per device. */
   if (panfrost_dev->vm) {
      mesa_loge("panfrost_kmod only supports one VM per device");
      return NULL;
   }

   /* The Panfrost kernel driver doesn't support userspace VA management. */
   if (!(flags & PAN_KMOD_VM_FLAG_AUTO_VA)) {
      mesa_loge("panfrost_kmod only supports PAN_KMOD_VM_FLAG_AUTO_VA");
      assert(0);
      return NULL;
   }

   struct panfrost_kmod_vm *vm = pan_kmod_dev_alloc(dev, sizeof(*vm));
   if (!vm) {
      mesa_loge("failed to allocate a panfrost_kmod_vm object");
      return NULL;
   }

   pan_kmod_vm_init(&vm->base, dev, 0, flags);
   panfrost_dev->vm = vm;
   return &vm->base;
}

static void
panfrost_kmod_vm_destroy(struct pan_kmod_vm *vm)
{
   struct panfrost_kmod_dev *panfrost_dev =
      container_of(vm->dev, struct panfrost_kmod_dev, base);

   panfrost_dev->vm = NULL;
   pan_kmod_dev_free(vm->dev, vm);
}

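/* VM bind is mostly a no-op on Panfrost: BOs are mapped by the kernel at
 * creation/import time, so MAP operations simply report the kernel-assigned
 * VA back through ops[i].va.start, and UNMAP is deferred to BO destruction.
 */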
static int
panfrost_kmod_vm_bind(struct pan_kmod_vm *vm, enum pan_kmod_vm_op_mode mode,
                      struct pan_kmod_vm_op *ops, uint32_t op_count)
{
   UNUSED struct panfrost_kmod_vm *panfrost_vm =
      container_of(vm, struct panfrost_kmod_vm, base);

   /* We only support the IMMEDIATE and DEFER_TO_NEXT_IDLE_POINT modes. In
    * practice we always defer to the next idle point, but it shouldn't
    * matter.
    */
   if (mode != PAN_KMOD_VM_OP_MODE_IMMEDIATE &&
       mode != PAN_KMOD_VM_OP_MODE_DEFER_TO_NEXT_IDLE_POINT) {
      mesa_loge("panfrost_kmod doesn't support mode=%d", mode);
      assert(0);
      return -1;
   }

   for (uint32_t i = 0; i < op_count; i++) {
      if (ops[i].type == PAN_KMOD_VM_OP_TYPE_MAP) {
         struct panfrost_kmod_bo *panfrost_bo =
            container_of(ops[i].map.bo, struct panfrost_kmod_bo, base);

         /* The Panfrost kernel driver doesn't support userspace VA
          * management.
          */
         if (ops[i].va.start != PAN_KMOD_VM_MAP_AUTO_VA) {
            mesa_loge("panfrost_kmod can only do auto-VA allocation");
            assert(0);
            return -1;
         }

         /* The Panfrost kernel driver only supports full BO mapping. */
         if (ops[i].map.bo_offset != 0 ||
             ops[i].va.size != ops[i].map.bo->size) {
            mesa_loge("panfrost_kmod doesn't support partial BO mapping");
            assert(0);
            return -1;
         }

         ops[i].va.start = panfrost_bo->offset;
      } else if (ops[i].type == PAN_KMOD_VM_OP_TYPE_UNMAP) {
         /* Do nothing; unmapping is done at BO destruction time. */
      } else {
         /* We reject PAN_KMOD_VM_OP_TYPE_SYNC_ONLY as this implies
          * supporting PAN_KMOD_VM_OP_MODE_ASYNC, which we don't support.
          */
         mesa_loge("panfrost_kmod doesn't support op=%d", ops[i].type);
         assert(0);
         return -1;
      }
   }

   return 0;
}

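/* Query the GPU system timestamp. required=false so this simply returns 0 on
 * kernels that don't implement DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP.
 */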
static uint64_t
panfrost_kmod_query_timestamp(const struct pan_kmod_dev *dev)
{
   return panfrost_query_raw(dev->fd, DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP,
                             false, 0);
}

const struct pan_kmod_ops panfrost_kmod_ops = {
   .dev_create = panfrost_kmod_dev_create,
   .dev_destroy = panfrost_kmod_dev_destroy,
   .dev_query_props = panfrost_dev_query_props,
   .dev_query_user_va_range = panfrost_kmod_dev_query_user_va_range,
   .bo_alloc = panfrost_kmod_bo_alloc,
   .bo_free = panfrost_kmod_bo_free,
   .bo_import = panfrost_kmod_bo_import,
   .bo_get_mmap_offset = panfrost_kmod_bo_get_mmap_offset,
   .bo_wait = panfrost_kmod_bo_wait,
   .bo_make_evictable = panfrost_kmod_bo_make_evictable,
   .bo_make_unevictable = panfrost_kmod_bo_make_unevictable,
   .vm_create = panfrost_kmod_vm_create,
   .vm_destroy = panfrost_kmod_vm_destroy,
   .vm_bind = panfrost_kmod_vm_bind,
   .query_timestamp = panfrost_kmod_query_timestamp,
};