#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_surface.h"

#include "nouveau_screen.h"
#include "nouveau_context.h"
#include "nouveau_winsys.h"
#include "nouveau_fence.h"
#include "nouveau_buffer.h"
#include "nouveau_mm.h"

struct nouveau_transfer {
   struct pipe_transfer base;

   uint8_t *map;
   struct nouveau_bo *bo;
   struct nouveau_mm_allocation *mm;
   uint32_t offset;
};

static void *
nouveau_user_ptr_transfer_map(struct pipe_context *pipe,
                              struct pipe_resource *resource,
                              unsigned level, unsigned usage,
                              const struct pipe_box *box,
                              struct pipe_transfer **ptransfer);

static void
nouveau_user_ptr_transfer_unmap(struct pipe_context *pipe,
                                struct pipe_transfer *transfer);

static inline struct nouveau_transfer *
nouveau_transfer(struct pipe_transfer *transfer)
{
   return (struct nouveau_transfer *)transfer;
}

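/* Lazily allocate buf->data, which serves either as the buffer's only storage
 * (domain == 0) or as a system-memory cache of the GPU copy.
 */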
static inline bool
nouveau_buffer_malloc(struct nv04_resource *buf)
{
   if (!buf->data)
      buf->data = align_malloc(buf->base.width0, NOUVEAU_MIN_BUFFER_MAP_ALIGN);
   return !!buf->data;
}

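/* Allocate backing storage for the buffer in the given domain: a VRAM or GART
 * sub-allocation (falling back from VRAM to GART on failure), or plain system
 * memory when domain == 0.
 */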
static inline bool
nouveau_buffer_allocate(struct nouveau_screen *screen,
                        struct nv04_resource *buf, unsigned domain)
{
   uint32_t size = align(buf->base.width0, 0x100);

   if (domain == NOUVEAU_BO_VRAM) {
      buf->mm = nouveau_mm_allocate(screen->mm_VRAM, size,
                                    &buf->bo, &buf->offset);
      if (!buf->bo)
         return nouveau_buffer_allocate(screen, buf, NOUVEAU_BO_GART);
      NOUVEAU_DRV_STAT(screen, buf_obj_current_bytes_vid, buf->base.width0);
   } else
   if (domain == NOUVEAU_BO_GART) {
      buf->mm = nouveau_mm_allocate(screen->mm_GART, size,
                                    &buf->bo, &buf->offset);
      if (!buf->bo)
         return false;
      NOUVEAU_DRV_STAT(screen, buf_obj_current_bytes_sys, buf->base.width0);
   } else {
      assert(domain == 0);
      if (!nouveau_buffer_malloc(buf))
         return false;
   }
   buf->domain = domain;
   if (buf->bo)
      buf->address = buf->bo->offset + buf->offset;

   util_range_set_empty(&buf->valid_buffer_range);

   return true;
}

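/* Defer freeing the sub-allocation until the given fence has signalled, so the
 * GPU cannot still be using it.
 */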
static inline void
release_allocation(struct nouveau_mm_allocation **mm,
                   struct nouveau_fence *fence)
{
   nouveau_fence_work(fence, nouveau_mm_free_work, *mm);
   (*mm) = NULL;
}

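/* Drop the buffer's GPU storage (bo and sub-allocation). The actual release is
 * fenced so in-flight GPU work can complete first.
 */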
inline void
nouveau_buffer_release_gpu_storage(struct nv04_resource *buf)
{
   assert(!(buf->status & NOUVEAU_BUFFER_STATUS_USER_PTR));

   nouveau_fence_work(buf->fence, nouveau_fence_unref_bo, buf->bo);
   buf->bo = NULL;

   if (buf->mm)
      release_allocation(&buf->mm, buf->fence);

   if (buf->domain == NOUVEAU_BO_VRAM)
      NOUVEAU_DRV_STAT_RES(buf, buf_obj_current_bytes_vid, -(uint64_t)buf->base.width0);
   if (buf->domain == NOUVEAU_BO_GART)
      NOUVEAU_DRV_STAT_RES(buf, buf_obj_current_bytes_sys, -(uint64_t)buf->base.width0);

   buf->domain = 0;
}

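/* Replace the buffer's backing storage with a fresh allocation in the given
 * domain, dropping the old storage and any fences attached to it.
 */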
static inline bool
nouveau_buffer_reallocate(struct nouveau_screen *screen,
                          struct nv04_resource *buf, unsigned domain)
{
   nouveau_buffer_release_gpu_storage(buf);

   nouveau_fence_ref(NULL, &buf->fence);
   nouveau_fence_ref(NULL, &buf->fence_wr);

   buf->status &= NOUVEAU_BUFFER_STATUS_REALLOC_MASK;

   return nouveau_buffer_allocate(screen, buf, domain);
}

void
nouveau_buffer_destroy(struct pipe_screen *pscreen,
                       struct pipe_resource *presource)
{
   struct nv04_resource *res = nv04_resource(presource);

   if (res->status & NOUVEAU_BUFFER_STATUS_USER_PTR) {
      FREE(res);
      return;
   }

   nouveau_buffer_release_gpu_storage(res);

   if (res->data && !(res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY))
      align_free(res->data);

   nouveau_fence_ref(NULL, &res->fence);
   nouveau_fence_ref(NULL, &res->fence_wr);

   util_range_destroy(&res->valid_buffer_range);

   FREE(res);

   NOUVEAU_DRV_STAT(nouveau_screen(pscreen), buf_obj_current_count, -1);
}

/* Set up a staging area for the transfer. This is either done in "regular"
 * system memory if the driver supports push_data (nv50+) and the data is
 * small enough (and permit_pb == true), or in GART memory.
 */
static uint8_t *
nouveau_transfer_staging(struct nouveau_context *nv,
                         struct nouveau_transfer *tx, bool permit_pb)
{
   const unsigned adj = tx->base.box.x & NOUVEAU_MIN_BUFFER_MAP_ALIGN_MASK;
   const unsigned size = align(tx->base.box.width, 4) + adj;

   if (!nv->push_data)
      permit_pb = false;

   if ((size <= nv->screen->transfer_pushbuf_threshold) && permit_pb) {
      tx->map = align_malloc(size, NOUVEAU_MIN_BUFFER_MAP_ALIGN);
      if (tx->map)
         tx->map += adj;
   } else {
      tx->mm =
         nouveau_mm_allocate(nv->screen->mm_GART, size, &tx->bo, &tx->offset);
      if (tx->bo) {
         tx->offset += adj;
         if (!BO_MAP(nv->screen, tx->bo, 0, NULL))
            tx->map = (uint8_t *)tx->bo->map + tx->offset;
      }
   }
   return tx->map;
}

/* Copies data from the resource into the transfer's temporary GART
 * buffer. Also updates buf->data if present.
 *
 * Maybe just migrate to GART right away if we actually need to do this. */
static bool
nouveau_transfer_read(struct nouveau_context *nv, struct nouveau_transfer *tx)
{
   struct nv04_resource *buf = nv04_resource(tx->base.resource);
   const unsigned base = tx->base.box.x;
   const unsigned size = tx->base.box.width;

   NOUVEAU_DRV_STAT(nv->screen, buf_read_bytes_staging_vid, size);

   nv->copy_data(nv, tx->bo, tx->offset, NOUVEAU_BO_GART,
                 buf->bo, buf->offset + base, buf->domain, size);

   if (BO_WAIT(nv->screen, tx->bo, NOUVEAU_BO_RD, nv->client))
      return false;

   if (buf->data)
      memcpy(buf->data + base, tx->map, size);

   return true;
}

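/* Writes a range of the transfer back to the GPU copy of the resource. If the
 * buffer has a system-memory copy (buf->data), the user actually wrote into
 * that, so refresh the staging map from it first. The upload is either a GPU
 * copy from the staging bo, or an inline push through the command stream
 * (push_cb when base and size are dword-aligned, push_data otherwise).
 */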
static void
nouveau_transfer_write(struct nouveau_context *nv, struct nouveau_transfer *tx,
                       unsigned offset, unsigned size)
{
   struct nv04_resource *buf = nv04_resource(tx->base.resource);
   uint8_t *data = tx->map + offset;
   const unsigned base = tx->base.box.x + offset;
   const bool can_cb = !((base | size) & 3);

   if (buf->data)
      memcpy(data, buf->data + base, size);
   else
      buf->status |= NOUVEAU_BUFFER_STATUS_DIRTY;

   if (buf->domain == NOUVEAU_BO_VRAM)
      NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_staging_vid, size);
   if (buf->domain == NOUVEAU_BO_GART)
      NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_staging_sys, size);

   if (tx->bo)
      nv->copy_data(nv, buf->bo, buf->offset + base, buf->domain,
                    tx->bo, tx->offset + offset, NOUVEAU_BO_GART, size);
   else
   if (nv->push_cb && can_cb)
      nv->push_cb(nv, buf,
                  base, size / 4, (const uint32_t *)data);
   else
      nv->push_data(nv, buf->bo, buf->offset + base, buf->domain, size, data);

   nouveau_fence_ref(nv->fence, &buf->fence);
   nouveau_fence_ref(nv->fence, &buf->fence_wr);
}

/* Does a CPU wait for the buffer's backing data to become reliably accessible
 * for write/read by waiting on the buffer's relevant fences.
 */
static inline bool
nouveau_buffer_sync(struct nouveau_context *nv,
                    struct nv04_resource *buf, unsigned rw)
{
   if (rw == PIPE_MAP_READ) {
      if (!buf->fence_wr)
         return true;
      NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
                           !nouveau_fence_signalled(buf->fence_wr));
      if (!nouveau_fence_wait(buf->fence_wr, &nv->debug))
         return false;
   } else {
      if (!buf->fence)
         return true;
      NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
                           !nouveau_fence_signalled(buf->fence));
      if (!nouveau_fence_wait(buf->fence, &nv->debug))
         return false;

      nouveau_fence_ref(NULL, &buf->fence);
   }
   nouveau_fence_ref(NULL, &buf->fence_wr);

   return true;
}

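/* Checks whether the GPU still has pending work on this buffer: for a read we
 * only care about outstanding GPU writes, for a write about any GPU access.
 */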
static inline bool
nouveau_buffer_busy(struct nv04_resource *buf, unsigned rw)
{
   if (rw == PIPE_MAP_READ)
      return (buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr));
   else
      return (buf->fence && !nouveau_fence_signalled(buf->fence));
}

static inline void
nouveau_buffer_transfer_init(struct nouveau_transfer *tx,
                             struct pipe_resource *resource,
                             const struct pipe_box *box,
                             unsigned usage)
{
   tx->base.resource = resource;
   tx->base.level = 0;
   tx->base.usage = usage;
   tx->base.box.x = box->x;
   tx->base.box.y = 0;
   tx->base.box.z = 0;
   tx->base.box.width = box->width;
   tx->base.box.height = 1;
   tx->base.box.depth = 1;
   tx->base.stride = 0;
   tx->base.layer_stride = 0;

   tx->bo = NULL;
   tx->map = NULL;
}

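/* Releases the transfer's staging resources. A bo-backed staging area is freed
 * through a fence callback so pending GPU copies can finish; a malloc'ed one is
 * freed immediately (undoing the alignment adjustment applied to the map).
 */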
static inline void
nouveau_buffer_transfer_del(struct nouveau_context *nv,
                            struct nouveau_transfer *tx)
{
   if (tx->map) {
      if (likely(tx->bo)) {
         nouveau_fence_work(nv->fence, nouveau_fence_unref_bo, tx->bo);
         if (tx->mm)
            release_allocation(&tx->mm, nv->fence);
      } else {
         align_free(tx->map -
                    (tx->base.box.x & NOUVEAU_MIN_BUFFER_MAP_ALIGN_MASK));
      }
   }
}

/* Creates a cache in system memory of the buffer data. */
static bool
nouveau_buffer_cache(struct nouveau_context *nv, struct nv04_resource *buf)
{
   struct nouveau_transfer tx;
   bool ret;
   tx.base.resource = &buf->base;
   tx.base.box.x = 0;
   tx.base.box.width = buf->base.width0;
   tx.bo = NULL;
   tx.map = NULL;

   if (!buf->data)
      if (!nouveau_buffer_malloc(buf))
         return false;
   if (!(buf->status & NOUVEAU_BUFFER_STATUS_DIRTY))
      return true;
   nv->stats.buf_cache_count++;

   if (!nouveau_transfer_staging(nv, &tx, false))
      return false;

   ret = nouveau_transfer_read(nv, &tx);
   if (ret) {
      buf->status &= ~NOUVEAU_BUFFER_STATUS_DIRTY;
      memcpy(buf->data, tx.map, buf->base.width0);
   }
   nouveau_buffer_transfer_del(nv, &tx);
   return ret;
}


#define NOUVEAU_TRANSFER_DISCARD \
   (PIPE_MAP_DISCARD_RANGE | PIPE_MAP_DISCARD_WHOLE_RESOURCE)

/* Checks whether it is possible to completely discard the memory backing this
 * resource. This can be useful if we would otherwise have to wait for a read
 * operation to complete on this data.
 */
static inline bool
nouveau_buffer_should_discard(struct nv04_resource *buf, unsigned usage)
{
   if (!(usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE))
      return false;
   if (unlikely(buf->base.bind & PIPE_BIND_SHARED))
      return false;
   if (unlikely(usage & PIPE_MAP_PERSISTENT))
      return false;
   return buf->mm && nouveau_buffer_busy(buf, PIPE_MAP_WRITE);
}

/* Returns a pointer to a memory area representing a window into the
 * resource's data.
 *
 * This may or may not be the _actual_ memory area of the resource. However,
 * when calling nouveau_buffer_transfer_unmap, if it wasn't the actual memory
 * area, the contents of the returned map are copied over to the resource.
 *
 * The usage indicates what the caller plans to do with the map:
 *
 * WRITE means that the user plans to write to it
 *
 * READ means that the user plans on reading from it
 *
 * DISCARD_WHOLE_RESOURCE means that the whole resource is going to be
 * potentially overwritten, and even if it isn't, the bits that aren't
 * overwritten don't need to be preserved.
 *
 * DISCARD_RANGE means that all the data in the specified range is going to
 * be overwritten.
 *
 * The strategy for determining what kind of memory area to return is complex,
 * see comments inside of the function.
 */
void *
nouveau_buffer_transfer_map(struct pipe_context *pipe,
                            struct pipe_resource *resource,
                            unsigned level, unsigned usage,
                            const struct pipe_box *box,
                            struct pipe_transfer **ptransfer)
{
   struct nouveau_context *nv = nouveau_context(pipe);
   struct nv04_resource *buf = nv04_resource(resource);

   if (buf->status & NOUVEAU_BUFFER_STATUS_USER_PTR)
      return nouveau_user_ptr_transfer_map(pipe, resource, level, usage, box, ptransfer);

   struct nouveau_transfer *tx = MALLOC_STRUCT(nouveau_transfer);
   uint8_t *map;
   int ret;

   if (!tx)
      return NULL;
   nouveau_buffer_transfer_init(tx, resource, box, usage);
   *ptransfer = &tx->base;

   if (usage & PIPE_MAP_READ)
      NOUVEAU_DRV_STAT(nv->screen, buf_transfers_rd, 1);
   if (usage & PIPE_MAP_WRITE)
      NOUVEAU_DRV_STAT(nv->screen, buf_transfers_wr, 1);

   /* If we are trying to write to an uninitialized range, the user shouldn't
    * care what was there before. So we can treat the write as if the target
    * range were being discarded. Furthermore, since we know that even if this
    * buffer is busy due to GPU activity, because the contents were
    * uninitialized, the GPU can't care what was there, and so we can treat
    * the write as being unsynchronized.
    */
   if ((usage & PIPE_MAP_WRITE) &&
       !util_ranges_intersect(&buf->valid_buffer_range, box->x, box->x + box->width))
      usage |= PIPE_MAP_DISCARD_RANGE | PIPE_MAP_UNSYNCHRONIZED;

   if (buf->domain == NOUVEAU_BO_VRAM) {
      if (usage & NOUVEAU_TRANSFER_DISCARD) {
         /* Set up a staging area for the user to write to. It will be copied
          * back into VRAM on unmap. */
         if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE)
            buf->status &= NOUVEAU_BUFFER_STATUS_REALLOC_MASK;
         nouveau_transfer_staging(nv, tx, true);
      } else {
         if (buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
            /* The GPU is currently writing to this buffer. Copy its current
             * contents to a staging area in the GART. This is necessary since
             * not the whole area being mapped is being discarded.
             */
            if (buf->data) {
               align_free(buf->data);
               buf->data = NULL;
            }
            nouveau_transfer_staging(nv, tx, false);
            nouveau_transfer_read(nv, tx);
         } else {
            /* The buffer is currently idle. Create a staging area for writes,
             * and make sure that the cached data is up-to-date. */
            if (usage & PIPE_MAP_WRITE)
               nouveau_transfer_staging(nv, tx, true);
            if (!buf->data)
               nouveau_buffer_cache(nv, buf);
         }
      }
      return buf->data ? (buf->data + box->x) : tx->map;
   } else
   if (unlikely(buf->domain == 0)) {
      return buf->data + box->x;
   }

   /* At this point, buf->domain == GART */

   if (nouveau_buffer_should_discard(buf, usage)) {
      int ref = buf->base.reference.count - 1;
      nouveau_buffer_reallocate(nv->screen, buf, buf->domain);
      if (ref > 0) /* any references inside context possible ? */
         nv->invalidate_resource_storage(nv, &buf->base, ref);
   }

   /* Note that nouveau_bo_map ends up doing a nouveau_bo_wait with the
    * relevant flags. If buf->mm is set, that means this resource is part of a
    * larger slab bo that holds multiple resources. So in that case, don't
    * wait on the whole slab and instead use the logic below to return a
    * reasonable buffer for that case.
    */
   ret = BO_MAP(nv->screen, buf->bo,
                buf->mm ? 0 : nouveau_screen_transfer_flags(usage),
                nv->client);
   if (ret) {
      FREE(tx);
      return NULL;
   }
   map = (uint8_t *)buf->bo->map + buf->offset + box->x;

   /* using kernel fences only if !buf->mm */
   if ((usage & PIPE_MAP_UNSYNCHRONIZED) || !buf->mm)
      return map;

   /* If the GPU is currently reading/writing this buffer, we shouldn't
    * interfere with its progress. So instead we either wait for the GPU to
    * complete its operation, or set up a staging area to perform our work in.
    */
   if (nouveau_buffer_busy(buf, usage & PIPE_MAP_READ_WRITE)) {
      if (unlikely(usage & (PIPE_MAP_DISCARD_WHOLE_RESOURCE |
                            PIPE_MAP_PERSISTENT))) {
         /* Discarding was not possible, must sync because
          * subsequent transfers might use UNSYNCHRONIZED. */
         nouveau_buffer_sync(nv, buf, usage & PIPE_MAP_READ_WRITE);
      } else
      if (usage & PIPE_MAP_DISCARD_RANGE) {
         /* The whole range is being discarded, so it doesn't matter what was
          * there before. No need to copy anything over. */
         nouveau_transfer_staging(nv, tx, true);
         map = tx->map;
      } else
      if (nouveau_buffer_busy(buf, PIPE_MAP_READ)) {
         if (usage & PIPE_MAP_DONTBLOCK)
            map = NULL;
         else
            nouveau_buffer_sync(nv, buf, usage & PIPE_MAP_READ_WRITE);
      } else {
         /* It is expected that the returned buffer be a representation of the
          * data in question, so we must copy it over from the buffer. */
         nouveau_transfer_staging(nv, tx, true);
         if (tx->map)
            memcpy(tx->map, map, box->width);
         map = tx->map;
      }
   }
   if (!map)
      FREE(tx);
   return map;
}


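/* Explicit flush of a mapped range (PIPE_MAP_FLUSH_EXPLICIT): write the given
 * sub-range back to the resource and mark it as valid.
 */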
void
nouveau_buffer_transfer_flush_region(struct pipe_context *pipe,
                                     struct pipe_transfer *transfer,
                                     const struct pipe_box *box)
{
   struct nouveau_transfer *tx = nouveau_transfer(transfer);
   struct nv04_resource *buf = nv04_resource(transfer->resource);

   if (tx->map)
      nouveau_transfer_write(nouveau_context(pipe), tx, box->x, box->width);

   util_range_add(&buf->base, &buf->valid_buffer_range,
                  tx->base.box.x + box->x,
                  tx->base.box.x + box->x + box->width);
}

/* Unmap stage of the transfer. If it was a WRITE transfer and the map that
 * was returned was not the real resource's data, this needs to transfer the
 * data back to the resource.
 *
 * Also marks vbo dirty based on the buffer's binding.
 */
void
nouveau_buffer_transfer_unmap(struct pipe_context *pipe,
                              struct pipe_transfer *transfer)
{
   struct nouveau_context *nv = nouveau_context(pipe);
   struct nv04_resource *buf = nv04_resource(transfer->resource);

   if (buf->status & NOUVEAU_BUFFER_STATUS_USER_PTR)
      return nouveau_user_ptr_transfer_unmap(pipe, transfer);

   struct nouveau_transfer *tx = nouveau_transfer(transfer);

   if (tx->base.usage & PIPE_MAP_WRITE) {
      if (!(tx->base.usage & PIPE_MAP_FLUSH_EXPLICIT)) {
         if (tx->map)
            nouveau_transfer_write(nv, tx, 0, tx->base.box.width);

         util_range_add(&buf->base, &buf->valid_buffer_range,
                        tx->base.box.x, tx->base.box.x + tx->base.box.width);
      }

      if (likely(buf->domain)) {
         const uint8_t bind = buf->base.bind;
         /* make sure we invalidate dedicated caches */
         if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
            nv->vbo_dirty = true;
      }
   }

   if (!tx->bo && (tx->base.usage & PIPE_MAP_WRITE))
      NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_direct, tx->base.box.width);

   nouveau_buffer_transfer_del(nv, tx);
   FREE(tx);
}


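/* Copy a range between two buffers. Uses a GPU copy when both have GPU
 * storage (updating fences and status accordingly), otherwise falls back to
 * util_resource_copy_region through the transfer path.
 */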
void
nouveau_copy_buffer(struct nouveau_context *nv,
                    struct nv04_resource *dst, unsigned dstx,
                    struct nv04_resource *src, unsigned srcx, unsigned size)
{
   assert(dst->base.target == PIPE_BUFFER && src->base.target == PIPE_BUFFER);

   if (likely(dst->domain) && likely(src->domain)) {
      nv->copy_data(nv,
                    dst->bo, dst->offset + dstx, dst->domain,
                    src->bo, src->offset + srcx, src->domain, size);

      dst->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      nouveau_fence_ref(nv->fence, &dst->fence);
      nouveau_fence_ref(nv->fence, &dst->fence_wr);

      src->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
      nouveau_fence_ref(nv->fence, &src->fence);
   } else {
      struct pipe_box src_box;
      src_box.x = srcx;
      src_box.y = 0;
      src_box.z = 0;
      src_box.width = size;
      src_box.height = 1;
      src_box.depth = 1;
      util_resource_copy_region(&nv->pipe,
                                &dst->base, 0, dstx, 0, 0,
                                &src->base, 0, &src_box);
   }

   util_range_add(&dst->base, &dst->valid_buffer_range, dstx, dstx + size);
}


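/* Returns a CPU-visible pointer to the resource's data at the given byte
 * offset, refreshing the system-memory cache for VRAM resources and
 * mapping/syncing the bo for GART resources.
 */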
void *
nouveau_resource_map_offset(struct nouveau_context *nv,
                            struct nv04_resource *res, uint32_t offset,
                            uint32_t flags)
{
   if (unlikely(res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY) ||
       unlikely(res->status & NOUVEAU_BUFFER_STATUS_USER_PTR))
      return res->data + offset;

   if (res->domain == NOUVEAU_BO_VRAM) {
      if (!res->data || (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING))
         nouveau_buffer_cache(nv, res);
   }
   if (res->domain != NOUVEAU_BO_GART)
      return res->data + offset;

   if (res->mm) {
      unsigned rw;
      rw = (flags & NOUVEAU_BO_WR) ? PIPE_MAP_WRITE : PIPE_MAP_READ;
      nouveau_buffer_sync(nv, res, rw);
      if (BO_MAP(nv->screen, res->bo, 0, NULL))
         return NULL;
   } else {
      if (BO_MAP(nv->screen, res->bo, flags, nv->client))
         return NULL;
   }
   return (uint8_t *)res->bo->map + res->offset + offset;
}

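/* Transfer map/unmap for user-pointer resources: no staging is needed, the
 * user's own memory is returned directly.
 */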
static void *
nouveau_user_ptr_transfer_map(struct pipe_context *pipe,
                              struct pipe_resource *resource,
                              unsigned level, unsigned usage,
                              const struct pipe_box *box,
                              struct pipe_transfer **ptransfer)
{
   struct nouveau_transfer *tx = MALLOC_STRUCT(nouveau_transfer);
   if (!tx)
      return NULL;
   nouveau_buffer_transfer_init(tx, resource, box, usage);
   *ptransfer = &tx->base;
   return nv04_resource(resource)->data;
}

static void
nouveau_user_ptr_transfer_unmap(struct pipe_context *pipe,
                                struct pipe_transfer *transfer)
{
   struct nouveau_transfer *tx = nouveau_transfer(transfer);
   FREE(tx);
}

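/* Create a buffer resource, choosing VRAM or GART placement from the
 * template's flags, bind and usage fields.
 */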
struct pipe_resource *
nouveau_buffer_create(struct pipe_screen *pscreen,
                      const struct pipe_resource *templ)
{
   struct nouveau_screen *screen = nouveau_screen(pscreen);
   struct nv04_resource *buffer;
   bool ret;

   buffer = CALLOC_STRUCT(nv04_resource);
   if (!buffer)
      return NULL;

   buffer->base = *templ;
   pipe_reference_init(&buffer->base.reference, 1);
   buffer->base.screen = pscreen;

   if (buffer->base.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
                             PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
      buffer->domain = NOUVEAU_BO_GART;
   } else if (buffer->base.bind == 0 || (buffer->base.bind &
              (screen->vidmem_bindings & screen->sysmem_bindings))) {
      switch (buffer->base.usage) {
      case PIPE_USAGE_DEFAULT:
      case PIPE_USAGE_IMMUTABLE:
         buffer->domain = NV_VRAM_DOMAIN(screen);
         break;
      case PIPE_USAGE_DYNAMIC:
         /* For most apps, we'd have to do staging transfers to avoid sync
          * with this usage, and GART -> GART copies would be suboptimal.
          */
         buffer->domain = NV_VRAM_DOMAIN(screen);
         break;
      case PIPE_USAGE_STAGING:
      case PIPE_USAGE_STREAM:
         buffer->domain = NOUVEAU_BO_GART;
         break;
      default:
         assert(0);
         break;
      }
   } else {
      if (buffer->base.bind & screen->vidmem_bindings)
         buffer->domain = NV_VRAM_DOMAIN(screen);
      else
      if (buffer->base.bind & screen->sysmem_bindings)
         buffer->domain = NOUVEAU_BO_GART;
   }

   ret = nouveau_buffer_allocate(screen, buffer, buffer->domain);

   if (ret == false)
      goto fail;

   if (buffer->domain == NOUVEAU_BO_VRAM && screen->hint_buf_keep_sysmem_copy)
      nouveau_buffer_cache(NULL, buffer);

   NOUVEAU_DRV_STAT(screen, buf_obj_current_count, 1);

   util_range_init(&buffer->valid_buffer_range);

   return &buffer->base;

fail:
   FREE(buffer);
   return NULL;
}

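/* Wrap memory owned by the application as a buffer resource without
 * allocating any driver storage; the pointer is used directly by both CPU
 * and GPU.
 */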
struct pipe_resource *
nouveau_buffer_create_from_user(struct pipe_screen *pscreen,
                                const struct pipe_resource *templ,
                                void *user_ptr)
{
   struct nv04_resource *buffer;

   buffer = CALLOC_STRUCT(nv04_resource);
   if (!buffer)
      return NULL;

   buffer->base = *templ;
   /* set address and data to the same thing for higher compatibility with
    * existing code. It's correct nonetheless as the same pointer is equally
    * valid on the CPU and the GPU.
    */
   buffer->address = (uintptr_t)user_ptr;
   buffer->data = user_ptr;
   buffer->status = NOUVEAU_BUFFER_STATUS_USER_PTR;
   buffer->base.screen = pscreen;

   pipe_reference_init(&buffer->base.reference, 1);

   return &buffer->base;
}

struct pipe_resource *
nouveau_user_buffer_create(struct pipe_screen *pscreen, void *ptr,
                           unsigned bytes, unsigned bind)
{
   struct nv04_resource *buffer;

   buffer = CALLOC_STRUCT(nv04_resource);
   if (!buffer)
      return NULL;

   pipe_reference_init(&buffer->base.reference, 1);
   buffer->base.screen = pscreen;
   buffer->base.format = PIPE_FORMAT_R8_UNORM;
   buffer->base.usage = PIPE_USAGE_IMMUTABLE;
   buffer->base.bind = bind;
   buffer->base.width0 = bytes;
   buffer->base.height0 = 1;
   buffer->base.depth0 = 1;

   buffer->data = ptr;
   buffer->status = NOUVEAU_BUFFER_STATUS_USER_MEMORY;

   util_range_init(&buffer->valid_buffer_range);
   util_range_add(&buffer->base, &buffer->valid_buffer_range, 0, bytes);

   return &buffer->base;
}

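/* Read the buffer contents back from a bo into the system-memory copy
 * (buf->data), allocating it if necessary.
 */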
static inline bool
nouveau_buffer_data_fetch(struct nouveau_context *nv, struct nv04_resource *buf,
                          struct nouveau_bo *bo, unsigned offset, unsigned size)
{
   if (!nouveau_buffer_malloc(buf))
      return false;
   if (BO_MAP(nv->screen, bo, NOUVEAU_BO_RD, nv->client))
      return false;
   memcpy(buf->data, (uint8_t *)bo->map + offset, size);
   return true;
}

/* Migrate a linear buffer (vertex, index, constants) USER -> GART -> VRAM. */
bool
nouveau_buffer_migrate(struct nouveau_context *nv,
                       struct nv04_resource *buf, const unsigned new_domain)
{
   assert(!(buf->status & NOUVEAU_BUFFER_STATUS_USER_PTR));

   struct nouveau_screen *screen = nv->screen;
   struct nouveau_bo *bo;
   const unsigned old_domain = buf->domain;
   unsigned size = buf->base.width0;
   unsigned offset;
   int ret;

   assert(new_domain != old_domain);

   if (new_domain == NOUVEAU_BO_GART && old_domain == 0) {
      if (!nouveau_buffer_allocate(screen, buf, new_domain))
         return false;
      ret = BO_MAP(nv->screen, buf->bo, 0, nv->client);
      if (ret)
         return false;
      memcpy((uint8_t *)buf->bo->map + buf->offset, buf->data, size);
      align_free(buf->data);
   } else
   if (old_domain != 0 && new_domain != 0) {
      struct nouveau_mm_allocation *mm = buf->mm;

      if (new_domain == NOUVEAU_BO_VRAM) {
         /* keep a system memory copy of our data in case we hit a fallback */
         if (!nouveau_buffer_data_fetch(nv, buf, buf->bo, buf->offset, size))
            return false;
         if (nouveau_mesa_debug)
            debug_printf("migrating %u KiB to VRAM\n", size / 1024);
      }

      offset = buf->offset;
      bo = buf->bo;
      buf->bo = NULL;
      buf->mm = NULL;
      nouveau_buffer_allocate(screen, buf, new_domain);

      nv->copy_data(nv, buf->bo, buf->offset, new_domain,
                    bo, offset, old_domain, buf->base.width0);

      nouveau_fence_work(nv->fence, nouveau_fence_unref_bo, bo);
      if (mm)
         release_allocation(&mm, nv->fence);
   } else
   if (new_domain == NOUVEAU_BO_VRAM && old_domain == 0) {
      struct nouveau_transfer tx;
      if (!nouveau_buffer_allocate(screen, buf, NOUVEAU_BO_VRAM))
         return false;
      tx.base.resource = &buf->base;
      tx.base.box.x = 0;
      tx.base.box.width = buf->base.width0;
      tx.bo = NULL;
      tx.map = NULL;
      if (!nouveau_transfer_staging(nv, &tx, false))
         return false;
      nouveau_transfer_write(nv, &tx, 0, tx.base.box.width);
      nouveau_buffer_transfer_del(nv, &tx);
   } else
      return false;

   assert(buf->domain == new_domain);
   return true;
}

/* Migrate data from glVertexAttribPointer(non-VBO) user buffers to GART.
 * We'd like to only allocate @size bytes here, but then we'd have to rebase
 * the vertex indices ...
 */
bool
nouveau_user_buffer_upload(struct nouveau_context *nv,
                           struct nv04_resource *buf,
                           unsigned base, unsigned size)
{
   assert(!(buf->status & NOUVEAU_BUFFER_STATUS_USER_PTR));

   struct nouveau_screen *screen = nouveau_screen(buf->base.screen);
   int ret;

   assert(buf->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY);

   buf->base.width0 = base + size;
   if (!nouveau_buffer_reallocate(screen, buf, NOUVEAU_BO_GART))
      return false;

   ret = BO_MAP(nv->screen, buf->bo, 0, nv->client);
   if (ret)
      return false;
   memcpy((uint8_t *)buf->bo->map + buf->offset + base, buf->data + base, size);

   return true;
}

/* Invalidate underlying buffer storage, reset fences, reallocate to non-busy
 * buffer.
 */
void
nouveau_buffer_invalidate(struct pipe_context *pipe,
                          struct pipe_resource *resource)
{
   struct nouveau_context *nv = nouveau_context(pipe);
   struct nv04_resource *buf = nv04_resource(resource);
   int ref = buf->base.reference.count - 1;

   assert(!(buf->status & NOUVEAU_BUFFER_STATUS_USER_PTR));

   /* Shared buffers shouldn't get reallocated */
   if (unlikely(buf->base.bind & PIPE_BIND_SHARED))
      return;

   /* If the buffer is sub-allocated and not currently being written, just
    * wipe the valid buffer range. Otherwise we have to create fresh
    * storage. (We don't keep track of fences for non-sub-allocated BO's.)
    */
   if (buf->mm && !nouveau_buffer_busy(buf, PIPE_MAP_WRITE)) {
      util_range_set_empty(&buf->valid_buffer_range);
   } else {
      nouveau_buffer_reallocate(nv->screen, buf, buf->domain);
      if (ref > 0) /* any references inside context possible ? */
         nv->invalidate_resource_storage(nv, &buf->base, ref);
   }
}


/* Scratch data allocation. */

static inline int
nouveau_scratch_bo_alloc(struct nouveau_context *nv, struct nouveau_bo **pbo,
                         unsigned size)
{
   return nouveau_bo_new(nv->screen->device, NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
                         4096, size, NULL, pbo);
}

static void
nouveau_scratch_unref_bos(void *d)
{
   struct runout *b = d;
   int i;

   for (i = 0; i < b->nr; ++i)
      nouveau_bo_ref(NULL, &b->bo[i]);

   FREE(b);
}

void
nouveau_scratch_runout_release(struct nouveau_context *nv)
{
   if (!nv->scratch.runout)
      return;

   if (!nouveau_fence_work(nv->fence, nouveau_scratch_unref_bos,
                           nv->scratch.runout))
      return;

   nv->scratch.end = 0;
   nv->scratch.runout = NULL;
}

/* Allocate an extra bo if we can't fit everything we need simultaneously.
 * (Could happen for very large user arrays.)
 */
static inline bool
nouveau_scratch_runout(struct nouveau_context *nv, unsigned size)
{
   int ret;
   unsigned n;

   if (nv->scratch.runout)
      n = nv->scratch.runout->nr;
   else
      n = 0;
   nv->scratch.runout = REALLOC(nv->scratch.runout, n == 0 ? 0 :
                                (sizeof(*nv->scratch.runout) + (n + 0) * sizeof(void *)),
                                sizeof(*nv->scratch.runout) + (n + 1) * sizeof(void *));
   nv->scratch.runout->nr = n + 1;
   nv->scratch.runout->bo[n] = NULL;

   ret = nouveau_scratch_bo_alloc(nv, &nv->scratch.runout->bo[n], size);
   if (!ret) {
      ret = BO_MAP(nv->screen, nv->scratch.runout->bo[n], 0, NULL);
      if (ret)
         nouveau_bo_ref(NULL, &nv->scratch.runout->bo[--nv->scratch.runout->nr]);
   }
   if (!ret) {
      nv->scratch.current = nv->scratch.runout->bo[n];
      nv->scratch.offset = 0;
      nv->scratch.end = size;
      nv->scratch.map = nv->scratch.current->map;
   }
   return !ret;
}

/* Continue to next scratch buffer, if available (no wrapping, large enough).
 * Allocate it if it has not yet been created.
 */
static inline bool
nouveau_scratch_next(struct nouveau_context *nv, unsigned size)
{
   struct nouveau_bo *bo;
   int ret;
   const unsigned i = (nv->scratch.id + 1) % NOUVEAU_MAX_SCRATCH_BUFS;

   if ((size > nv->scratch.bo_size) || (i == nv->scratch.wrap))
      return false;
   nv->scratch.id = i;

   bo = nv->scratch.bo[i];
   if (!bo) {
      ret = nouveau_scratch_bo_alloc(nv, &bo, nv->scratch.bo_size);
      if (ret)
         return false;
      nv->scratch.bo[i] = bo;
   }
   nv->scratch.current = bo;
   nv->scratch.offset = 0;
   nv->scratch.end = nv->scratch.bo_size;

   ret = BO_MAP(nv->screen, bo, NOUVEAU_BO_WR, nv->client);
   if (!ret)
      nv->scratch.map = bo->map;
   return !ret;
}

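/* Make room for at least min_size bytes of scratch space: advance to the next
 * ring buffer if possible, otherwise fall back to a dedicated runout bo.
 */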
static bool
nouveau_scratch_more(struct nouveau_context *nv, unsigned min_size)
{
   bool ret;

   ret = nouveau_scratch_next(nv, min_size);
   if (!ret)
      ret = nouveau_scratch_runout(nv, min_size);
   return ret;
}


/* Copy data to a scratch buffer and return address & bo the data resides in. */
uint64_t
nouveau_scratch_data(struct nouveau_context *nv,
                     const void *data, unsigned base, unsigned size,
                     struct nouveau_bo **bo)
{
   unsigned bgn = MAX2(base, nv->scratch.offset);
   unsigned end = bgn + size;

   if (end >= nv->scratch.end) {
      end = base + size;
      if (!nouveau_scratch_more(nv, end))
         return 0;
      bgn = base;
   }
   nv->scratch.offset = align(end, 4);

   memcpy(nv->scratch.map + bgn, (const uint8_t *)data + base, size);

   *bo = nv->scratch.current;
   return (*bo)->offset + (bgn - base);
}

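/* Reserve size bytes of scratch space and return a CPU pointer to it, along
 * with the bo and GPU address, so the caller can fill it in directly.
 */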
void *
nouveau_scratch_get(struct nouveau_context *nv,
                    unsigned size, uint64_t *gpu_addr, struct nouveau_bo **pbo)
{
   unsigned bgn = nv->scratch.offset;
   unsigned end = nv->scratch.offset + size;

   if (end >= nv->scratch.end) {
      end = size;
      if (!nouveau_scratch_more(nv, end))
         return NULL;
      bgn = 0;
   }
   nv->scratch.offset = align(end, 4);

   *pbo = nv->scratch.current;
   *gpu_addr = nv->scratch.current->offset + bgn;
   return nv->scratch.map + bgn;
}