/* external/mesa3d/src/gallium/drivers/nouveau/nouveau_buffer.c */

#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_surface.h"

#include "nouveau_screen.h"
#include "nouveau_context.h"
#include "nouveau_winsys.h"
#include "nouveau_fence.h"
#include "nouveau_buffer.h"
#include "nouveau_mm.h"

struct nouveau_transfer {
   struct pipe_transfer base;

   uint8_t *map;
   struct nouveau_bo *bo;
   struct nouveau_mm_allocation *mm;
   uint32_t offset;
};

static void *
nouveau_user_ptr_transfer_map(struct pipe_context *pipe,
                              struct pipe_resource *resource,
                              unsigned level, unsigned usage,
                              const struct pipe_box *box,
                              struct pipe_transfer **ptransfer);

static void
nouveau_user_ptr_transfer_unmap(struct pipe_context *pipe,
                                struct pipe_transfer *transfer);

static inline struct nouveau_transfer *
nouveau_transfer(struct pipe_transfer *transfer)
{
   return (struct nouveau_transfer *)transfer;
}

static inline bool
nouveau_buffer_malloc(struct nv04_resource *buf)
{
   if (!buf->data)
      buf->data = align_malloc(buf->base.width0, NOUVEAU_MIN_BUFFER_MAP_ALIGN);
   return !!buf->data;
}

static inline bool
nouveau_buffer_allocate(struct nouveau_screen *screen,
                        struct nv04_resource *buf, unsigned domain)
{
   uint32_t size = align(buf->base.width0, 0x100);

   if (domain == NOUVEAU_BO_VRAM) {
      buf->mm = nouveau_mm_allocate(screen->mm_VRAM, size,
                                    &buf->bo, &buf->offset);
      if (!buf->bo)
         return nouveau_buffer_allocate(screen, buf, NOUVEAU_BO_GART);
      NOUVEAU_DRV_STAT(screen, buf_obj_current_bytes_vid, buf->base.width0);
   } else
   if (domain == NOUVEAU_BO_GART) {
      buf->mm = nouveau_mm_allocate(screen->mm_GART, size,
                                    &buf->bo, &buf->offset);
      if (!buf->bo)
         return false;
      NOUVEAU_DRV_STAT(screen, buf_obj_current_bytes_sys, buf->base.width0);
   } else {
      assert(domain == 0);
      if (!nouveau_buffer_malloc(buf))
         return false;
   }
   buf->domain = domain;
   if (buf->bo)
      buf->address = buf->bo->offset + buf->offset;

   util_range_set_empty(&buf->valid_buffer_range);

   return true;
}
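
/* Placement note: a VRAM request that the VRAM suballocator cannot satisfy
 * falls back to GART (see the recursive call above), and domain == 0 means
 * the buffer is backed only by malloc'd system memory (buf->data). A true
 * return therefore guarantees some backing storage, not necessarily storage
 * in the requested domain.
 */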

static inline void
release_allocation(struct nouveau_mm_allocation **mm,
                   struct nouveau_fence *fence)
{
   nouveau_fence_work(fence, nouveau_mm_free_work, *mm);
   (*mm) = NULL;
}

inline void
nouveau_buffer_release_gpu_storage(struct nv04_resource *buf)
{
   assert(!(buf->status & NOUVEAU_BUFFER_STATUS_USER_PTR));

   nouveau_fence_work(buf->fence, nouveau_fence_unref_bo, buf->bo);
   buf->bo = NULL;

   if (buf->mm)
      release_allocation(&buf->mm, buf->fence);

   if (buf->domain == NOUVEAU_BO_VRAM)
      NOUVEAU_DRV_STAT_RES(buf, buf_obj_current_bytes_vid, -(uint64_t)buf->base.width0);
   if (buf->domain == NOUVEAU_BO_GART)
      NOUVEAU_DRV_STAT_RES(buf, buf_obj_current_bytes_sys, -(uint64_t)buf->base.width0);

   buf->domain = 0;
}

static inline bool
nouveau_buffer_reallocate(struct nouveau_screen *screen,
                          struct nv04_resource *buf, unsigned domain)
{
   nouveau_buffer_release_gpu_storage(buf);

   nouveau_fence_ref(NULL, &buf->fence);
   nouveau_fence_ref(NULL, &buf->fence_wr);

   buf->status &= NOUVEAU_BUFFER_STATUS_REALLOC_MASK;

   return nouveau_buffer_allocate(screen, buf, domain);
}

void
nouveau_buffer_destroy(struct pipe_screen *pscreen,
                       struct pipe_resource *presource)
{
   struct nv04_resource *res = nv04_resource(presource);

   if (res->status & NOUVEAU_BUFFER_STATUS_USER_PTR) {
      FREE(res);
      return;
   }

   nouveau_buffer_release_gpu_storage(res);

   if (res->data && !(res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY))
      align_free(res->data);

   nouveau_fence_ref(NULL, &res->fence);
   nouveau_fence_ref(NULL, &res->fence_wr);

   util_range_destroy(&res->valid_buffer_range);

   FREE(res);

   NOUVEAU_DRV_STAT(nouveau_screen(pscreen), buf_obj_current_count, -1);
}

/* Set up a staging area for the transfer. This is either done in "regular"
 * system memory if the driver supports push_data (nv50+) and the data is
 * small enough (and permit_pb == true), or in GART memory.
 */
static uint8_t *
nouveau_transfer_staging(struct nouveau_context *nv,
                         struct nouveau_transfer *tx, bool permit_pb)
{
   const unsigned adj = tx->base.box.x & NOUVEAU_MIN_BUFFER_MAP_ALIGN_MASK;
   const unsigned size = align(tx->base.box.width, 4) + adj;

   if (!nv->push_data)
      permit_pb = false;

   if ((size <= nv->screen->transfer_pushbuf_threshold) && permit_pb) {
      tx->map = align_malloc(size, NOUVEAU_MIN_BUFFER_MAP_ALIGN);
      if (tx->map)
         tx->map += adj;
   } else {
      tx->mm =
         nouveau_mm_allocate(nv->screen->mm_GART, size, &tx->bo, &tx->offset);
      if (tx->bo) {
         tx->offset += adj;
         if (!BO_MAP(nv->screen, tx->bo, 0, NULL))
            tx->map = (uint8_t *)tx->bo->map + tx->offset;
      }
   }
   return tx->map;
}
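
/* The two staging flavours differ in how the data reaches the resource on
 * flush/unmap: a malloc'd staging map is pushed through the command stream
 * (push_data/push_cb), while a GART staging bo is copied with the GPU copy
 * engine (copy_data). In both cases tx->map points at the byte corresponding
 * to box.x; the 'adj' bytes only restore the sub-alignment offset.
 */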

/* Copies data from the resource into the transfer's temporary GART
 * buffer. Also updates buf->data if present.
 *
 * Maybe just migrate to GART right away if we actually need to do this. */
static bool
nouveau_transfer_read(struct nouveau_context *nv, struct nouveau_transfer *tx)
{
   struct nv04_resource *buf = nv04_resource(tx->base.resource);
   const unsigned base = tx->base.box.x;
   const unsigned size = tx->base.box.width;

   NOUVEAU_DRV_STAT(nv->screen, buf_read_bytes_staging_vid, size);

   nv->copy_data(nv, tx->bo, tx->offset, NOUVEAU_BO_GART,
                 buf->bo, buf->offset + base, buf->domain, size);

   if (BO_WAIT(nv->screen, tx->bo, NOUVEAU_BO_RD, nv->client))
      return false;

   if (buf->data)
      memcpy(buf->data + base, tx->map, size);

   return true;
}

static void
nouveau_transfer_write(struct nouveau_context *nv, struct nouveau_transfer *tx,
                       unsigned offset, unsigned size)
{
   struct nv04_resource *buf = nv04_resource(tx->base.resource);
   uint8_t *data = tx->map + offset;
   const unsigned base = tx->base.box.x + offset;
   const bool can_cb = !((base | size) & 3);

   if (buf->data)
      memcpy(data, buf->data + base, size);
   else
      buf->status |= NOUVEAU_BUFFER_STATUS_DIRTY;

   if (buf->domain == NOUVEAU_BO_VRAM)
      NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_staging_vid, size);
   if (buf->domain == NOUVEAU_BO_GART)
      NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_staging_sys, size);

   if (tx->bo)
      nv->copy_data(nv, buf->bo, buf->offset + base, buf->domain,
                    tx->bo, tx->offset + offset, NOUVEAU_BO_GART, size);
   else
   if (nv->push_cb && can_cb)
      nv->push_cb(nv, buf,
                  base, size / 4, (const uint32_t *)data);
   else
      nv->push_data(nv, buf->bo, buf->offset + base, buf->domain, size, data);

   nouveau_fence_ref(nv->fence, &buf->fence);
   nouveau_fence_ref(nv->fence, &buf->fence_wr);
}

/* Does a CPU wait for the buffer's backing data to become reliably accessible
 * for write/read by waiting on the buffer's relevant fences.
 */
static inline bool
nouveau_buffer_sync(struct nouveau_context *nv,
                    struct nv04_resource *buf, unsigned rw)
{
   if (rw == PIPE_MAP_READ) {
      if (!buf->fence_wr)
         return true;
      NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
                           !nouveau_fence_signalled(buf->fence_wr));
      if (!nouveau_fence_wait(buf->fence_wr, &nv->debug))
         return false;
   } else {
      if (!buf->fence)
         return true;
      NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
                           !nouveau_fence_signalled(buf->fence));
      if (!nouveau_fence_wait(buf->fence, &nv->debug))
         return false;

      nouveau_fence_ref(NULL, &buf->fence);
   }
   nouveau_fence_ref(NULL, &buf->fence_wr);

   return true;
}
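
/* A CPU read only has to wait for the last GPU write (fence_wr); a CPU write
 * additionally has to wait for outstanding GPU reads, which are covered by
 * the generic access fence (fence). Fences that have been waited on are
 * dropped above so that repeated syncs stay cheap.
 */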

static inline bool
nouveau_buffer_busy(struct nv04_resource *buf, unsigned rw)
{
   if (rw == PIPE_MAP_READ)
      return (buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr));
   else
      return (buf->fence && !nouveau_fence_signalled(buf->fence));
}

static inline void
nouveau_buffer_transfer_init(struct nouveau_transfer *tx,
                             struct pipe_resource *resource,
                             const struct pipe_box *box,
                             unsigned usage)
{
   tx->base.resource = resource;
   tx->base.level = 0;
   tx->base.usage = usage;
   tx->base.box.x = box->x;
   tx->base.box.y = 0;
   tx->base.box.z = 0;
   tx->base.box.width = box->width;
   tx->base.box.height = 1;
   tx->base.box.depth = 1;
   tx->base.stride = 0;
   tx->base.layer_stride = 0;

   tx->bo = NULL;
   tx->map = NULL;
}

static inline void
nouveau_buffer_transfer_del(struct nouveau_context *nv,
                            struct nouveau_transfer *tx)
{
   if (tx->map) {
      if (likely(tx->bo)) {
         nouveau_fence_work(nv->fence, nouveau_fence_unref_bo, tx->bo);
         if (tx->mm)
            release_allocation(&tx->mm, nv->fence);
      } else {
         align_free(tx->map -
                    (tx->base.box.x & NOUVEAU_MIN_BUFFER_MAP_ALIGN_MASK));
      }
   }
}

/* Creates a cache in system memory of the buffer data. */
static bool
nouveau_buffer_cache(struct nouveau_context *nv, struct nv04_resource *buf)
{
   struct nouveau_transfer tx;
   bool ret;
   tx.base.resource = &buf->base;
   tx.base.box.x = 0;
   tx.base.box.width = buf->base.width0;
   tx.bo = NULL;
   tx.map = NULL;

   if (!buf->data)
      if (!nouveau_buffer_malloc(buf))
         return false;
   if (!(buf->status & NOUVEAU_BUFFER_STATUS_DIRTY))
      return true;
   nv->stats.buf_cache_count++;

   if (!nouveau_transfer_staging(nv, &tx, false))
      return false;

   ret = nouveau_transfer_read(nv, &tx);
   if (ret) {
      buf->status &= ~NOUVEAU_BUFFER_STATUS_DIRTY;
      memcpy(buf->data, tx.map, buf->base.width0);
   }
   nouveau_buffer_transfer_del(nv, &tx);
   return ret;
}
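
/* The system memory copy (buf->data) only has to be refreshed when it is
 * stale relative to the GPU copy, which is what NOUVEAU_BUFFER_STATUS_DIRTY
 * tracks. In that case the whole buffer is read back through a GART staging
 * area (permit_pb == false, since this is a read-back rather than an upload).
 */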


#define NOUVEAU_TRANSFER_DISCARD \
   (PIPE_MAP_DISCARD_RANGE | PIPE_MAP_DISCARD_WHOLE_RESOURCE)

/* Checks whether it is possible to completely discard the memory backing this
 * resource. This can be useful if we would otherwise have to wait for a read
 * operation to complete on this data.
 */
static inline bool
nouveau_buffer_should_discard(struct nv04_resource *buf, unsigned usage)
{
   if (!(usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE))
      return false;
   if (unlikely(buf->base.bind & PIPE_BIND_SHARED))
      return false;
   if (unlikely(usage & PIPE_MAP_PERSISTENT))
      return false;
   return buf->mm && nouveau_buffer_busy(buf, PIPE_MAP_WRITE);
}
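
/* Discarding only pays off for sub-allocated buffers (buf->mm) that the GPU
 * is still using: re-allocation is cheap there and avoids a stall. Shared
 * and persistently mapped buffers must keep their current storage because
 * other contexts or existing CPU mappings may still reference it.
 */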

/* Returns a pointer to a memory area representing a window into the
 * resource's data.
 *
 * This may or may not be the _actual_ memory area of the resource. However
 * when calling nouveau_buffer_transfer_unmap, if it wasn't the actual memory
 * area, the contents of the returned map are copied over to the resource.
 *
 * The usage indicates what the caller plans to do with the map:
 *
 *   WRITE means that the user plans to write to it
 *
 *   READ means that the user plans on reading from it
 *
 *   DISCARD_WHOLE_RESOURCE means that the whole resource is going to be
 *   potentially overwritten, and even if it isn't, the bits that aren't
 *   overwritten don't need to be preserved.
 *
 *   DISCARD_RANGE means that all the data in the specified range is going to
 *   be overwritten.
 *
 * The strategy for determining what kind of memory area to return is complex,
 * see comments inside of the function.
 */
void *
nouveau_buffer_transfer_map(struct pipe_context *pipe,
                            struct pipe_resource *resource,
                            unsigned level, unsigned usage,
                            const struct pipe_box *box,
                            struct pipe_transfer **ptransfer)
{
   struct nouveau_context *nv = nouveau_context(pipe);
   struct nv04_resource *buf = nv04_resource(resource);

   if (buf->status & NOUVEAU_BUFFER_STATUS_USER_PTR)
      return nouveau_user_ptr_transfer_map(pipe, resource, level, usage, box, ptransfer);

   struct nouveau_transfer *tx = MALLOC_STRUCT(nouveau_transfer);
   uint8_t *map;
   int ret;

   if (!tx)
      return NULL;
   nouveau_buffer_transfer_init(tx, resource, box, usage);
   *ptransfer = &tx->base;

   if (usage & PIPE_MAP_READ)
      NOUVEAU_DRV_STAT(nv->screen, buf_transfers_rd, 1);
   if (usage & PIPE_MAP_WRITE)
      NOUVEAU_DRV_STAT(nv->screen, buf_transfers_wr, 1);

   /* If we are trying to write to an uninitialized range, the user shouldn't
    * care what was there before. So we can treat the write as if the target
    * range were being discarded. Furthermore, even if this buffer is busy
    * due to GPU activity, the GPU cannot depend on uninitialized contents,
    * so we can also treat the write as unsynchronized.
    */
   if ((usage & PIPE_MAP_WRITE) &&
       !util_ranges_intersect(&buf->valid_buffer_range, box->x, box->x + box->width))
      usage |= PIPE_MAP_DISCARD_RANGE | PIPE_MAP_UNSYNCHRONIZED;

   if (buf->domain == NOUVEAU_BO_VRAM) {
      if (usage & NOUVEAU_TRANSFER_DISCARD) {
         /* Set up a staging area for the user to write to. It will be copied
          * back into VRAM on unmap. */
         if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE)
            buf->status &= NOUVEAU_BUFFER_STATUS_REALLOC_MASK;
         nouveau_transfer_staging(nv, tx, true);
      } else {
         if (buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
            /* The GPU is currently writing to this buffer. Copy its current
             * contents to a staging area in the GART. This is necessary since
             * not the whole area being mapped is being discarded.
             */
            if (buf->data) {
               align_free(buf->data);
               buf->data = NULL;
            }
            nouveau_transfer_staging(nv, tx, false);
            nouveau_transfer_read(nv, tx);
         } else {
            /* The buffer is currently idle. Create a staging area for writes,
             * and make sure that the cached data is up-to-date. */
            if (usage & PIPE_MAP_WRITE)
               nouveau_transfer_staging(nv, tx, true);
            if (!buf->data)
               nouveau_buffer_cache(nv, buf);
         }
      }
      return buf->data ? (buf->data + box->x) : tx->map;
   } else
   if (unlikely(buf->domain == 0)) {
      return buf->data + box->x;
   }

   /* At this point, buf->domain == GART */

   if (nouveau_buffer_should_discard(buf, usage)) {
      int ref = buf->base.reference.count - 1;
      nouveau_buffer_reallocate(nv->screen, buf, buf->domain);
      if (ref > 0) /* any references inside context possible ? */
         nv->invalidate_resource_storage(nv, &buf->base, ref);
   }

   /* Note that nouveau_bo_map ends up doing a nouveau_bo_wait with the
    * relevant flags. If buf->mm is set, that means this resource is part of a
    * larger slab bo that holds multiple resources. So in that case, don't
    * wait on the whole slab and instead use the logic below to return a
    * reasonable buffer for that case.
    */
   ret = BO_MAP(nv->screen, buf->bo,
                buf->mm ? 0 : nouveau_screen_transfer_flags(usage),
                nv->client);
   if (ret) {
      FREE(tx);
      return NULL;
   }
   map = (uint8_t *)buf->bo->map + buf->offset + box->x;

   /* using kernel fences only if !buf->mm */
   if ((usage & PIPE_MAP_UNSYNCHRONIZED) || !buf->mm)
      return map;

   /* If the GPU is currently reading/writing this buffer, we shouldn't
    * interfere with its progress. So instead we either wait for the GPU to
    * complete its operation, or set up a staging area to perform our work in.
    */
   if (nouveau_buffer_busy(buf, usage & PIPE_MAP_READ_WRITE)) {
      if (unlikely(usage & (PIPE_MAP_DISCARD_WHOLE_RESOURCE |
                            PIPE_MAP_PERSISTENT))) {
         /* Discarding was not possible, must sync because
          * subsequent transfers might use UNSYNCHRONIZED. */
         nouveau_buffer_sync(nv, buf, usage & PIPE_MAP_READ_WRITE);
      } else
      if (usage & PIPE_MAP_DISCARD_RANGE) {
         /* The whole range is being discarded, so it doesn't matter what was
          * there before. No need to copy anything over. */
         nouveau_transfer_staging(nv, tx, true);
         map = tx->map;
      } else
      if (nouveau_buffer_busy(buf, PIPE_MAP_READ)) {
         if (usage & PIPE_MAP_DONTBLOCK)
            map = NULL;
         else
            nouveau_buffer_sync(nv, buf, usage & PIPE_MAP_READ_WRITE);
      } else {
         /* It is expected that the returned buffer be a representation of the
          * data in question, so we must copy it over from the buffer. */
         nouveau_transfer_staging(nv, tx, true);
         if (tx->map)
            memcpy(tx->map, map, box->width);
         map = tx->map;
      }
   }
   if (!map)
      FREE(tx);
   return map;
}
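
/* Illustrative caller (a sketch, not code from this driver): state trackers
 * reach this function through the gallium transfer interface, roughly:
 *
 *    struct pipe_transfer *xfer;
 *    struct pipe_box box;
 *    u_box_1d(offset, length, &box);
 *    uint8_t *p = pipe->buffer_map(pipe, res, 0,
 *                                  PIPE_MAP_WRITE | PIPE_MAP_DISCARD_RANGE,
 *                                  &box, &xfer);
 *    if (p) {
 *       memcpy(p, src, length);
 *       pipe->buffer_unmap(pipe, xfer);
 *    }
 *
 * The exact entry-point names (buffer_map/buffer_unmap vs. transfer_map)
 * depend on the gallium interface version in this tree; what matters here
 * are the box and usage-flag semantics described above.
 */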



void
nouveau_buffer_transfer_flush_region(struct pipe_context *pipe,
                                     struct pipe_transfer *transfer,
                                     const struct pipe_box *box)
{
   struct nouveau_transfer *tx = nouveau_transfer(transfer);
   struct nv04_resource *buf = nv04_resource(transfer->resource);

   if (tx->map)
      nouveau_transfer_write(nouveau_context(pipe), tx, box->x, box->width);

   util_range_add(&buf->base, &buf->valid_buffer_range,
                  tx->base.box.x + box->x,
                  tx->base.box.x + box->x + box->width);
}

/* Unmap stage of the transfer. If it was a WRITE transfer and the map that
 * was returned was not the real resource's data, this needs to transfer the
 * data back to the resource.
 *
 * Also marks vbo dirty based on the buffer's binding
 */
void
nouveau_buffer_transfer_unmap(struct pipe_context *pipe,
                              struct pipe_transfer *transfer)
{
   struct nouveau_context *nv = nouveau_context(pipe);
   struct nv04_resource *buf = nv04_resource(transfer->resource);

   if (buf->status & NOUVEAU_BUFFER_STATUS_USER_PTR)
      return nouveau_user_ptr_transfer_unmap(pipe, transfer);

   struct nouveau_transfer *tx = nouveau_transfer(transfer);

   if (tx->base.usage & PIPE_MAP_WRITE) {
      if (!(tx->base.usage & PIPE_MAP_FLUSH_EXPLICIT)) {
         if (tx->map)
            nouveau_transfer_write(nv, tx, 0, tx->base.box.width);

         util_range_add(&buf->base, &buf->valid_buffer_range,
                        tx->base.box.x, tx->base.box.x + tx->base.box.width);
      }

      if (likely(buf->domain)) {
         const uint8_t bind = buf->base.bind;
         /* make sure we invalidate dedicated caches */
         if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
            nv->vbo_dirty = true;
      }
   }

   if (!tx->bo && (tx->base.usage & PIPE_MAP_WRITE))
      NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_direct, tx->base.box.width);

   nouveau_buffer_transfer_del(nv, tx);
   FREE(tx);
}


void
nouveau_copy_buffer(struct nouveau_context *nv,
                    struct nv04_resource *dst, unsigned dstx,
                    struct nv04_resource *src, unsigned srcx, unsigned size)
{
   assert(dst->base.target == PIPE_BUFFER && src->base.target == PIPE_BUFFER);

   if (likely(dst->domain) && likely(src->domain)) {
      nv->copy_data(nv,
                    dst->bo, dst->offset + dstx, dst->domain,
                    src->bo, src->offset + srcx, src->domain, size);

      dst->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      nouveau_fence_ref(nv->fence, &dst->fence);
      nouveau_fence_ref(nv->fence, &dst->fence_wr);

      src->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
      nouveau_fence_ref(nv->fence, &src->fence);
   } else {
      struct pipe_box src_box;
      src_box.x = srcx;
      src_box.y = 0;
      src_box.z = 0;
      src_box.width = size;
      src_box.height = 1;
      src_box.depth = 1;
      util_resource_copy_region(&nv->pipe,
                                &dst->base, 0, dstx, 0, 0,
                                &src->base, 0, &src_box);
   }

   util_range_add(&dst->base, &dst->valid_buffer_range, dstx, dstx + size);
}
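
/* Both copy paths end with the destination range marked valid. The GPU path
 * also updates the fence/status bookkeeping that the map logic above relies
 * on (GPU_WRITING on dst, GPU_READING on src); the fallback path goes
 * through util_resource_copy_region(), i.e. a mapped memcpy, and gets its
 * synchronization from the transfer_map path instead.
 */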


void *
nouveau_resource_map_offset(struct nouveau_context *nv,
                            struct nv04_resource *res, uint32_t offset,
                            uint32_t flags)
{
   if (unlikely(res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY) ||
       unlikely(res->status & NOUVEAU_BUFFER_STATUS_USER_PTR))
      return res->data + offset;

   if (res->domain == NOUVEAU_BO_VRAM) {
      if (!res->data || (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING))
         nouveau_buffer_cache(nv, res);
   }
   if (res->domain != NOUVEAU_BO_GART)
      return res->data + offset;

   if (res->mm) {
      unsigned rw;
      rw = (flags & NOUVEAU_BO_WR) ? PIPE_MAP_WRITE : PIPE_MAP_READ;
      nouveau_buffer_sync(nv, res, rw);
      if (BO_MAP(nv->screen, res->bo, 0, NULL))
         return NULL;
   } else {
      if (BO_MAP(nv->screen, res->bo, flags, nv->client))
         return NULL;
   }
   return (uint8_t *)res->bo->map + res->offset + offset;
}

static void *
nouveau_user_ptr_transfer_map(struct pipe_context *pipe,
                              struct pipe_resource *resource,
                              unsigned level, unsigned usage,
                              const struct pipe_box *box,
                              struct pipe_transfer **ptransfer)
{
   struct nouveau_transfer *tx = MALLOC_STRUCT(nouveau_transfer);
   if (!tx)
      return NULL;
   nouveau_buffer_transfer_init(tx, resource, box, usage);
   *ptransfer = &tx->base;
   return nv04_resource(resource)->data;
}

static void
nouveau_user_ptr_transfer_unmap(struct pipe_context *pipe,
                                struct pipe_transfer *transfer)
{
   struct nouveau_transfer *tx = nouveau_transfer(transfer);
   FREE(tx);
}

struct pipe_resource *
nouveau_buffer_create(struct pipe_screen *pscreen,
                      const struct pipe_resource *templ)
{
   struct nouveau_screen *screen = nouveau_screen(pscreen);
   struct nv04_resource *buffer;
   bool ret;

   buffer = CALLOC_STRUCT(nv04_resource);
   if (!buffer)
      return NULL;

   buffer->base = *templ;
   pipe_reference_init(&buffer->base.reference, 1);
   buffer->base.screen = pscreen;

   if (buffer->base.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
                             PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
      buffer->domain = NOUVEAU_BO_GART;
   } else if (buffer->base.bind == 0 || (buffer->base.bind &
              (screen->vidmem_bindings & screen->sysmem_bindings))) {
      switch (buffer->base.usage) {
      case PIPE_USAGE_DEFAULT:
      case PIPE_USAGE_IMMUTABLE:
         buffer->domain = NV_VRAM_DOMAIN(screen);
         break;
      case PIPE_USAGE_DYNAMIC:
         /* For most apps, we'd have to do staging transfers to avoid sync
          * with this usage, and GART -> GART copies would be suboptimal.
          */
         buffer->domain = NV_VRAM_DOMAIN(screen);
         break;
      case PIPE_USAGE_STAGING:
      case PIPE_USAGE_STREAM:
         buffer->domain = NOUVEAU_BO_GART;
         break;
      default:
         assert(0);
         break;
      }
   } else {
      if (buffer->base.bind & screen->vidmem_bindings)
         buffer->domain = NV_VRAM_DOMAIN(screen);
      else
      if (buffer->base.bind & screen->sysmem_bindings)
         buffer->domain = NOUVEAU_BO_GART;
   }

   ret = nouveau_buffer_allocate(screen, buffer, buffer->domain);

   if (ret == false)
      goto fail;

   if (buffer->domain == NOUVEAU_BO_VRAM && screen->hint_buf_keep_sysmem_copy)
      nouveau_buffer_cache(NULL, buffer);

   NOUVEAU_DRV_STAT(screen, buf_obj_current_count, 1);

   util_range_init(&buffer->valid_buffer_range);

   return &buffer->base;

fail:
   FREE(buffer);
   return NULL;
}
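
/* Domain selection in short: persistent/coherent mappings always live in
 * GART so the CPU pointer stays valid; otherwise the PIPE_USAGE_* hint picks
 * VRAM (DEFAULT, IMMUTABLE, DYNAMIC) or GART (STAGING, STREAM), unless the
 * bind flags match only one of the screen's vidmem/sysmem binding masks, in
 * which case that mask decides.
 */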

struct pipe_resource *
nouveau_buffer_create_from_user(struct pipe_screen *pscreen,
                                const struct pipe_resource *templ,
                                void *user_ptr)
{
   struct nv04_resource *buffer;

   buffer = CALLOC_STRUCT(nv04_resource);
   if (!buffer)
      return NULL;

   buffer->base = *templ;
   /* Set address and data to the same thing for better compatibility with
    * existing code. This is correct, as the same pointer is equally valid on
    * the CPU and the GPU.
    */
   buffer->address = (uintptr_t)user_ptr;
   buffer->data = user_ptr;
   buffer->status = NOUVEAU_BUFFER_STATUS_USER_PTR;
   buffer->base.screen = pscreen;

   pipe_reference_init(&buffer->base.reference, 1);

   return &buffer->base;
}

struct pipe_resource *
nouveau_user_buffer_create(struct pipe_screen *pscreen, void *ptr,
                           unsigned bytes, unsigned bind)
{
   struct nv04_resource *buffer;

   buffer = CALLOC_STRUCT(nv04_resource);
   if (!buffer)
      return NULL;

   pipe_reference_init(&buffer->base.reference, 1);
   buffer->base.screen = pscreen;
   buffer->base.format = PIPE_FORMAT_R8_UNORM;
   buffer->base.usage = PIPE_USAGE_IMMUTABLE;
   buffer->base.bind = bind;
   buffer->base.width0 = bytes;
   buffer->base.height0 = 1;
   buffer->base.depth0 = 1;

   buffer->data = ptr;
   buffer->status = NOUVEAU_BUFFER_STATUS_USER_MEMORY;

   util_range_init(&buffer->valid_buffer_range);
   util_range_add(&buffer->base, &buffer->valid_buffer_range, 0, bytes);

   return &buffer->base;
}

static inline bool
nouveau_buffer_data_fetch(struct nouveau_context *nv, struct nv04_resource *buf,
                          struct nouveau_bo *bo, unsigned offset, unsigned size)
{
   if (!nouveau_buffer_malloc(buf))
      return false;
   if (BO_MAP(nv->screen, bo, NOUVEAU_BO_RD, nv->client))
      return false;
   memcpy(buf->data, (uint8_t *)bo->map + offset, size);
   return true;
}

/* Migrate a linear buffer (vertex, index, constants) USER -> GART -> VRAM. */
bool
nouveau_buffer_migrate(struct nouveau_context *nv,
                       struct nv04_resource *buf, const unsigned new_domain)
{
   assert(!(buf->status & NOUVEAU_BUFFER_STATUS_USER_PTR));

   struct nouveau_screen *screen = nv->screen;
   struct nouveau_bo *bo;
   const unsigned old_domain = buf->domain;
   unsigned size = buf->base.width0;
   unsigned offset;
   int ret;

   assert(new_domain != old_domain);

   if (new_domain == NOUVEAU_BO_GART && old_domain == 0) {
      if (!nouveau_buffer_allocate(screen, buf, new_domain))
         return false;
      ret = BO_MAP(nv->screen, buf->bo, 0, nv->client);
      if (ret)
         return false;
      memcpy((uint8_t *)buf->bo->map + buf->offset, buf->data, size);
      align_free(buf->data);
   } else
   if (old_domain != 0 && new_domain != 0) {
      struct nouveau_mm_allocation *mm = buf->mm;

      if (new_domain == NOUVEAU_BO_VRAM) {
         /* keep a system memory copy of our data in case we hit a fallback */
         if (!nouveau_buffer_data_fetch(nv, buf, buf->bo, buf->offset, size))
            return false;
         if (nouveau_mesa_debug)
            debug_printf("migrating %u KiB to VRAM\n", size / 1024);
      }

      offset = buf->offset;
      bo = buf->bo;
      buf->bo = NULL;
      buf->mm = NULL;
      nouveau_buffer_allocate(screen, buf, new_domain);

      nv->copy_data(nv, buf->bo, buf->offset, new_domain,
                    bo, offset, old_domain, buf->base.width0);

      nouveau_fence_work(nv->fence, nouveau_fence_unref_bo, bo);
      if (mm)
         release_allocation(&mm, nv->fence);
   } else
   if (new_domain == NOUVEAU_BO_VRAM && old_domain == 0) {
      struct nouveau_transfer tx;
      if (!nouveau_buffer_allocate(screen, buf, NOUVEAU_BO_VRAM))
         return false;
      tx.base.resource = &buf->base;
      tx.base.box.x = 0;
      tx.base.box.width = buf->base.width0;
      tx.bo = NULL;
      tx.map = NULL;
      if (!nouveau_transfer_staging(nv, &tx, false))
         return false;
      nouveau_transfer_write(nv, &tx, 0, tx.base.box.width);
      nouveau_buffer_transfer_del(nv, &tx);
   } else
      return false;

   assert(buf->domain == new_domain);
   return true;
}
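
/* The three supported migration paths above are:
 *   malloc'd sysmem -> GART:  map the new bo and memcpy directly;
 *   GART <-> VRAM:            allocate the new bo and blit with copy_data,
 *                             keeping a sysmem copy when going to VRAM;
 *   malloc'd sysmem -> VRAM:  upload through a staging transfer, since the
 *                             VRAM bo is not CPU-mapped here.
 */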

/* Migrate data from glVertexAttribPointer(non-VBO) user buffers to GART.
 * We'd like to only allocate @size bytes here, but then we'd have to rebase
 * the vertex indices ...
 */
bool
nouveau_user_buffer_upload(struct nouveau_context *nv,
                           struct nv04_resource *buf,
                           unsigned base, unsigned size)
{
   assert(!(buf->status & NOUVEAU_BUFFER_STATUS_USER_PTR));

   struct nouveau_screen *screen = nouveau_screen(buf->base.screen);
   int ret;

   assert(buf->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY);

   buf->base.width0 = base + size;
   if (!nouveau_buffer_reallocate(screen, buf, NOUVEAU_BO_GART))
      return false;

   ret = BO_MAP(nv->screen, buf->bo, 0, nv->client);
   if (ret)
      return false;
   memcpy((uint8_t *)buf->bo->map + buf->offset + base, buf->data + base, size);

   return true;
}

/* Invalidate underlying buffer storage, reset fences, reallocate to non-busy
 * buffer.
 */
void
nouveau_buffer_invalidate(struct pipe_context *pipe,
                          struct pipe_resource *resource)
{
   struct nouveau_context *nv = nouveau_context(pipe);
   struct nv04_resource *buf = nv04_resource(resource);
   int ref = buf->base.reference.count - 1;

   assert(!(buf->status & NOUVEAU_BUFFER_STATUS_USER_PTR));

   /* Shared buffers shouldn't get reallocated */
   if (unlikely(buf->base.bind & PIPE_BIND_SHARED))
      return;

   /* If the buffer is sub-allocated and not currently being written, just
    * wipe the valid buffer range. Otherwise we have to create fresh
    * storage. (We don't keep track of fences for non-sub-allocated BO's.)
    */
   if (buf->mm && !nouveau_buffer_busy(buf, PIPE_MAP_WRITE)) {
      util_range_set_empty(&buf->valid_buffer_range);
   } else {
      nouveau_buffer_reallocate(nv->screen, buf, buf->domain);
      if (ref > 0) /* any references inside context possible ? */
         nv->invalidate_resource_storage(nv, &buf->base, ref);
   }
}


/* Scratch data allocation. */

static inline int
nouveau_scratch_bo_alloc(struct nouveau_context *nv, struct nouveau_bo **pbo,
                         unsigned size)
{
   return nouveau_bo_new(nv->screen->device, NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
                         4096, size, NULL, pbo);
}

static void
nouveau_scratch_unref_bos(void *d)
{
   struct runout *b = d;
   int i;

   for (i = 0; i < b->nr; ++i)
      nouveau_bo_ref(NULL, &b->bo[i]);

   FREE(b);
}

void
nouveau_scratch_runout_release(struct nouveau_context *nv)
{
   if (!nv->scratch.runout)
      return;

   if (!nouveau_fence_work(nv->fence, nouveau_scratch_unref_bos,
         nv->scratch.runout))
      return;

   nv->scratch.end = 0;
   nv->scratch.runout = NULL;
}

/* Allocate an extra bo if we can't fit everything we need simultaneously.
 * (Could happen for very large user arrays.)
 */
static inline bool
nouveau_scratch_runout(struct nouveau_context *nv, unsigned size)
{
   int ret;
   unsigned n;

   if (nv->scratch.runout)
      n = nv->scratch.runout->nr;
   else
      n = 0;
   nv->scratch.runout = REALLOC(nv->scratch.runout, n == 0 ? 0 :
                                (sizeof(*nv->scratch.runout) + (n + 0) * sizeof(void *)),
                                 sizeof(*nv->scratch.runout) + (n + 1) * sizeof(void *));
   nv->scratch.runout->nr = n + 1;
   nv->scratch.runout->bo[n] = NULL;

   ret = nouveau_scratch_bo_alloc(nv, &nv->scratch.runout->bo[n], size);
   if (!ret) {
      ret = BO_MAP(nv->screen, nv->scratch.runout->bo[n], 0, NULL);
      if (ret)
         nouveau_bo_ref(NULL, &nv->scratch.runout->bo[--nv->scratch.runout->nr]);
   }
   if (!ret) {
      nv->scratch.current = nv->scratch.runout->bo[n];
      nv->scratch.offset = 0;
      nv->scratch.end = size;
      nv->scratch.map = nv->scratch.current->map;
   }
   return !ret;
}

/* Continue to next scratch buffer, if available (no wrapping, large enough).
 * Allocate it if it has not yet been created.
 */
static inline bool
nouveau_scratch_next(struct nouveau_context *nv, unsigned size)
{
   struct nouveau_bo *bo;
   int ret;
   const unsigned i = (nv->scratch.id + 1) % NOUVEAU_MAX_SCRATCH_BUFS;

   if ((size > nv->scratch.bo_size) || (i == nv->scratch.wrap))
      return false;
   nv->scratch.id = i;

   bo = nv->scratch.bo[i];
   if (!bo) {
      ret = nouveau_scratch_bo_alloc(nv, &bo, nv->scratch.bo_size);
      if (ret)
         return false;
      nv->scratch.bo[i] = bo;
   }
   nv->scratch.current = bo;
   nv->scratch.offset = 0;
   nv->scratch.end = nv->scratch.bo_size;

   ret = BO_MAP(nv->screen, bo, NOUVEAU_BO_WR, nv->client);
   if (!ret)
      nv->scratch.map = bo->map;
   return !ret;
}

static bool
nouveau_scratch_more(struct nouveau_context *nv, unsigned min_size)
{
   bool ret;

   ret = nouveau_scratch_next(nv, min_size);
   if (!ret)
      ret = nouveau_scratch_runout(nv, min_size);
   return ret;
}


/* Copy data to a scratch buffer and return address & bo the data resides in. */
uint64_t
nouveau_scratch_data(struct nouveau_context *nv,
                     const void *data, unsigned base, unsigned size,
                     struct nouveau_bo **bo)
{
   unsigned bgn = MAX2(base, nv->scratch.offset);
   unsigned end = bgn + size;

   if (end >= nv->scratch.end) {
      end = base + size;
      if (!nouveau_scratch_more(nv, end))
         return 0;
      bgn = base;
   }
   nv->scratch.offset = align(end, 4);

   memcpy(nv->scratch.map + bgn, (const uint8_t *)data + base, size);

   *bo = nv->scratch.current;
   return (*bo)->offset + (bgn - base);
}
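
/* The returned GPU address is rebased by -base: it corresponds to byte 0 of
 * the original user array even though only bytes [base, base + size) were
 * copied into the scratch bo. Callers can therefore keep using unmodified
 * offsets and vertex indices (see the comment on nouveau_user_buffer_upload).
 */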

void *
nouveau_scratch_get(struct nouveau_context *nv,
                    unsigned size, uint64_t *gpu_addr, struct nouveau_bo **pbo)
{
   unsigned bgn = nv->scratch.offset;
   unsigned end = nv->scratch.offset + size;

   if (end >= nv->scratch.end) {
      end = size;
      if (!nouveau_scratch_more(nv, end))
         return NULL;
      bgn = 0;
   }
   nv->scratch.offset = align(end, 4);

   *pbo = nv->scratch.current;
   *gpu_addr = nv->scratch.current->offset + bgn;
   return nv->scratch.map + bgn;
}