xref: /aosp_15_r20/external/mesa3d/src/gallium/frontends/nine/buffer9.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
/*
 * Copyright 2011 Joakim Sindholt <[email protected]>
 * Copyright 2015 Patrick Rudolph <[email protected]>
 * SPDX-License-Identifier: MIT
 */

#include "buffer9.h"
#include "device9.h"
#include "indexbuffer9.h"
#include "nine_buffer_upload.h"
#include "nine_helpers.h"
#include "nine_pipe.h"

#include "pipe/p_screen.h"
#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
#include "util/format/u_formats.h"
#include "util/box.h"
#include "util/u_inlines.h"

#define DBG_CHANNEL (DBG_INDEXBUFFER|DBG_VERTEXBUFFER)

HRESULT
NineBuffer9_ctor( struct NineBuffer9 *This,
                        struct NineUnknownParams *pParams,
                        D3DRESOURCETYPE Type,
                        DWORD Usage,
                        UINT Size,
                        D3DPOOL Pool )
{
    struct pipe_resource *info = &This->base.info;
    HRESULT hr;

    DBG("This=%p Size=0x%x Usage=%x Pool=%u\n", This, Size, Usage, Pool);

    user_assert(Pool != D3DPOOL_SCRATCH, D3DERR_INVALIDCALL);

    This->maps = MALLOC(sizeof(struct NineTransfer));
    if (!This->maps)
        return E_OUTOFMEMORY;
    This->nlocks = 0;
    This->nmaps = 0;
    This->maxmaps = 1;
    This->size = Size;

    info->screen = pParams->device->screen;
    info->target = PIPE_BUFFER;
    info->format = PIPE_FORMAT_R8_UNORM;
    info->width0 = Size;
    info->flags = 0;
    /* Note: WRITEONLY is just a hint for resource placement; the resource
     * can still be read (but more slowly). */
    info->bind = (Type == D3DRTYPE_INDEXBUFFER) ? PIPE_BIND_INDEX_BUFFER : PIPE_BIND_VERTEX_BUFFER;

    /* Software vertex processing:
     * If the device uses full software vertex processing,
     * then the buffer is supposed to be used only for sw processing.
     * For mixed vertex processing, buffers with D3DUSAGE_SOFTWAREPROCESSING
     * can be used for both sw and hw processing.
     * These buffers are expected to be stored in RAM.
     * Apps expect locking the full buffer with no flags, then
     * rendering a few primitives, then locking again, etc.,
     * to be a fast pattern. Only the SYSTEMMEM DYNAMIC path
     * will give that pattern acceptable performance in our case.
     * An alternative would be, when sw processing is detected, to
     * convert Draw* calls to Draw*Up calls. */
    if (Usage & D3DUSAGE_SOFTWAREPROCESSING ||
        pParams->device->params.BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING) {
        Pool = D3DPOOL_SYSTEMMEM;
        Usage |= D3DUSAGE_DYNAMIC;
        /* Note: the application cannot retrieve Pool and Usage */
    }
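    /* Illustrative note (added, not in the original source): a device created
     * with D3DCREATE_SOFTWARE_VERTEXPROCESSING therefore routes every buffer
     * through the SYSTEMMEM DYNAMIC path below, whatever Pool and Usage the
     * app originally requested. */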

    /* Always use the DYNAMIC path for SYSTEMMEM.
     * If the app uses the vertex buffer in a dynamic fashion,
     * this is going to be very significantly faster that way.
     * If the app uses the vertex buffer in a static fashion,
     * instead of being filled all at once, the buffer will be filled
     * little by little, until it is fully filled, thus the perf hit
     * will be very small. */
    if (Pool == D3DPOOL_SYSTEMMEM)
        Usage |= D3DUSAGE_DYNAMIC;
    /* It is hard to find clear information on where to place the buffer in
     * memory depending on the flag.
     * MSDN: resources are static, except for those with DYNAMIC, which is why
     *   you can only use DISCARD on them.
     * ATI doc: The driver has all the liberty it wants for making things
     *   static or not.
     *   MANAGED: RAM + upload to a Vram copy at unlock (msdn and nvidia doc say
     *   at the first draw call using the buffer)
     *   DEFAULT + Usage = 0 => System memory backing for easy read access
     *   (That doc is very unclear on the details, like whether some copies to
     *   a vram copy are involved or not).
     *   DEFAULT + WRITEONLY => Vram
     *   DEFAULT + WRITEONLY + DYNAMIC => Either a Vram buffer or GTT_WC, depending on what the driver wants.
     *   SYSTEMMEM: Same as MANAGED, but handled by the driver instead of the runtime (which means
     *   some small behavior differences between vendors). Implementing it exactly as MANAGED should
     *   be fine.
     */
    if (Pool == D3DPOOL_SYSTEMMEM && Usage & D3DUSAGE_DYNAMIC)
        info->usage = PIPE_USAGE_STREAM;
    else if (Pool != D3DPOOL_DEFAULT)
        info->usage = PIPE_USAGE_DEFAULT;
    else if (Usage & D3DUSAGE_DYNAMIC && Usage & D3DUSAGE_WRITEONLY)
        info->usage = PIPE_USAGE_STREAM;
    else if (Usage & D3DUSAGE_WRITEONLY)
        info->usage = PIPE_USAGE_DEFAULT;
    /* For the remaining two cases, PIPE_USAGE_STAGING would probably be
     * a good fit according to the doc. However it seems rather a mistake
     * for apps to use these (a mistake that does really happen). Try
     * to pick the flags that are the best compromise between the real
     * behaviour and what buggy apps should get for better performance. */
    else if (Usage & D3DUSAGE_DYNAMIC)
        info->usage = PIPE_USAGE_STREAM;
    else
        info->usage = PIPE_USAGE_DYNAMIC;
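    /* Recap of the mapping above (comment added for readability, derived
     * directly from the chain of conditions):
     *   SYSTEMMEM + DYNAMIC             -> PIPE_USAGE_STREAM
     *   MANAGED (any non-DEFAULT pool)  -> PIPE_USAGE_DEFAULT
     *   DEFAULT + DYNAMIC + WRITEONLY   -> PIPE_USAGE_STREAM
     *   DEFAULT + WRITEONLY             -> PIPE_USAGE_DEFAULT
     *   DEFAULT + DYNAMIC               -> PIPE_USAGE_STREAM
     *   DEFAULT (no relevant flags)     -> PIPE_USAGE_DYNAMIC
     * (SYSTEMMEM always has DYNAMIC here, forced above; SCRATCH was
     * rejected at entry.) */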

    /* When Writeonly is not set, we don't want to enable the
     * optimizations */
    This->discard_nooverwrite_only = !!(Usage & D3DUSAGE_WRITEONLY) &&
                                     pParams->device->buffer_upload;
    /* if (pDesc->Usage & D3DUSAGE_DONOTCLIP) { } */
    /* if (pDesc->Usage & D3DUSAGE_NONSECURE) { } */
    /* if (pDesc->Usage & D3DUSAGE_NPATCHES) { } */
    /* if (pDesc->Usage & D3DUSAGE_POINTS) { } */
    /* if (pDesc->Usage & D3DUSAGE_RTPATCHES) { } */
    /* if (pDesc->Usage & D3DUSAGE_TEXTAPI) { } */

    info->height0 = 1;
    info->depth0 = 1;
    info->array_size = 1;
    info->last_level = 0;
    info->nr_samples = 0;
    info->nr_storage_samples = 0;

    hr = NineResource9_ctor(&This->base, pParams, NULL, true,
                            Type, Pool, Usage);

    if (FAILED(hr))
        return hr;

    if (Pool != D3DPOOL_DEFAULT) {
        This->managed.data = align_calloc(
            nine_format_get_level_alloc_size(This->base.info.format,
                                             Size, 1, 0), 32);
        if (!This->managed.data)
            return E_OUTOFMEMORY;
        This->managed.dirty = true;
        u_box_1d(0, Size, &This->managed.dirty_box);
        u_box_1d(0, 0, &This->managed.valid_region);
        u_box_1d(0, 0, &This->managed.required_valid_region);
        u_box_1d(0, 0, &This->managed.filled_region);
        This->managed.can_unsynchronized = true;
        This->managed.num_worker_thread_syncs = 0;
        list_inithead(&This->managed.list);
        list_inithead(&This->managed.list2);
        list_add(&This->managed.list2, &pParams->device->managed_buffers);
    }

    return D3D_OK;
}
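
/* Illustrative, hypothetical construction sketch (not from the original
 * file): creating a dynamic write-only vertex buffer in the default pool.
 *
 *   hr = NineBuffer9_ctor(This, pParams, D3DRTYPE_VERTEXBUFFER,
 *                         D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY,
 *                         65536, D3DPOOL_DEFAULT);
 *
 * With these arguments the chain above picks PIPE_USAGE_STREAM, and
 * discard_nooverwrite_only is enabled when device->buffer_upload exists. */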

void
NineBuffer9_dtor( struct NineBuffer9 *This )
{
    DBG("This=%p\n", This);

    if (This->maps) {
        while (This->nlocks) {
            NineBuffer9_Unlock(This);
        }
        assert(!This->nmaps);
        FREE(This->maps);
    }

    if (This->base.pool != D3DPOOL_DEFAULT) {
        if (This->managed.data)
            align_free(This->managed.data);
        if (list_is_linked(&This->managed.list))
            list_del(&This->managed.list);
        if (list_is_linked(&This->managed.list2))
            list_del(&This->managed.list2);
    }

    if (This->buf)
        nine_upload_release_buffer(This->base.base.device->buffer_upload, This->buf);

    NineResource9_dtor(&This->base);
}

struct pipe_resource *
NineBuffer9_GetResource( struct NineBuffer9 *This, unsigned *offset )
{
    if (This->buf)
        return nine_upload_buffer_resource_and_offset(This->buf, offset);
    *offset = 0;
    return NineResource9_GetResource(&This->base);
}

static void
NineBuffer9_RebindIfRequired( struct NineBuffer9 *This,
                              struct NineDevice9 *device,
                              struct pipe_resource *resource,
                              unsigned offset )
{
    int i;

    if (!This->bind_count)
        return;
    for (i = 0; i < device->caps.MaxStreams; i++) {
        if (device->state.stream[i] == (struct NineVertexBuffer9 *)This)
            nine_context_set_stream_source_apply(device, i,
                                                 resource,
                                                 device->state.vtxbuf[i].buffer_offset + offset,
                                                 device->state.vtxstride[i]);
    }
    if (device->state.idxbuf == (struct NineIndexBuffer9 *)This)
        nine_context_set_indices_apply(device, resource,
                                       ((struct NineIndexBuffer9 *)This)->index_size,
                                       offset);
}

HRESULT NINE_WINAPI
NineBuffer9_Lock( struct NineBuffer9 *This,
                        UINT OffsetToLock,
                        UINT SizeToLock,
                        void **ppbData,
                        DWORD Flags )
{
    struct NineDevice9 *device = This->base.base.device;
    struct pipe_box box;
    struct pipe_context *pipe;
    void *data;
    unsigned usage;

    DBG("This=%p(pipe=%p) OffsetToLock=0x%x, SizeToLock=0x%x, Flags=0x%x\n",
        This, This->base.resource,
        OffsetToLock, SizeToLock, Flags);

    user_assert(ppbData, E_POINTER);

    if (SizeToLock == 0) {
        SizeToLock = This->size - OffsetToLock;
        user_warn(OffsetToLock != 0);
    }
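    /* Illustrative note (added): per the D3D9 convention implemented above,
     * Lock(This, 0, 0, &p, 0) maps the whole buffer; a nonzero offset
     * combined with SizeToLock == 0 is dubious and only triggers the
     * user_warn above. */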

    /* Out-of-bounds writes seem to have to be taken into account for these.
     * TODO: Do more tests (is it only at the buffer's first lock? etc).
     * Since these buffers are supposed to be locked once and never
     * written again (MANAGED or DYNAMIC is used for the other use cases),
     * performance should be unaffected. */
    if (!(This->base.usage & D3DUSAGE_DYNAMIC) && This->base.pool == D3DPOOL_DEFAULT)
        SizeToLock = This->size - OffsetToLock;

    SizeToLock = MIN2(SizeToLock, This->size - OffsetToLock); /* Do not read or track out of the buffer */
    u_box_1d(OffsetToLock, SizeToLock, &box);

    if (This->base.pool != D3DPOOL_DEFAULT) {
        /* MANAGED: READONLY doesn't dirty the buffer, nor
         * wait for the upload in the worker thread.
         * SYSTEMMEM: AMD/NVidia: all locks dirty the full buffer. Not on Intel.
         * For Nvidia, SYSTEMMEM behaves as if there is no worker thread.
         * On AMD, READONLY and NOOVERWRITE do dirty the buffer, but do not sync the previous uploads
         * in the worker thread. On Intel only NOOVERWRITE has that effect.
         * We implement the AMD behaviour. */
        if (This->base.pool == D3DPOOL_MANAGED) {
            if (!(Flags & D3DLOCK_READONLY)) {
                if (!This->managed.dirty) {
                    assert(list_is_empty(&This->managed.list));
                    This->managed.dirty = true;
                    This->managed.dirty_box = box;
                    /* Flush if regions pending to be uploaded would be dirtied */
                    if (p_atomic_read(&This->managed.pending_upload)) {
                        u_box_intersect_1d(&box, &box, &This->managed.upload_pending_regions);
                        if (box.width != 0)
                            nine_csmt_process(This->base.base.device);
                    }
                } else
                    u_box_union_1d(&This->managed.dirty_box, &This->managed.dirty_box, &box);
                /* Tests trying to draw while the buffer is locked show that
                 * SYSTEMMEM/MANAGED buffers are made dirty at Lock time */
                BASEBUF_REGISTER_UPDATE(This);
            }
        } else {
            if (!(Flags & (D3DLOCK_READONLY|D3DLOCK_NOOVERWRITE)) &&
                p_atomic_read(&This->managed.pending_upload)) {
                This->managed.num_worker_thread_syncs++;
                /* If we sync too often, pick the vertex_uploader path */
                if (This->managed.num_worker_thread_syncs >= 3)
                    This->managed.can_unsynchronized = false;
                nine_csmt_process(This->base.base.device);
                /* Note: As DISCARD is not relevant for SYSTEMMEM,
                 * NOOVERWRITE might have a similar meaning to what is
                 * in the D3D7 doc. Basically that data from previous draws
                 * OF THIS FRAME are unaffected. As we flush csmt in Present(),
                 * we should be correct. In some parts of the doc, the notion
                 * of frame is implied to be related to Begin/EndScene(),
                 * but tests show NOOVERWRITE after EndScene() doesn't flush
                 * the csmt thread. */
            }
            This->managed.dirty = true;
            u_box_1d(0, This->size, &This->managed.dirty_box); /* systemmem non-dynamic */
            u_box_1d(0, 0, &This->managed.valid_region); /* systemmem dynamic */
            BASEBUF_REGISTER_UPDATE(This);
        }

        *ppbData = (int8_t *)This->managed.data + OffsetToLock;
        DBG("returning pointer %p\n", *ppbData);
        This->nlocks++;
        return D3D_OK;
    }

    /* Driver ddi doc: READONLY is never passed to the device. So it can only
     * have an effect on things handled by the driver (MANAGED pool for example).
     * Msdn doc: DISCARD and NOOVERWRITE are only for DYNAMIC.
     * ATI doc: You can use DISCARD and NOOVERWRITE without DYNAMIC.
     * Msdn doc: D3DLOCK_DONOTWAIT is not among the valid flags for buffers.
     * Our tests: On win 7 nvidia, D3DLOCK_DONOTWAIT does return
     * D3DERR_WASSTILLDRAWING if the resource is in use, except for DYNAMIC.
     * Our tests: some apps do use both DISCARD and NOOVERWRITE at the same
     * time. On Windows it seems to return different pointers depending on
     * conditions, creation flags and drivers. However these tests indicate
     * that having NOOVERWRITE win is valid behaviour (NVidia).
     */

    /* Have NOOVERWRITE win over DISCARD. This is allowed (see above) and
     * it prevents overconsuming buffers if apps do use both at the same time. */
    if ((Flags & (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE)) == (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE))
        Flags &= ~D3DLOCK_DISCARD;

    if (Flags & D3DLOCK_DISCARD)
        usage = PIPE_MAP_WRITE | PIPE_MAP_DISCARD_WHOLE_RESOURCE;
    else if (Flags & D3DLOCK_NOOVERWRITE)
        usage = PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED;
    else
        /* Do not ask for READ if writeonly and default pool (should be safe enough,
         * as the doc says the app shouldn't expect reading to work with writeonly). */
        usage = (This->base.usage & D3DUSAGE_WRITEONLY) ?
            PIPE_MAP_WRITE :
            PIPE_MAP_READ_WRITE;
    if (Flags & D3DLOCK_DONOTWAIT && !(This->base.usage & D3DUSAGE_DYNAMIC))
        usage |= PIPE_MAP_DONTBLOCK;
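    /* Recap of the D3DLOCK -> PIPE_MAP translation above (comment added
     * for readability, derived from the code):
     *   DISCARD                  -> PIPE_MAP_WRITE | PIPE_MAP_DISCARD_WHOLE_RESOURCE
     *   NOOVERWRITE              -> PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED
     *   neither + WRITEONLY      -> PIPE_MAP_WRITE
     *   neither                  -> PIPE_MAP_READ_WRITE
     *   DONOTWAIT (non-DYNAMIC)  -> additionally PIPE_MAP_DONTBLOCK
     */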

    This->discard_nooverwrite_only &= !!(Flags & (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE));

    if (This->nmaps == This->maxmaps) {
        struct NineTransfer *newmaps =
            REALLOC(This->maps, sizeof(struct NineTransfer)*This->maxmaps,
                    sizeof(struct NineTransfer)*(This->maxmaps << 1));
        if (newmaps == NULL)
            return E_OUTOFMEMORY;

        This->maxmaps <<= 1;
        This->maps = newmaps;
    }

    if (This->buf && !This->discard_nooverwrite_only) {
        struct pipe_box src_box;
        unsigned offset;
        struct pipe_resource *src_res;
        DBG("Disabling nine_subbuffer for a buffer having "
            "used a nine_subbuffer buffer\n");
        /* Copy the buffer content to the buffer resource, which
         * we will now use.
         * Note: The behaviour may be different from what is expected
         * with a double lock. However applications can't really make expectations
         * about double locks, and don't really use them, so that's ok. */
        src_res = nine_upload_buffer_resource_and_offset(This->buf, &offset);
        u_box_1d(offset, This->size, &src_box);

        pipe = NineDevice9_GetPipe(device);
        pipe->resource_copy_region(pipe, This->base.resource, 0, 0, 0, 0,
                                   src_res, 0, &src_box);
        /* Release the previous resource */
        if (This->nmaps >= 1)
            This->maps[This->nmaps-1].should_destroy_buf = true;
        else
            nine_upload_release_buffer(device->buffer_upload, This->buf);
        This->buf = NULL;
        /* Rebind the buffer */
        NineBuffer9_RebindIfRequired(This, device, This->base.resource, 0);
    }

    This->maps[This->nmaps].transfer = NULL;
    This->maps[This->nmaps].is_pipe_secondary = false;
    This->maps[This->nmaps].buf = NULL;
    This->maps[This->nmaps].should_destroy_buf = false;

    if (This->discard_nooverwrite_only) {
        if (This->buf && (Flags & D3DLOCK_DISCARD)) {
            /* Release the previous buffer */
            if (This->nmaps >= 1)
                This->maps[This->nmaps-1].should_destroy_buf = true;
            else
                nine_upload_release_buffer(device->buffer_upload, This->buf);
            This->buf = NULL;
        }

        if (!This->buf) {
            unsigned offset;
            struct pipe_resource *res;
            This->buf = nine_upload_create_buffer(device->buffer_upload, This->base.info.width0);
            res = nine_upload_buffer_resource_and_offset(This->buf, &offset);
            NineBuffer9_RebindIfRequired(This, device, res, offset);
        }

        if (This->buf) {
            This->maps[This->nmaps].buf = This->buf;
            This->nmaps++;
            This->nlocks++;
            DBG("Returning %p\n", nine_upload_buffer_get_map(This->buf) + OffsetToLock);
            *ppbData = nine_upload_buffer_get_map(This->buf) + OffsetToLock;
            return D3D_OK;
        } else {
            /* Fall back to the normal path, and don't try again */
            This->discard_nooverwrite_only = false;
        }
    }

    /* Previous mappings may need pending commands to write to the
     * buffer (staging buffer for example). Before a NOOVERWRITE,
     * we thus need a finish, to guarantee any upload is finished.
     * Note for discard_nooverwrite_only we don't need to do this
     * check as neither discard nor nooverwrite have issues there */
    if (This->need_sync_if_nooverwrite && !(Flags & D3DLOCK_DISCARD) &&
        (Flags & D3DLOCK_NOOVERWRITE)) {
        struct pipe_screen *screen = NineDevice9_GetScreen(device);
        struct pipe_fence_handle *fence = NULL;

        pipe = NineDevice9_GetPipe(device);
        pipe->flush(pipe, &fence, 0);
        (void) screen->fence_finish(screen, NULL, fence, OS_TIMEOUT_INFINITE);
        screen->fence_reference(screen, &fence, NULL);
    }
    This->need_sync_if_nooverwrite = !(Flags & (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE));

    /* When csmt is active, we want to avoid stalls as much as possible,
     * and thus we want to create a new resource on discard and map it
     * with the secondary pipe, instead of waiting on the main pipe. */
    if (Flags & D3DLOCK_DISCARD && device->csmt_active) {
        struct pipe_screen *screen = NineDevice9_GetScreen(device);
        struct pipe_resource *new_res = nine_resource_create_with_retry(device, screen, &This->base.info);
        if (new_res) {
            /* Use the new resource */
            pipe_resource_reference(&This->base.resource, new_res);
            pipe_resource_reference(&new_res, NULL);
            usage = PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED;
            NineBuffer9_RebindIfRequired(This, device, This->base.resource, 0);
            This->maps[This->nmaps].is_pipe_secondary = true;
        }
    } else if (Flags & D3DLOCK_NOOVERWRITE && device->csmt_active)
        This->maps[This->nmaps].is_pipe_secondary = true;

    if (This->maps[This->nmaps].is_pipe_secondary)
        pipe = device->pipe_secondary;
    else
        pipe = NineDevice9_GetPipe(device);

    data = pipe->buffer_map(pipe, This->base.resource, 0,
                              usage, &box, &This->maps[This->nmaps].transfer);

    if (!data) {
        DBG("pipe::buffer_map failed\n"
            " usage = %x\n"
            " box.x = %u\n"
            " box.width = %u\n",
            usage, box.x, box.width);

        if (Flags & D3DLOCK_DONOTWAIT)
            return D3DERR_WASSTILLDRAWING;
        return D3DERR_INVALIDCALL;
    }

    DBG("returning pointer %p\n", data);
    This->nmaps++;
    This->nlocks++;
    *ppbData = data;

    return D3D_OK;
}

HRESULT NINE_WINAPI
NineBuffer9_Unlock( struct NineBuffer9 *This )
{
    struct NineDevice9 *device = This->base.base.device;
    struct pipe_context *pipe;
    int i;
    DBG("This=%p\n", This);

    user_assert(This->nlocks > 0, D3DERR_INVALIDCALL);
    This->nlocks--;
    if (This->nlocks > 0)
        return D3D_OK; /* Pending unlocks. Wait for all unlocks before unmapping */

    if (This->base.pool == D3DPOOL_DEFAULT) {
        for (i = 0; i < This->nmaps; i++) {
            if (!This->maps[i].buf) {
                pipe = This->maps[i].is_pipe_secondary ?
                    device->pipe_secondary :
                    nine_context_get_pipe_acquire(device);
                pipe->buffer_unmap(pipe, This->maps[i].transfer);
                /* We need to flush in case the driver does implicit copies */
                if (This->maps[i].is_pipe_secondary)
                    pipe->flush(pipe, NULL, 0);
                else
                    nine_context_get_pipe_release(device);
            } else if (This->maps[i].should_destroy_buf)
                nine_upload_release_buffer(device->buffer_upload, This->maps[i].buf);
        }
        This->nmaps = 0;
    }
    return D3D_OK;
}
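
/* Illustrative, hypothetical caller-side sketch of the Lock/Unlock pairing
 * (not from the original file; `buf` and `src` are assumed to exist):
 *
 *   void *p;
 *   if (SUCCEEDED(NineBuffer9_Lock(buf, 0, 0, &p, D3DLOCK_DISCARD))) {
 *       memcpy(p, src, buf->size);
 *       NineBuffer9_Unlock(buf); // the last unlock unmaps all pending maps
 *   }
 */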

void
NineBuffer9_SetDirty( struct NineBuffer9 *This )
{
    assert(This->base.pool != D3DPOOL_DEFAULT);

    This->managed.dirty = true;
    u_box_1d(0, This->size, &This->managed.dirty_box);
    BASEBUF_REGISTER_UPDATE(This);
}

/* Try to remove b from a; a is supposed to include b */
static void u_box_try_remove_region_1d(struct pipe_box *dst,
                                       const struct pipe_box *a,
                                       const struct pipe_box *b)
{
    int x, width;
    if (a->x == b->x) {
        x = a->x + b->width;
        width = a->width - b->width;
    } else if ((a->x + a->width) == (b->x + b->width)) {
        x = a->x;
        width = a->width - b->width;
    } else {
        x = a->x;
        width = a->width;
    }
    dst->x = x;
    dst->width = width;
}
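
/* Worked example (comment added for clarity): with a = [0, 100) and
 * b = [0, 30), the starts match, so dst = [30, 100) (width 70). With
 * a = [0, 100) and b = [70, 100), the ends match, so dst = [0, 70).
 * If b lies strictly inside a, removing it would split a in two, which
 * a single pipe_box cannot represent, so dst is left equal to a (hence
 * the "try" in the name). */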

void
NineBuffer9_Upload( struct NineBuffer9 *This )
{
    struct NineDevice9 *device = This->base.base.device;
    unsigned upload_flags = 0;
    struct pipe_box box_upload;

    assert(This->base.pool != D3DPOOL_DEFAULT && This->managed.dirty);

    if (This->base.pool == D3DPOOL_SYSTEMMEM && This->base.usage & D3DUSAGE_DYNAMIC) {
        struct pipe_box region_already_valid;
        struct pipe_box conflicting_region;
        struct pipe_box *valid_region = &This->managed.valid_region;
        struct pipe_box *required_valid_region = &This->managed.required_valid_region;
        struct pipe_box *filled_region = &This->managed.filled_region;
        /* Try to upload SYSTEMMEM DYNAMIC in an efficient fashion.
         * Unlike the non-dynamic case, for which we upload the whole dirty region,
         * try to only upload the data needed for the draw. The draw call
         * preparation fills This->managed.required_valid_region for that */
        u_box_intersect_1d(&region_already_valid,
                           valid_region,
                           required_valid_region);
        /* If the required valid region is already valid, nothing to do */
        if (region_already_valid.x == required_valid_region->x &&
            region_already_valid.width == required_valid_region->width) {
            /* Rebind if the region happens to be valid in the original buffer
             * but we have since used vertex_uploader */
            if (!This->managed.can_unsynchronized)
                NineBuffer9_RebindIfRequired(This, device, This->base.resource, 0);
            u_box_1d(0, 0, required_valid_region);
            return;
        }
        /* (Try to) Remove valid areas from the region to upload */
        u_box_try_remove_region_1d(&box_upload,
                                   required_valid_region,
                                   &region_already_valid);
        assert(box_upload.width > 0);
        /* To correctly maintain the valid region, as we will do a union later with
         * box_upload, we must ensure box_upload is consecutive with valid_region */
        if (box_upload.x > valid_region->x + valid_region->width && valid_region->width > 0) {
            box_upload.width = box_upload.x + box_upload.width - (valid_region->x + valid_region->width);
            box_upload.x = valid_region->x + valid_region->width;
        } else if (box_upload.x + box_upload.width < valid_region->x && valid_region->width > 0) {
            box_upload.width = valid_region->x - box_upload.x;
        }
        /* There is a conflict if some areas that are not valid but were filled by previous draw calls
         * intersect with the region we plan to upload. Note that by construction valid_region IS
         * included in filled_region, and thus so is region_already_valid. */
        u_box_intersect_1d(&conflicting_region, &box_upload, filled_region);
        /* As box_upload could still contain region_already_valid, check that the intersection
         * doesn't happen to be exactly region_already_valid (it cannot be smaller, see above) */
        if (This->managed.can_unsynchronized && (conflicting_region.width == 0 ||
            (conflicting_region.x == region_already_valid.x &&
             conflicting_region.width == region_already_valid.width))) {
            /* No conflicts. */
            upload_flags |= PIPE_MAP_UNSYNCHRONIZED;
        } else {
            /* We cannot use PIPE_MAP_UNSYNCHRONIZED. We must choose between no flag and DISCARD.
             * Criteria to discard:
             * . Most of the resource was filled (but some apps do allocate a big buffer
             * and only use a small part in a round-robin fashion)
             * . The region to upload is very small compared to the filled region and
             * at the start of the buffer (hints at round-robin usage starting again)
             * . The region to upload is very big compared to the required region
             * . We have not discarded yet this frame
             * If the buffer use pattern seems to sync the worker thread too often,
             * revert to the vertex_uploader */
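            /* Numeric illustration (comment added): with This->size = 1000
             * and filled_region = [0, 600), the first criterion below holds
             * (600 > 1000 / 2), so DISCARD is picked provided fewer than 3
             * worker thread syncs have occurred. */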
            if (This->managed.num_worker_thread_syncs < 3 &&
                (filled_region->width > (This->size / 2) ||
                 (10 * box_upload.width < filled_region->width &&
                  box_upload.x < (filled_region->x + filled_region->width)/2) ||
                 box_upload.width > 2 * required_valid_region->width ||
                 This->managed.frame_count_last_discard != device->frame_count)) {
                /* Avoid DISCARDing too much by discarding only if most of the buffer
                 * has been used */
                DBG_FLAG(DBG_INDEXBUFFER|DBG_VERTEXBUFFER,
             "Uploading %p DISCARD: valid %d %d, filled %d %d, required %d %d, box_upload %d %d, required already_valid %d %d, conflicting %d %d\n",
             This, valid_region->x, valid_region->width, filled_region->x, filled_region->width,
             required_valid_region->x, required_valid_region->width, box_upload.x, box_upload.width,
             region_already_valid.x, region_already_valid.width, conflicting_region.x, conflicting_region.width
                );
                upload_flags |= PIPE_MAP_DISCARD_WHOLE_RESOURCE;
                u_box_1d(0, 0, filled_region);
                u_box_1d(0, 0, valid_region);
                box_upload = This->managed.required_valid_region;
                /* Rebind the buffer if we used an intermediate alternative buffer */
                if (!This->managed.can_unsynchronized)
                    NineBuffer9_RebindIfRequired(This, device, This->base.resource, 0);
                This->managed.can_unsynchronized = true;
                This->managed.frame_count_last_discard = device->frame_count;
            } else {
                /* Once we upload without UNSYNCHRONIZED, we cannot use it anymore.
                 * Use a different buffer. */
                unsigned buffer_offset = 0;
                struct pipe_resource *resource = NULL;
                This->managed.can_unsynchronized = false;
                u_upload_data(device->vertex_uploader,
                    required_valid_region->x,
                    required_valid_region->width,
                    64,
                    This->managed.data + required_valid_region->x,
                    &buffer_offset,
                    &resource);
                buffer_offset -= required_valid_region->x;
                u_upload_unmap(device->vertex_uploader);
                if (resource) {
                    NineBuffer9_RebindIfRequired(This, device, resource, buffer_offset);
                    /* Note: This only works because for these types of buffers this function
                     * is called before every draw call. Otherwise it wouldn't work when the app
                     * rebinds buffers. In addition it requires this function to be called only
                     * once per buffer even if bound several times, which we do. */
                    u_box_1d(0, 0, required_valid_region);
                    pipe_resource_reference(&resource, NULL);
                    return;
                }
            }
        }

        u_box_union_1d(filled_region,
                       filled_region,
                       &box_upload);
        u_box_union_1d(valid_region,
                       valid_region,
                       &box_upload);
        u_box_1d(0, 0, required_valid_region);
    } else
        box_upload = This->managed.dirty_box;

    if (box_upload.x == 0 && box_upload.width == This->size) {
        upload_flags |= PIPE_MAP_DISCARD_WHOLE_RESOURCE;
    }

    if (This->managed.pending_upload) {
        u_box_union_1d(&This->managed.upload_pending_regions,
                       &This->managed.upload_pending_regions,
                       &box_upload);
    } else {
        This->managed.upload_pending_regions = box_upload;
    }

    DBG_FLAG(DBG_INDEXBUFFER|DBG_VERTEXBUFFER,
             "Uploading %p, offset=%d, size=%d, Flags=0x%x\n",
             This, box_upload.x, box_upload.width, upload_flags);
    nine_context_range_upload(device, &This->managed.pending_upload,
                              (struct NineUnknown *)This,
                              This->base.resource,
                              box_upload.x,
                              box_upload.width,
                              upload_flags,
                              (int8_t *)This->managed.data + box_upload.x);
    This->managed.dirty = false;
}
700