/*
 * Copyright 2011 Joakim Sindholt <[email protected]>
 * Copyright 2015 Patrick Rudolph <[email protected]>
 * SPDX-License-Identifier: MIT
 */

#include "buffer9.h"
#include "device9.h"
#include "indexbuffer9.h"
#include "nine_buffer_upload.h"
#include "nine_helpers.h"
#include "nine_pipe.h"

#include "pipe/p_screen.h"
#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
#include "util/format/u_formats.h"
#include "util/box.h"
#include "util/u_inlines.h"

#define DBG_CHANNEL (DBG_INDEXBUFFER|DBG_VERTEXBUFFER)

HRESULT
NineBuffer9_ctor( struct NineBuffer9 *This,
                  struct NineUnknownParams *pParams,
                  D3DRESOURCETYPE Type,
                  DWORD Usage,
                  UINT Size,
                  D3DPOOL Pool )
{
    struct pipe_resource *info = &This->base.info;
    HRESULT hr;

    DBG("This=%p Size=0x%x Usage=%x Pool=%u\n", This, Size, Usage, Pool);

    user_assert(Pool != D3DPOOL_SCRATCH, D3DERR_INVALIDCALL);

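    /* 'maps' tracks the in-flight pipe transfers of DEFAULT pool locks; it
     * starts with room for a single entry and NineBuffer9_Lock() doubles it
     * on demand. */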
    This->maps = MALLOC(sizeof(struct NineTransfer));
    if (!This->maps)
        return E_OUTOFMEMORY;
    This->nlocks = 0;
    This->nmaps = 0;
    This->maxmaps = 1;
    This->size = Size;

    info->screen = pParams->device->screen;
    info->target = PIPE_BUFFER;
    info->format = PIPE_FORMAT_R8_UNORM;
    info->width0 = Size;
    info->flags = 0;

    /* Note: WRITEONLY is just a hint for resource placement, the resource
     * can still be read (but slower). */
    info->bind = (Type == D3DRTYPE_INDEXBUFFER) ? PIPE_BIND_INDEX_BUFFER : PIPE_BIND_VERTEX_BUFFER;

    /* Software vertex processing:
     * If the device is full software vertex processing,
     * then the buffer is supposed to be used only for sw processing.
     * For mixed vertex processing, buffers with D3DUSAGE_SOFTWAREPROCESSING
     * can be used for both sw and hw processing.
     * These buffers are expected to be stored in RAM.
     * Apps expect locking the full buffer with no flags, then
     * rendering a few primitives, then locking again, etc.,
     * to be a fast pattern. Only the SYSTEMMEM DYNAMIC path
     * will give that pattern ok performance in our case.
     * An alternative would be, when sw processing is detected, to
     * convert Draw* calls to Draw*Up calls. */
    if (Usage & D3DUSAGE_SOFTWAREPROCESSING ||
        pParams->device->params.BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING) {
        Pool = D3DPOOL_SYSTEMMEM;
        Usage |= D3DUSAGE_DYNAMIC;
        /* Note: the application cannot retrieve Pool and Usage */
    }

    /* Always use the DYNAMIC path for SYSTEMMEM.
     * If the app uses the vertex buffer in a dynamic fashion,
     * this is going to be very significantly faster that way.
     * If the app uses the vertex buffer in a static fashion,
     * instead of being filled all at once, the buffer will be filled
     * little by little, until it is fully filled, thus the perf hit
     * will be very small. */
    if (Pool == D3DPOOL_SYSTEMMEM)
        Usage |= D3DUSAGE_DYNAMIC;

    /* It is hard to find clear information on where to place the buffer in
     * memory depending on the flag.
     * MSDN: resources are static, except for those with DYNAMIC, which is why
     * you can only use DISCARD on the latter.
     * ATI doc: The driver is free to decide whether things are static or not.
     * MANAGED: RAM + upload to a VRAM copy at unlock (msdn and nvidia docs say
     * at the first draw call using the buffer)
     * DEFAULT + Usage = 0 => System memory backing for easy read access
     * (That doc is very unclear on the details, like whether some copies to
     * a vram copy are involved or not).
     * DEFAULT + WRITEONLY => Vram
     * DEFAULT + WRITEONLY + DYNAMIC => Either Vram buffer or GTT_WC, depending on what the driver wants.
     * SYSTEMMEM: Same as MANAGED, but handled by the driver instead of the runtime (which means
     * some small behavior differences between vendors). Implementing exactly as MANAGED should
     * be fine.
     */
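    /* Summary of the mapping chosen below (a best-effort compromise given
     * the notes above, not a spec guarantee):
     *   SYSTEMMEM (always DYNAMIC here)  -> PIPE_USAGE_STREAM
     *   MANAGED (any other non-DEFAULT)  -> PIPE_USAGE_DEFAULT
     *   DEFAULT + DYNAMIC + WRITEONLY    -> PIPE_USAGE_STREAM
     *   DEFAULT + WRITEONLY              -> PIPE_USAGE_DEFAULT
     *   DEFAULT + DYNAMIC                -> PIPE_USAGE_STREAM
     *   DEFAULT (no flags)               -> PIPE_USAGE_DYNAMIC */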
    if (Pool == D3DPOOL_SYSTEMMEM && Usage & D3DUSAGE_DYNAMIC)
        info->usage = PIPE_USAGE_STREAM;
    else if (Pool != D3DPOOL_DEFAULT)
        info->usage = PIPE_USAGE_DEFAULT;
    else if (Usage & D3DUSAGE_DYNAMIC && Usage & D3DUSAGE_WRITEONLY)
        info->usage = PIPE_USAGE_STREAM;
    else if (Usage & D3DUSAGE_WRITEONLY)
        info->usage = PIPE_USAGE_DEFAULT;
    /* For the remaining two, PIPE_USAGE_STAGING would probably be
     * a good fit according to the doc. However using these flags seems
     * rather to be an app mistake (and such mistakes do really happen). Try
     * to pick the flags that are the best compromise between the real
     * behaviour and what buggy apps should get for better performance. */
    else if (Usage & D3DUSAGE_DYNAMIC)
        info->usage = PIPE_USAGE_STREAM;
    else
        info->usage = PIPE_USAGE_DYNAMIC;

    /* When WRITEONLY is not set, we don't want to enable the
     * optimizations */
    This->discard_nooverwrite_only = !!(Usage & D3DUSAGE_WRITEONLY) &&
                                     pParams->device->buffer_upload;
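    /* The flag stays set as long as every Lock call so far used DISCARD or
     * NOOVERWRITE; NineBuffer9_Lock() clears it on the first lock taken
     * without one of those flags. */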
    /* if (pDesc->Usage & D3DUSAGE_DONOTCLIP) { } */
    /* if (pDesc->Usage & D3DUSAGE_NONSECURE) { } */
    /* if (pDesc->Usage & D3DUSAGE_NPATCHES) { } */
    /* if (pDesc->Usage & D3DUSAGE_POINTS) { } */
    /* if (pDesc->Usage & D3DUSAGE_RTPATCHES) { } */
    /* if (pDesc->Usage & D3DUSAGE_TEXTAPI) { } */

    info->height0 = 1;
    info->depth0 = 1;
    info->array_size = 1;
    info->last_level = 0;
    info->nr_samples = 0;
    info->nr_storage_samples = 0;

    hr = NineResource9_ctor(&This->base, pParams, NULL, true,
                            Type, Pool, Usage);

    if (FAILED(hr))
        return hr;

    if (Pool != D3DPOOL_DEFAULT) {
        This->managed.data = align_calloc(
            nine_format_get_level_alloc_size(This->base.info.format,
                                             Size, 1, 0), 32);
        if (!This->managed.data)
            return E_OUTOFMEMORY;
        This->managed.dirty = true;
        u_box_1d(0, Size, &This->managed.dirty_box);
        u_box_1d(0, 0, &This->managed.valid_region);
        u_box_1d(0, 0, &This->managed.required_valid_region);
        u_box_1d(0, 0, &This->managed.filled_region);
        This->managed.can_unsynchronized = true;
        This->managed.num_worker_thread_syncs = 0;
        list_inithead(&This->managed.list);
        list_inithead(&This->managed.list2);
        list_add(&This->managed.list2, &pParams->device->managed_buffers);
    }

    return D3D_OK;
}

void
NineBuffer9_dtor( struct NineBuffer9 *This )
{
    DBG("This=%p\n", This);

    if (This->maps) {
        while (This->nlocks) {
            NineBuffer9_Unlock(This);
        }
        assert(!This->nmaps);
        FREE(This->maps);
    }

    if (This->base.pool != D3DPOOL_DEFAULT) {
        if (This->managed.data)
            align_free(This->managed.data);
        if (list_is_linked(&This->managed.list))
            list_del(&This->managed.list);
        if (list_is_linked(&This->managed.list2))
            list_del(&This->managed.list2);
    }

    if (This->buf)
        nine_upload_release_buffer(This->base.base.device->buffer_upload, This->buf);

    NineResource9_dtor(&This->base);
}

struct pipe_resource *
NineBuffer9_GetResource( struct NineBuffer9 *This, unsigned *offset )
{
    if (This->buf)
        return nine_upload_buffer_resource_and_offset(This->buf, offset);
    *offset = 0;
    return NineResource9_GetResource(&This->base);
}

static void
NineBuffer9_RebindIfRequired( struct NineBuffer9 *This,
                              struct NineDevice9 *device,
                              struct pipe_resource *resource,
                              unsigned offset )
{
    int i;

    if (!This->bind_count)
        return;
    for (i = 0; i < device->caps.MaxStreams; i++) {
        if (device->state.stream[i] == (struct NineVertexBuffer9 *)This)
            nine_context_set_stream_source_apply(device, i,
                                                 resource,
                                                 device->state.vtxbuf[i].buffer_offset + offset,
                                                 device->state.vtxstride[i]);
    }
    if (device->state.idxbuf == (struct NineIndexBuffer9 *)This)
        nine_context_set_indices_apply(device, resource,
                                       ((struct NineIndexBuffer9 *)This)->index_size,
                                       offset);
}

HRESULT NINE_WINAPI
NineBuffer9_Lock( struct NineBuffer9 *This,
                  UINT OffsetToLock,
                  UINT SizeToLock,
                  void **ppbData,
                  DWORD Flags )
{
    struct NineDevice9 *device = This->base.base.device;
    struct pipe_box box;
    struct pipe_context *pipe;
    void *data;
    unsigned usage;

    DBG("This=%p(pipe=%p) OffsetToLock=0x%x, SizeToLock=0x%x, Flags=0x%x\n",
        This, This->base.resource,
        OffsetToLock, SizeToLock, Flags);

    user_assert(ppbData, E_POINTER);

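    /* A SizeToLock of 0 means "lock up to the end of the buffer". MSDN
     * documents passing 0 for both OffsetToLock and SizeToLock to lock the
     * entire buffer, hence the warning below on a non-zero offset. */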
    if (SizeToLock == 0) {
        SizeToLock = This->size - OffsetToLock;
        user_warn(OffsetToLock != 0);
    }

    /* Writes out of bounds seem to have to be taken into account for these.
     * TODO: Do more tests (is it only at the buffer's first lock? etc).
     * Since these buffers are supposed to be locked once and never
     * written again (MANAGED or DYNAMIC is used for the other use cases),
     * performance should be unaffected. */
    if (!(This->base.usage & D3DUSAGE_DYNAMIC) && This->base.pool == D3DPOOL_DEFAULT)
        SizeToLock = This->size - OffsetToLock;

    SizeToLock = MIN2(SizeToLock, This->size - OffsetToLock); /* Do not read or track out of the buffer */
    u_box_1d(OffsetToLock, SizeToLock, &box);

    if (This->base.pool != D3DPOOL_DEFAULT) {
        /* MANAGED: READONLY doesn't dirty the buffer, nor
         * wait for the upload in the worker thread
         * SYSTEMMEM: AMD/NVidia: All locks dirty the full buffer. Not on Intel
         * For NVidia, SYSTEMMEM behaves as if there is no worker thread.
         * On AMD, READONLY and NOOVERWRITE do dirty the buffer, but do not sync the previous uploads
         * in the worker thread. On Intel only NOOVERWRITE has that effect.
         * We implement the AMD behaviour. */
        if (This->base.pool == D3DPOOL_MANAGED) {
            if (!(Flags & D3DLOCK_READONLY)) {
                if (!This->managed.dirty) {
                    assert(list_is_empty(&This->managed.list));
                    This->managed.dirty = true;
                    This->managed.dirty_box = box;
                    /* Flush if regions pending to be uploaded would be dirtied */
                    if (p_atomic_read(&This->managed.pending_upload)) {
                        u_box_intersect_1d(&box, &box, &This->managed.upload_pending_regions);
                        if (box.width != 0)
                            nine_csmt_process(This->base.base.device);
                    }
                } else
                    u_box_union_1d(&This->managed.dirty_box, &This->managed.dirty_box, &box);
                /* Tests trying to draw while the buffer is locked show that
                 * SYSTEMMEM/MANAGED buffers are made dirty at Lock time */
                BASEBUF_REGISTER_UPDATE(This);
            }
        } else {
            if (!(Flags & (D3DLOCK_READONLY|D3DLOCK_NOOVERWRITE)) &&
                p_atomic_read(&This->managed.pending_upload)) {
                This->managed.num_worker_thread_syncs++;
                /* If we sync too often, pick the vertex_uploader path */
                if (This->managed.num_worker_thread_syncs >= 3)
                    This->managed.can_unsynchronized = false;
                nine_csmt_process(This->base.base.device);
                /* Note: as DISCARD is not relevant for SYSTEMMEM,
                 * NOOVERWRITE might have a similar meaning as what is
                 * in the D3D7 doc. Basically that data from previous draws
                 * OF THIS FRAME are unaffected. As we flush csmt in Present(),
                 * we should be correct. In some parts of the doc, the notion
                 * of frame is implied to be related to Begin/EndScene(),
                 * but tests show NOOVERWRITE after EndScene() doesn't flush
                 * the csmt thread. */
            }
            This->managed.dirty = true;
            u_box_1d(0, This->size, &This->managed.dirty_box); /* systemmem non-dynamic */
            u_box_1d(0, 0, &This->managed.valid_region); /* systemmem dynamic */
            BASEBUF_REGISTER_UPDATE(This);
        }

        *ppbData = (int8_t *)This->managed.data + OffsetToLock;
        DBG("returning pointer %p\n", *ppbData);
        This->nlocks++;
        return D3D_OK;
    }

    /* Driver ddi doc: READONLY is never passed to the device. So it can only
     * have an effect on things handled by the driver (MANAGED pool for example).
     * Msdn doc: DISCARD and NOOVERWRITE are only for DYNAMIC.
     * ATI doc: You can use DISCARD and NOOVERWRITE without DYNAMIC.
     * Msdn doc: D3DLOCK_DONOTWAIT is not among the valid flags for buffers.
     * Our tests: On win 7 nvidia, D3DLOCK_DONOTWAIT does return
     * D3DERR_WASSTILLDRAWING if the resource is in use, except for DYNAMIC.
     * Our tests: some apps do use both DISCARD and NOOVERWRITE at the same
     * time. On windows it seems to return a different pointer under some
     * conditions, creation flags and drivers. However these tests indicate
     * that having NOOVERWRITE win is a valid behaviour (NVidia).
     */

    /* Have NOOVERWRITE win over DISCARD. This is allowed (see above) and
     * it prevents overconsuming buffers if apps do use both at the same time. */
    if ((Flags & (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE)) == (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE))
        Flags &= ~D3DLOCK_DISCARD;

    if (Flags & D3DLOCK_DISCARD)
        usage = PIPE_MAP_WRITE | PIPE_MAP_DISCARD_WHOLE_RESOURCE;
    else if (Flags & D3DLOCK_NOOVERWRITE)
        usage = PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED;
    else
        /* Do not ask for READ if writeonly and default pool (should be safe enough,
         * as the doc says the app shouldn't expect reading to work with writeonly). */
        usage = (This->base.usage & D3DUSAGE_WRITEONLY) ?
                PIPE_MAP_WRITE :
                PIPE_MAP_READ_WRITE;
    if (Flags & D3DLOCK_DONOTWAIT && !(This->base.usage & D3DUSAGE_DYNAMIC))
        usage |= PIPE_MAP_DONTBLOCK;

    This->discard_nooverwrite_only &= !!(Flags & (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE));

    if (This->nmaps == This->maxmaps) {
        struct NineTransfer *newmaps =
            REALLOC(This->maps, sizeof(struct NineTransfer)*This->maxmaps,
                    sizeof(struct NineTransfer)*(This->maxmaps << 1));
        if (newmaps == NULL)
            return E_OUTOFMEMORY;

        This->maxmaps <<= 1;
        This->maps = newmaps;
    }

    if (This->buf && !This->discard_nooverwrite_only) {
        struct pipe_box src_box;
        unsigned offset;
        struct pipe_resource *src_res;
        DBG("Disabling nine_subbuffer for a buffer having "
            "used a nine_subbuffer buffer\n");
        /* Copy the buffer content to the buffer resource, which
         * we will now use.
         * Note: The behaviour may be different from what is expected
         * with double lock. However applications can't really make expectations
         * about double locks, and don't really use them, so that's ok. */
        src_res = nine_upload_buffer_resource_and_offset(This->buf, &offset);
        u_box_1d(offset, This->size, &src_box);

        pipe = NineDevice9_GetPipe(device);
        pipe->resource_copy_region(pipe, This->base.resource, 0, 0, 0, 0,
                                   src_res, 0, &src_box);
        /* Release the previous resource */
        if (This->nmaps >= 1)
            This->maps[This->nmaps-1].should_destroy_buf = true;
        else
            nine_upload_release_buffer(device->buffer_upload, This->buf);
        This->buf = NULL;
        /* Rebind the buffer */
        NineBuffer9_RebindIfRequired(This, device, This->base.resource, 0);
    }

    This->maps[This->nmaps].transfer = NULL;
    This->maps[This->nmaps].is_pipe_secondary = false;
    This->maps[This->nmaps].buf = NULL;
    This->maps[This->nmaps].should_destroy_buf = false;

    if (This->discard_nooverwrite_only) {
        if (This->buf && (Flags & D3DLOCK_DISCARD)) {
            /* Release the previous buffer */
            if (This->nmaps >= 1)
                This->maps[This->nmaps-1].should_destroy_buf = true;
            else
                nine_upload_release_buffer(device->buffer_upload, This->buf);
            This->buf = NULL;
        }

        if (!This->buf) {
            unsigned offset;
            struct pipe_resource *res;
            This->buf = nine_upload_create_buffer(device->buffer_upload, This->base.info.width0);
            res = nine_upload_buffer_resource_and_offset(This->buf, &offset);
            NineBuffer9_RebindIfRequired(This, device, res, offset);
        }

        if (This->buf) {
            This->maps[This->nmaps].buf = This->buf;
            This->nmaps++;
            This->nlocks++;
            DBG("Returning %p\n", nine_upload_buffer_get_map(This->buf) + OffsetToLock);
            *ppbData = nine_upload_buffer_get_map(This->buf) + OffsetToLock;
            return D3D_OK;
        } else {
            /* Fall back to the normal path, and don't try again */
            This->discard_nooverwrite_only = false;
        }
    }

    /* Previous mappings may need pending commands to write to the
     * buffer (a staging buffer for example). Before a NOOVERWRITE,
     * we thus need a finish, to guarantee any upload is finished.
     * Note: for discard_nooverwrite_only we don't need to do this
     * check, as neither discard nor nooverwrite have issues there */
    if (This->need_sync_if_nooverwrite && !(Flags & D3DLOCK_DISCARD) &&
        (Flags & D3DLOCK_NOOVERWRITE)) {
        struct pipe_screen *screen = NineDevice9_GetScreen(device);
        struct pipe_fence_handle *fence = NULL;

        pipe = NineDevice9_GetPipe(device);
        pipe->flush(pipe, &fence, 0);
        (void) screen->fence_finish(screen, NULL, fence, OS_TIMEOUT_INFINITE);
        screen->fence_reference(screen, &fence, NULL);
    }
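    /* A lock taken with neither DISCARD nor NOOVERWRITE may be implemented
     * with a staging copy, so remember it: the next NOOVERWRITE lock must
     * synchronize first (see above). */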
    This->need_sync_if_nooverwrite = !(Flags & (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE));

    /* When csmt is active, we want to avoid stalls as much as possible,
     * and thus we want to create a new resource on discard and map it
     * with the secondary pipe, instead of waiting on the main pipe. */
    if (Flags & D3DLOCK_DISCARD && device->csmt_active) {
        struct pipe_screen *screen = NineDevice9_GetScreen(device);
        struct pipe_resource *new_res = nine_resource_create_with_retry(device, screen, &This->base.info);
        if (new_res) {
            /* Use the new resource */
            pipe_resource_reference(&This->base.resource, new_res);
            pipe_resource_reference(&new_res, NULL);
            usage = PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED;
            NineBuffer9_RebindIfRequired(This, device, This->base.resource, 0);
            This->maps[This->nmaps].is_pipe_secondary = true;
        }
    } else if (Flags & D3DLOCK_NOOVERWRITE && device->csmt_active)
        This->maps[This->nmaps].is_pipe_secondary = true;

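    /* Unsynchronized maps do not have to be ordered against work already
     * queued on the main (csmt) context, which is presumably why they can go
     * through the secondary pipe without a csmt round trip. */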
    if (This->maps[This->nmaps].is_pipe_secondary)
        pipe = device->pipe_secondary;
    else
        pipe = NineDevice9_GetPipe(device);

    data = pipe->buffer_map(pipe, This->base.resource, 0,
                            usage, &box, &This->maps[This->nmaps].transfer);

    if (!data) {
        DBG("pipe::buffer_map failed\n"
            " usage = %x\n"
            " box.x = %u\n"
            " box.width = %u\n",
            usage, box.x, box.width);

        if (Flags & D3DLOCK_DONOTWAIT)
            return D3DERR_WASSTILLDRAWING;
        return D3DERR_INVALIDCALL;
    }

    DBG("returning pointer %p\n", data);
    This->nmaps++;
    This->nlocks++;
    *ppbData = data;

    return D3D_OK;
}

HRESULT NINE_WINAPI
NineBuffer9_Unlock( struct NineBuffer9 *This )
{
    struct NineDevice9 *device = This->base.base.device;
    struct pipe_context *pipe;
    int i;
    DBG("This=%p\n", This);

    user_assert(This->nlocks > 0, D3DERR_INVALIDCALL);
    This->nlocks--;
    if (This->nlocks > 0)
        return D3D_OK; /* Pending unlocks. Wait for all unlocks before unmapping */

    if (This->base.pool == D3DPOOL_DEFAULT) {
        for (i = 0; i < This->nmaps; i++) {
            if (!This->maps[i].buf) {
                pipe = This->maps[i].is_pipe_secondary ?
                       device->pipe_secondary :
                       nine_context_get_pipe_acquire(device);
                pipe->buffer_unmap(pipe, This->maps[i].transfer);
                /* We need to flush in case the driver does implicit copies */
                if (This->maps[i].is_pipe_secondary)
                    pipe->flush(pipe, NULL, 0);
                else
                    nine_context_get_pipe_release(device);
            } else if (This->maps[i].should_destroy_buf)
                nine_upload_release_buffer(device->buffer_upload, This->maps[i].buf);
        }
        This->nmaps = 0;
    }
    return D3D_OK;
}

void
NineBuffer9_SetDirty( struct NineBuffer9 *This )
{
    assert(This->base.pool != D3DPOOL_DEFAULT);

    This->managed.dirty = true;
    u_box_1d(0, This->size, &This->managed.dirty_box);
    BASEBUF_REGISTER_UPDATE(This);
}

/* Try to remove b from a; a is supposed to include b */
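/* Removal is only possible when b touches one end of a, since the result must
 * stay a single interval. Example (illustrative): a = {x=0, width=100} and
 * b = {x=0, width=30} give dst = {x=30, width=70}. When b is strictly in the
 * middle of a, dst is simply a copy of a. */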
static void u_box_try_remove_region_1d(struct pipe_box *dst,
                                       const struct pipe_box *a,
                                       const struct pipe_box *b)
{
    int x, width;
    if (a->x == b->x) {
        x = a->x + b->width;
        width = a->width - b->width;
    } else if ((a->x + a->width) == (b->x + b->width)) {
        x = a->x;
        width = a->width - b->width;
    } else {
        x = a->x;
        width = a->width;
    }
    dst->x = x;
    dst->width = width;
}

void
NineBuffer9_Upload( struct NineBuffer9 *This )
{
    struct NineDevice9 *device = This->base.base.device;
    unsigned upload_flags = 0;
    struct pipe_box box_upload;

    assert(This->base.pool != D3DPOOL_DEFAULT && This->managed.dirty);

    if (This->base.pool == D3DPOOL_SYSTEMMEM && This->base.usage & D3DUSAGE_DYNAMIC) {
        struct pipe_box region_already_valid;
        struct pipe_box conflicting_region;
        struct pipe_box *valid_region = &This->managed.valid_region;
        struct pipe_box *required_valid_region = &This->managed.required_valid_region;
        struct pipe_box *filled_region = &This->managed.filled_region;
        /* Try to upload SYSTEMMEM DYNAMIC in an efficient fashion.
         * Unlike non-dynamic, for which we upload the whole dirty region, try to
         * only upload the data needed for the draw. The draw call preparation
         * fills This->managed.required_valid_region for that */
        u_box_intersect_1d(&region_already_valid,
                           valid_region,
                           required_valid_region);
        /* If the required valid region is already valid, nothing to do */
        if (region_already_valid.x == required_valid_region->x &&
            region_already_valid.width == required_valid_region->width) {
            /* Rebind if the region happens to be valid in the original buffer
             * but we have since used vertex_uploader */
            if (!This->managed.can_unsynchronized)
                NineBuffer9_RebindIfRequired(This, device, This->base.resource, 0);
            u_box_1d(0, 0, required_valid_region);
            return;
        }
        /* (Try to) Remove valid areas from the region to upload */
        u_box_try_remove_region_1d(&box_upload,
                                   required_valid_region,
                                   &region_already_valid);
        assert(box_upload.width > 0);
        /* To maintain the valid region correctly, as we will union it with
         * box_upload later, we must ensure box_upload is consecutive with valid_region */
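        /* Example (illustrative): valid_region = {x=0, width=32} and
         * box_upload = {x=48, width=16} -> box_upload becomes {x=32, width=32},
         * so the later union still describes a single interval. */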
        if (box_upload.x > valid_region->x + valid_region->width && valid_region->width > 0) {
            box_upload.width = box_upload.x + box_upload.width - (valid_region->x + valid_region->width);
            box_upload.x = valid_region->x + valid_region->width;
        } else if (box_upload.x + box_upload.width < valid_region->x && valid_region->width > 0) {
            box_upload.width = valid_region->x - box_upload.x;
        }
        /* There is a conflict if some areas that are not valid, but are filled by previous
         * draw calls, intersect with the region we plan to upload. Note that by construction
         * valid_region IS included in filled_region, and thus so is region_already_valid. */
        u_box_intersect_1d(&conflicting_region, &box_upload, filled_region);
        /* As box_upload could still contain region_already_valid, check the intersection
         * doesn't happen to be exactly region_already_valid (it cannot be smaller, see above) */
        if (This->managed.can_unsynchronized && (conflicting_region.width == 0 ||
            (conflicting_region.x == region_already_valid.x &&
             conflicting_region.width == region_already_valid.width))) {
            /* No conflicts. */
            upload_flags |= PIPE_MAP_UNSYNCHRONIZED;
        } else {
            /* We cannot use PIPE_MAP_UNSYNCHRONIZED. We must choose between no flag and DISCARD.
             * Criteria to discard:
             * . Most of the resource was filled (but some apps do allocate a big buffer
             *   and only use a small part in a round fashion)
             * . The region to upload is very small compared to the filled region and
             *   at the start of the buffer (hints at round usage starting again)
             * . The region to upload is very big compared to the required region
             * . We have not discarded yet this frame
             * If the buffer use pattern seems to sync the worker thread too often,
             * revert to the vertex_uploader */
            if (This->managed.num_worker_thread_syncs < 3 &&
                (filled_region->width > (This->size / 2) ||
                 (10 * box_upload.width < filled_region->width &&
                  box_upload.x < (filled_region->x + filled_region->width)/2) ||
                 box_upload.width > 2 * required_valid_region->width ||
                 This->managed.frame_count_last_discard != device->frame_count)) {
                /* Avoid DISCARDing too much by discarding only if most of the buffer
                 * has been used */
                DBG_FLAG(DBG_INDEXBUFFER|DBG_VERTEXBUFFER,
                         "Uploading %p DISCARD: valid %d %d, filled %d %d, required %d %d, box_upload %d %d, required already_valid %d %d, conflicting %d %d\n",
                         This, valid_region->x, valid_region->width, filled_region->x, filled_region->width,
                         required_valid_region->x, required_valid_region->width, box_upload.x, box_upload.width,
                         region_already_valid.x, region_already_valid.width, conflicting_region.x, conflicting_region.width);
                upload_flags |= PIPE_MAP_DISCARD_WHOLE_RESOURCE;
                u_box_1d(0, 0, filled_region);
                u_box_1d(0, 0, valid_region);
                box_upload = This->managed.required_valid_region;
                /* Rebind the buffer if we used an intermediate alternative buffer */
                if (!This->managed.can_unsynchronized)
                    NineBuffer9_RebindIfRequired(This, device, This->base.resource, 0);
                This->managed.can_unsynchronized = true;
                This->managed.frame_count_last_discard = device->frame_count;
            } else {
                /* Once we map without UNSYNCHRONIZED, we cannot use it anymore.
                 * Use a different buffer. */
                unsigned buffer_offset = 0;
                struct pipe_resource *resource = NULL;
                This->managed.can_unsynchronized = false;
                u_upload_data(device->vertex_uploader,
                              required_valid_region->x,
                              required_valid_region->width,
                              64,
                              This->managed.data + required_valid_region->x,
                              &buffer_offset,
                              &resource);
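                /* u_upload_data returned in buffer_offset where the data
                 * landed inside 'resource'. RebindIfRequired() adds the
                 * offset we pass to the original binding offsets, so bias it
                 * by the start of the uploaded range: original offset X must
                 * then resolve to the uploaded copy of byte X. */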
                buffer_offset -= required_valid_region->x;
                u_upload_unmap(device->vertex_uploader);
                if (resource) {
                    NineBuffer9_RebindIfRequired(This, device, resource, buffer_offset);
                    /* Note: This only works because, for these types of buffers, this
                     * function is called before every draw call. Otherwise it wouldn't
                     * work when the app rebinds buffers. In addition, it requires this
                     * function to be called only once per buffer, even if the buffer is
                     * bound several times, which we do. */
                    u_box_1d(0, 0, required_valid_region);
                    pipe_resource_reference(&resource, NULL);
                    return;
                }
            }
        }

        u_box_union_1d(filled_region,
                       filled_region,
                       &box_upload);
        u_box_union_1d(valid_region,
                       valid_region,
                       &box_upload);
        u_box_1d(0, 0, required_valid_region);
    } else
        box_upload = This->managed.dirty_box;

    if (box_upload.x == 0 && box_upload.width == This->size) {
        upload_flags |= PIPE_MAP_DISCARD_WHOLE_RESOURCE;
    }

    if (This->managed.pending_upload) {
        u_box_union_1d(&This->managed.upload_pending_regions,
                       &This->managed.upload_pending_regions,
                       &box_upload);
    } else {
        This->managed.upload_pending_regions = box_upload;
    }

    DBG_FLAG(DBG_INDEXBUFFER|DBG_VERTEXBUFFER,
             "Uploading %p, offset=%d, size=%d, Flags=0x%x\n",
             This, box_upload.x, box_upload.width, upload_flags);
    nine_context_range_upload(device, &This->managed.pending_upload,
                              (struct NineUnknown *)This,
                              This->base.resource,
                              box_upload.x,
                              box_upload.width,
                              upload_flags,
                              (int8_t *)This->managed.data + box_upload.x);
    This->managed.dirty = false;
}