1 /**************************************************************************
2 *
3 * Copyright 2017 Advanced Micro Devices, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * on the rights to use, copy, modify, merge, publish, distribute, sub
10 * license, and/or sell copies of the Software, and to permit persons to whom
11 * the Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 **************************************************************************/
26
27 #include "util/u_threaded_context.h"
28 #include "util/u_cpu_detect.h"
29 #include "util/format/u_format.h"
30 #include "util/u_inlines.h"
31 #include "util/u_memory.h"
32 #include "util/u_upload_mgr.h"
33 #include "driver_trace/tr_context.h"
34 #include "util/log.h"
35 #include "util/perf/cpu_trace.h"
36 #include "util/thread_sched.h"
37 #include "compiler/shader_info.h"
38
39 #if TC_DEBUG >= 1
40 #define tc_assert assert
41 #else
42 #define tc_assert(x)
43 #endif
44
45 #if TC_DEBUG >= 2
46 #define tc_printf mesa_logi
47 #define tc_asprintf asprintf
48 #define tc_strcmp strcmp
49 #else
50 #define tc_printf(...)
51 #define tc_asprintf(...) 0
52 #define tc_strcmp(...) 0
53 #endif
54
55 #define TC_SENTINEL 0x5ca1ab1e
56
57 #if TC_DEBUG >= 3 || defined(TC_TRACE)
58 static const char *tc_call_names[] = {
59 #define CALL(name) #name,
60 #include "u_threaded_context_calls.h"
61 #undef CALL
62 };
63 #endif
64
65 #ifdef TC_TRACE
66 # define TC_TRACE_SCOPE(call_id) MESA_TRACE_SCOPE(tc_call_names[call_id])
67 #else
68 # define TC_TRACE_SCOPE(call_id)
69 #endif
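/* Summary of the debug knobs above:
 *  TC_DEBUG >= 1: enable tc_assert() and the per-call TC_SENTINEL check.
 *  TC_DEBUG >= 2: enable the tc_printf/tc_asprintf/tc_strcmp logging helpers.
 *  TC_DEBUG >= 3: print every call name as it is enqueued and executed.
 *  TC_TRACE: wrap each executed call in a MESA_TRACE_SCOPE named after the call.
 */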
70
71 static void
72 tc_buffer_subdata(struct pipe_context *_pipe,
73 struct pipe_resource *resource,
74 unsigned usage, unsigned offset,
75 unsigned size, const void *data);
76
77 static void
78 tc_batch_check(UNUSED struct tc_batch *batch)
79 {
80 tc_assert(batch->sentinel == TC_SENTINEL);
81 tc_assert(batch->num_total_slots <= TC_SLOTS_PER_BATCH);
82 }
83
84 static void
85 tc_debug_check(struct threaded_context *tc)
86 {
87 for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
88 tc_batch_check(&tc->batch_slots[i]);
89 tc_assert(tc->batch_slots[i].tc == tc);
90 }
91 }
92
93 static void
94 tc_set_driver_thread(struct threaded_context *tc)
95 {
96 #ifndef NDEBUG
97 tc->driver_thread = thrd_current();
98 #endif
99 }
100
101 static void
102 tc_clear_driver_thread(struct threaded_context *tc)
103 {
104 #ifndef NDEBUG
105 memset(&tc->driver_thread, 0, sizeof(tc->driver_thread));
106 #endif
107 }
108
109 struct tc_batch_rp_info {
110 /* this is what drivers can see */
111 struct tc_renderpass_info info;
112 /* determines whether the info can be "safely" read by drivers or if it may still be in use */
113 struct util_queue_fence ready;
114 /* when a batch is full, the rp info rolls over onto 'next' */
115 struct tc_batch_rp_info *next;
116 /* when rp info has rolled over onto this struct, 'prev' is used to update pointers for realloc */
117 struct tc_batch_rp_info *prev;
118 };
119
120 static struct tc_batch_rp_info *
121 tc_batch_rp_info(struct tc_renderpass_info *info)
122 {
123 return (struct tc_batch_rp_info *)info;
124 }
125
126 static void
127 tc_sanitize_renderpass_info(struct threaded_context *tc)
128 {
129 tc->renderpass_info_recording->cbuf_invalidate = 0;
130 tc->renderpass_info_recording->zsbuf_invalidate = false;
131 tc->renderpass_info_recording->cbuf_load |= (~tc->renderpass_info_recording->cbuf_clear) & BITFIELD_MASK(PIPE_MAX_COLOR_BUFS);
132 if (tc->fb_resources[PIPE_MAX_COLOR_BUFS] && !tc_renderpass_info_is_zsbuf_used(tc->renderpass_info_recording))
133 /* this should be a "safe" way to indicate to the driver that both loads and stores are required;
134 * driver can always detect invalidation
135 */
136 tc->renderpass_info_recording->zsbuf_clear_partial = true;
137 if (tc->num_queries_active)
138 tc->renderpass_info_recording->has_query_ends = true;
139 }
140
141 /* ensure the batch's array of renderpass data is large enough for the current index */
142 static void
143 tc_batch_renderpass_infos_resize(struct threaded_context *tc, struct tc_batch *batch)
144 {
145 unsigned size = batch->renderpass_infos.capacity;
146 unsigned cur_num = MAX2(batch->renderpass_info_idx, 0);
147
148 if (size / sizeof(struct tc_batch_rp_info) > cur_num)
149 return;
150
151 struct tc_batch_rp_info *infos = batch->renderpass_infos.data;
152 unsigned old_idx = batch->renderpass_info_idx - 1;
153 bool redo = tc->renderpass_info_recording &&
154 tc->renderpass_info_recording == &infos[old_idx].info;
155 if (!util_dynarray_resize(&batch->renderpass_infos, struct tc_batch_rp_info, cur_num + 10))
156 mesa_loge("tc: memory alloc fail!");
157
158 if (size != batch->renderpass_infos.capacity) {
159 /* zero new allocation region */
160 uint8_t *data = batch->renderpass_infos.data;
161 memset(data + size, 0, batch->renderpass_infos.capacity - size);
162 unsigned start = size / sizeof(struct tc_batch_rp_info);
163 unsigned count = (batch->renderpass_infos.capacity - size) /
164 sizeof(struct tc_batch_rp_info);
165 infos = batch->renderpass_infos.data;
166 if (infos->prev)
167 infos->prev->next = infos;
168 for (unsigned i = 0; i < count; i++)
169 util_queue_fence_init(&infos[start + i].ready);
170 /* re-set current recording info on resize */
171 if (redo)
172 tc->renderpass_info_recording = &infos[old_idx].info;
173 }
174 }
175
176 /* signal that the renderpass info is "ready" for use by drivers and will no longer be updated */
177 static void
178 tc_signal_renderpass_info_ready(struct threaded_context *tc)
179 {
180 if (tc->renderpass_info_recording &&
181 !util_queue_fence_is_signalled(&tc_batch_rp_info(tc->renderpass_info_recording)->ready))
182 util_queue_fence_signal(&tc_batch_rp_info(tc->renderpass_info_recording)->ready);
183 }
184
185 /* increment the current renderpass info struct for recording
186 * 'full_copy' is used for preserving data across non-blocking tc batch flushes
187 */
188 static void
189 tc_batch_increment_renderpass_info(struct threaded_context *tc, unsigned batch_idx, bool full_copy)
190 {
191 struct tc_batch *batch = &tc->batch_slots[batch_idx];
192 struct tc_batch_rp_info *tc_info = batch->renderpass_infos.data;
193
194 if (tc_info[0].next || batch->num_total_slots) {
195 /* deadlock condition detected: all batches are in flight, renderpass hasn't ended
196 * (probably a CTS case)
197 */
198 struct tc_batch_rp_info *info = tc_batch_rp_info(tc->renderpass_info_recording);
199 if (!util_queue_fence_is_signalled(&info->ready)) {
200 /* this batch is actively executing and the driver is waiting on the recording fence to signal */
201 /* force all buffer usage to avoid data loss */
202 info->info.cbuf_load = ~(BITFIELD_MASK(8) & info->info.cbuf_clear);
203 info->info.zsbuf_clear_partial = true;
204 info->info.has_query_ends = tc->num_queries_active > 0;
205 /* ensure threaded_context_get_renderpass_info() won't deadlock */
206 info->next = NULL;
207 util_queue_fence_signal(&info->ready);
208 }
209 /* always wait on the batch to finish since this will otherwise overwrite thread data */
210 util_queue_fence_wait(&batch->fence);
211 }
212 /* increment rp info and initialize it */
213 batch->renderpass_info_idx++;
214 tc_batch_renderpass_infos_resize(tc, batch);
215 tc_info = batch->renderpass_infos.data;
216
217 if (full_copy) {
218 /* this should only be called when changing batches */
219 assert(batch->renderpass_info_idx == 0);
220 /* copy the previous data in its entirety: this is still the same renderpass */
221 if (tc->renderpass_info_recording) {
222 tc_info[batch->renderpass_info_idx].info.data = tc->renderpass_info_recording->data;
223 tc_batch_rp_info(tc->renderpass_info_recording)->next = &tc_info[batch->renderpass_info_idx];
224 tc_info[batch->renderpass_info_idx].prev = tc_batch_rp_info(tc->renderpass_info_recording);
225 /* guard against deadlock scenario */
226 assert(&tc_batch_rp_info(tc->renderpass_info_recording)->next->info != tc->renderpass_info_recording);
227 } else {
228 tc_info[batch->renderpass_info_idx].info.data = 0;
229 tc_info[batch->renderpass_info_idx].prev = NULL;
230 }
231 } else {
232 /* selectively copy: only the CSO metadata is copied, and a new framebuffer state will be added later */
233 tc_info[batch->renderpass_info_idx].info.data = 0;
234 if (tc->renderpass_info_recording) {
235 tc_info[batch->renderpass_info_idx].info.data16[2] = tc->renderpass_info_recording->data16[2];
236 tc_batch_rp_info(tc->renderpass_info_recording)->next = NULL;
237 tc_info[batch->renderpass_info_idx].prev = NULL;
238 }
239 }
240
241 assert(!full_copy || !tc->renderpass_info_recording || tc_batch_rp_info(tc->renderpass_info_recording)->next);
242 /* signal existing info since it will not be used anymore */
243 tc_signal_renderpass_info_ready(tc);
244 util_queue_fence_reset(&tc_info[batch->renderpass_info_idx].ready);
245 /* guard against deadlock scenario */
246 assert(tc->renderpass_info_recording != &tc_info[batch->renderpass_info_idx].info);
247 /* this is now the current recording renderpass info */
248 tc->renderpass_info_recording = &tc_info[batch->renderpass_info_idx].info;
249 batch->max_renderpass_info_idx = batch->renderpass_info_idx;
250 }
251
252 static ALWAYS_INLINE struct tc_renderpass_info *
253 tc_get_renderpass_info(struct threaded_context *tc)
254 {
255 return tc->renderpass_info_recording;
256 }
257
258 /* update metadata at draw time */
259 static void
260 tc_parse_draw(struct threaded_context *tc)
261 {
262 struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
263
264 if (info) {
265 /* all buffers that aren't cleared are considered loaded */
266 info->cbuf_load |= ~info->cbuf_clear;
267 if (!info->zsbuf_clear)
268 info->zsbuf_load = true;
269 /* previous invalidates are no longer relevant */
270 info->cbuf_invalidate = 0;
271 info->zsbuf_invalidate = false;
272 info->has_draw = true;
273 info->has_query_ends |= tc->query_ended;
274 }
275
276 tc->in_renderpass = true;
277 tc->seen_fb_state = true;
278 tc->query_ended = false;
279 }
280
281 static void *
282 to_call_check(void *ptr, unsigned num_slots)
283 {
284 #if TC_DEBUG >= 1
285 struct tc_call_base *call = ptr;
286 tc_assert(call->num_slots == num_slots);
287 #endif
288 return ptr;
289 }
290 #define to_call(ptr, type) ((struct type *)to_call_check((void *)(ptr), call_size(type)))
291
292 #define size_to_slots(size) DIV_ROUND_UP(size, 8)
293 #define call_size(type) size_to_slots(sizeof(struct type))
294 #define call_size_with_slots(type, num_slots) size_to_slots( \
295 sizeof(struct type) + sizeof(((struct type*)NULL)->slot[0]) * (num_slots))
296 #define get_next_call(ptr, type) ((struct type*)((uint64_t*)ptr + call_size(type)))
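/* Worked example of the slot math above (sizes are illustrative and depend on
 * the ABI): each slot is one 8-byte uint64_t, so a 20-byte call struct needs
 * size_to_slots(20) = DIV_ROUND_UP(20, 8) = 3 slots, and a variable-sized call
 * such as tc_sampler_states with N trailing pointer-sized elements needs
 * call_size_with_slots(tc_sampler_states, N) slots, i.e. the fixed header plus
 * N * sizeof(void *) rounded up to whole slots.
 */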
297
298 ALWAYS_INLINE static void
299 tc_set_resource_batch_usage(struct threaded_context *tc, struct pipe_resource *pres)
300 {
301 /* ignore batch usage when persistent */
302 if (threaded_resource(pres)->last_batch_usage != INT8_MAX)
303 threaded_resource(pres)->last_batch_usage = tc->next;
304 threaded_resource(pres)->batch_generation = tc->batch_generation;
305 }
306
307 ALWAYS_INLINE static void
308 tc_set_resource_batch_usage_persistent(struct threaded_context *tc, struct pipe_resource *pres, bool enable)
309 {
310 if (!pres)
311 return;
312 /* mark with special value to block any unsynchronized access */
313 threaded_resource(pres)->last_batch_usage = enable ? INT8_MAX : tc->next;
314 threaded_resource(pres)->batch_generation = tc->batch_generation;
315 }
316
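/* Heuristic busy-test based on the batch ring: a resource remembers the index
 * of the last batch that used it (last_batch_usage, set in
 * tc_set_resource_batch_usage) and the ring generation at that time
 * (batch_generation), while the context tracks the last batch index known to
 * have completed (last_completed, updated by the driver thread in
 * tc_batch_execute) and the current generation. The checks below compare these
 * values and treat any ambiguous case as busy, since last_completed is not an
 * exact measurement.
 */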
317 /* this can ONLY be used to check against the currently recording batch */
318 ALWAYS_INLINE static bool
319 tc_resource_batch_usage_test_busy(const struct threaded_context *tc, const struct pipe_resource *pres)
320 {
321 const struct threaded_resource *tbuf = (const struct threaded_resource*)pres;
322
323 if (!tc->options.unsynchronized_texture_subdata)
324 return true;
325
326 /* resource has persistent access: assume always busy */
327 if (tbuf->last_batch_usage == INT8_MAX)
328 return true;
329
330 /* resource has never been seen */
331 if (tbuf->last_batch_usage == -1)
332 return false;
333
334 /* resource has been seen but no batches have executed */
335 if (tc->last_completed == -1)
336 return true;
337
338 /* begin comparisons checking number of times batches have cycled */
339 unsigned diff = tc->batch_generation - tbuf->batch_generation;
340 /* resource has been seen, batches have fully cycled at least once */
341 if (diff > 1)
342 return false;
343
344 /* resource has been seen in current batch cycle: return whether batch has definitely completed */
345 if (diff == 0)
346 return tc->last_completed >= tbuf->last_batch_usage;
347
348 /* resource has been seen within one batch cycle: check for batch wrapping */
349 if (tc->last_completed >= tbuf->last_batch_usage)
350 /* this or a subsequent pre-wrap batch was the last to definitely complete: resource is idle */
351 return false;
352
353 /* batch execution has not definitely wrapped: resource is definitely not idle */
354 if (tc->last_completed > tc->next)
355 return true;
356
357 /* resource was seen pre-wrap, batch execution has definitely wrapped: idle */
358 if (tbuf->last_batch_usage > tc->last_completed)
359 return false;
360
361 /* tc->last_completed is not an exact measurement, so anything else is considered busy */
362 return true;
363 }
364
365 /* Assign src to dst while dst is uninitialized. */
366 static inline void
367 tc_set_resource_reference(struct pipe_resource **dst, struct pipe_resource *src)
368 {
369 *dst = src;
370 pipe_reference(NULL, &src->reference); /* only increment refcount */
371 }
372
373 /* Assign src to dst while dst is uninitialized. */
374 static inline void
375 tc_set_vertex_state_reference(struct pipe_vertex_state **dst,
376 struct pipe_vertex_state *src)
377 {
378 *dst = src;
379 pipe_reference(NULL, &src->reference); /* only increment refcount */
380 }
381
382 /* Unreference dst but don't touch the dst pointer. */
383 static inline void
384 tc_drop_resource_reference(struct pipe_resource *dst)
385 {
386 if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
387 pipe_resource_destroy(dst);
388 }
389
390 /* Unreference dst but don't touch the dst pointer. */
391 static inline void
392 tc_drop_surface_reference(struct pipe_surface *dst)
393 {
394 if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
395 dst->context->surface_destroy(dst->context, dst);
396 }
397
398 /* Unreference dst but don't touch the dst pointer. */
399 static inline void
400 tc_drop_so_target_reference(struct pipe_stream_output_target *dst)
401 {
402 if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
403 dst->context->stream_output_target_destroy(dst->context, dst);
404 }
405
406 /**
407 * Subtract the given number of references.
408 */
409 static inline void
410 tc_drop_vertex_state_references(struct pipe_vertex_state *dst, int num_refs)
411 {
412 int count = p_atomic_add_return(&dst->reference.count, -num_refs);
413
414 assert(count >= 0);
415 /* Underflows shouldn't happen, but let's be safe. */
416 if (count <= 0)
417 dst->screen->vertex_state_destroy(dst->screen, dst);
418 }
419
420 /* We don't want to read or write min_index and max_index, because
421 * they shouldn't be needed by drivers at this point.
422 */
423 #define DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX \
424 offsetof(struct pipe_draw_info, min_index)
425
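/* Advance to the next renderpass info. This relies on tc_renderpass_info being
 * the first member of tc_batch_rp_info and on the infos living in a contiguous
 * array (the batch's renderpass_infos dynarray), so &info[1].info is simply the
 * next element.
 */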
426 ALWAYS_INLINE static struct tc_renderpass_info *
427 incr_rp_info(struct tc_renderpass_info *tc_info)
428 {
429 struct tc_batch_rp_info *info = tc_batch_rp_info(tc_info);
430 return &info[1].info;
431 }
432
433 ALWAYS_INLINE static void
434 batch_execute(struct tc_batch *batch, struct pipe_context *pipe, uint64_t *last, bool parsing)
435 {
436 /* if the framebuffer state is persisting from a previous batch,
437 * begin incrementing renderpass info on the first set_framebuffer_state call
438 */
439 bool first = !batch->first_set_fb;
440 const tc_execute *execute_func = batch->tc->execute_func;
441
442 for (uint64_t *iter = batch->slots; iter != last;) {
443 struct tc_call_base *call = (struct tc_call_base *)iter;
444
445 tc_assert(call->sentinel == TC_SENTINEL);
446
447 #if TC_DEBUG >= 3
448 tc_printf("CALL: %s", tc_call_names[call->call_id]);
449 #endif
450
451 TC_TRACE_SCOPE(call->call_id);
452
453 iter += execute_func[call->call_id](pipe, call);
454
455 if (parsing) {
456 if (call->call_id == TC_CALL_flush) {
457 /* always increment renderpass info for non-deferred flushes */
458 batch->tc->renderpass_info = incr_rp_info(batch->tc->renderpass_info);
459 /* if a flush happens, renderpass info is always incremented after */
460 first = false;
461 } else if (call->call_id == TC_CALL_set_framebuffer_state) {
462 /* the renderpass info pointer is already set at the start of the batch,
463 * so don't increment on the first set_framebuffer_state call
464 */
465 if (!first)
466 batch->tc->renderpass_info = incr_rp_info(batch->tc->renderpass_info);
467 first = false;
468 } else if (call->call_id >= TC_CALL_draw_single &&
469 call->call_id <= TC_CALL_draw_vstate_multi) {
470 /* if a draw happens before a set_framebuffer_state on this batch,
471 * begin incrementing renderpass data
472 */
473 first = false;
474 }
475 }
476 }
477 }
478
479 static void
480 tc_batch_execute(void *job, UNUSED void *gdata, int thread_index)
481 {
482 struct tc_batch *batch = job;
483 struct pipe_context *pipe = batch->tc->pipe;
484 uint64_t *last = &batch->slots[batch->num_total_slots];
485
486 tc_batch_check(batch);
487 tc_set_driver_thread(batch->tc);
488
489 assert(!batch->token);
490
491 /* setup renderpass info */
492 batch->tc->renderpass_info = batch->renderpass_infos.data;
493
494 if (batch->tc->options.parse_renderpass_info) {
495 batch_execute(batch, pipe, last, true);
496
497 struct tc_batch_rp_info *info = batch->renderpass_infos.data;
498 for (unsigned i = 0; i < batch->max_renderpass_info_idx + 1; i++) {
499 if (info[i].next)
500 info[i].next->prev = NULL;
501 info[i].next = NULL;
502 }
503 } else {
504 batch_execute(batch, pipe, last, false);
505 }
506
507 /* Add the fence to the list of fences for the driver to signal at the next
508 * flush, which we use for tracking which buffers are referenced by
509 * an unflushed command buffer.
510 */
511 struct threaded_context *tc = batch->tc;
512 struct util_queue_fence *fence =
513 &tc->buffer_lists[batch->buffer_list_index].driver_flushed_fence;
514
515 if (tc->options.driver_calls_flush_notify) {
516 tc->signal_fences_next_flush[tc->num_signal_fences_next_flush++] = fence;
517
518 /* Since our buffer lists are chained as a ring, we need to flush
519 * the context twice as we go around the ring to make the driver signal
520 * the buffer list fences, so that the producer thread can reuse the buffer
521 * list structures for the next batches without waiting.
522 */
523 unsigned half_ring = TC_MAX_BUFFER_LISTS / 2;
524 if (batch->buffer_list_index % half_ring == half_ring - 1)
525 pipe->flush(pipe, NULL, PIPE_FLUSH_ASYNC);
526 } else {
527 util_queue_fence_signal(fence);
528 }
529
530 tc_clear_driver_thread(batch->tc);
531 tc_batch_check(batch);
532 batch->num_total_slots = 0;
533 batch->last_mergeable_call = NULL;
534 batch->first_set_fb = false;
535 batch->max_renderpass_info_idx = 0;
536 batch->tc->last_completed = batch->batch_idx;
537 }
538
539 static void
540 tc_begin_next_buffer_list(struct threaded_context *tc)
541 {
542 tc->next_buf_list = (tc->next_buf_list + 1) % TC_MAX_BUFFER_LISTS;
543
544 tc->batch_slots[tc->next].buffer_list_index = tc->next_buf_list;
545
546 /* Clear the buffer list in the new empty batch. */
547 struct tc_buffer_list *buf_list = &tc->buffer_lists[tc->next_buf_list];
548 assert(util_queue_fence_is_signalled(&buf_list->driver_flushed_fence));
549 util_queue_fence_reset(&buf_list->driver_flushed_fence); /* set to unsignalled */
550 BITSET_ZERO(buf_list->buffer_list);
551
552 tc->add_all_gfx_bindings_to_buffer_list = true;
553 tc->add_all_compute_bindings_to_buffer_list = true;
554 }
555
556 static void
557 tc_add_call_end(struct tc_batch *next)
558 {
559 /* Add a dummy last call that won't be executed, but will indicate the end
560 * of the batch. It's for calls that always look at the next call and this
561 * stops them looking farther ahead.
562 */
563 assert(next->num_total_slots < TC_SLOTS_PER_BATCH);
564 struct tc_call_base *call =
565 (struct tc_call_base*)&next->slots[next->num_total_slots];
566 call->call_id = TC_NUM_CALLS;
567 call->num_slots = 1;
568 }
569
570 static void
571 tc_batch_flush(struct threaded_context *tc, bool full_copy)
572 {
573 struct tc_batch *next = &tc->batch_slots[tc->next];
574 unsigned next_id = (tc->next + 1) % TC_MAX_BATCHES;
575
576 tc_assert(next->num_total_slots != 0);
577 tc_add_call_end(next);
578
579 tc_batch_check(next);
580 tc_debug_check(tc);
581 tc->bytes_mapped_estimate = 0;
582 tc->bytes_replaced_estimate = 0;
583 p_atomic_add(&tc->num_offloaded_slots, next->num_total_slots);
584
585 if (next->token) {
586 next->token->tc = NULL;
587 tc_unflushed_batch_token_reference(&next->token, NULL);
588 }
589 /* reset renderpass info index for subsequent use */
590 next->renderpass_info_idx = -1;
591
592 /* always increment renderpass info on batch flush;
593 * renderpass info can only be accessed by its owner batch during execution
594 */
595 if (tc->renderpass_info_recording) {
596 tc->batch_slots[next_id].first_set_fb = full_copy;
597 tc_batch_increment_renderpass_info(tc, next_id, full_copy);
598 }
599
600 util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute,
601 NULL, 0);
602 tc->last = tc->next;
603 tc->next = next_id;
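/* Batches form a ring; when the index wraps back to 0, bump tc->batch_generation
 * so that per-resource last_batch_usage values recorded in older ring cycles can
 * be told apart (see tc_resource_batch_usage_test_busy).
 */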
604 if (next_id == 0)
605 tc->batch_generation++;
606 tc_begin_next_buffer_list(tc);
607
608 }
609
610 /* This is the function that adds variable-sized calls into the current
611 * batch. It also flushes the batch if there is not enough space there.
612 * All other higher-level "add" functions use it.
613 */
614 static void *
615 tc_add_sized_call(struct threaded_context *tc, enum tc_call_id id,
616 unsigned num_slots)
617 {
618 TC_TRACE_SCOPE(id);
619 struct tc_batch *next = &tc->batch_slots[tc->next];
620 assert(num_slots <= TC_SLOTS_PER_BATCH - 1);
621 tc_debug_check(tc);
622
623 if (unlikely(next->num_total_slots + num_slots > TC_SLOTS_PER_BATCH - 1)) {
624 /* copy existing renderpass info during flush */
625 tc_batch_flush(tc, true);
626 next = &tc->batch_slots[tc->next];
627 tc_assert(next->num_total_slots == 0);
628 tc_assert(next->last_mergeable_call == NULL);
629 }
630
631 tc_assert(util_queue_fence_is_signalled(&next->fence));
632
633 struct tc_call_base *call = (struct tc_call_base*)&next->slots[next->num_total_slots];
634 next->num_total_slots += num_slots;
635
636 #if !defined(NDEBUG) && TC_DEBUG >= 1
637 call->sentinel = TC_SENTINEL;
638 #endif
639 call->call_id = id;
640 call->num_slots = num_slots;
641
642 #if TC_DEBUG >= 3
643 tc_printf("ENQUEUE: %s", tc_call_names[id]);
644 #endif
645
646 tc_debug_check(tc);
647 return call;
648 }
649
650 #define tc_add_call(tc, execute, type) \
651 ((struct type*)tc_add_sized_call(tc, execute, call_size(type)))
652
653 #define tc_add_slot_based_call(tc, execute, type, num_slots) \
654 ((struct type*)tc_add_sized_call(tc, execute, \
655 call_size_with_slots(type, num_slots)))
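/* Typical usage of the helpers above (mirroring tc_bind_sampler_states further
 * down in this file): fixed-size calls use tc_add_call() and fill in the
 * returned struct, while variable-sized calls use tc_add_slot_based_call()
 * with the number of trailing elements, e.g.
 *
 *    struct tc_sampler_states *p =
 *       tc_add_slot_based_call(tc, TC_CALL_bind_sampler_states,
 *                              tc_sampler_states, count);
 *    p->shader = shader;
 *    p->start = start;
 *    p->count = count;
 *    memcpy(p->slot, states, count * sizeof(states[0]));
 */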
656
657 /* Returns the last mergeable call that was added to the unflushed
658 * batch, or NULL if the address of that call is not currently known
659 * or no such call exists in the unflushed batch.
660 */
661 static struct tc_call_base *
662 tc_get_last_mergeable_call(struct threaded_context *tc)
663 {
664 struct tc_batch *batch = &tc->batch_slots[tc->next];
665 struct tc_call_base *call = batch->last_mergeable_call;
666
667 tc_assert(call == NULL || call->num_slots <= batch->num_total_slots);
668
669 if (call && (uint64_t *)call == &batch->slots[batch->num_total_slots - call->num_slots])
670 return call;
671 else
672 return NULL;
673 }
674
675 /* Increases the size of the last call in the unflushed batch to the
676 * given number of slots, if possible, without changing the call's data.
677 */
678 static bool
679 tc_enlarge_last_mergeable_call(struct threaded_context *tc, unsigned desired_num_slots)
680 {
681 struct tc_batch *batch = &tc->batch_slots[tc->next];
682 struct tc_call_base *call = tc_get_last_mergeable_call(tc);
683
684 tc_assert(call);
685 tc_assert(desired_num_slots >= call->num_slots);
686
687 unsigned added_slots = desired_num_slots - call->num_slots;
688
689 if (unlikely(batch->num_total_slots + added_slots > TC_SLOTS_PER_BATCH - 1))
690 return false;
691
692 batch->num_total_slots += added_slots;
693 call->num_slots += added_slots;
694
695 return true;
696 }
697
698 static void
699 tc_mark_call_mergeable(struct threaded_context *tc, struct tc_call_base *call)
700 {
701 struct tc_batch *batch = &tc->batch_slots[tc->next];
702 tc_assert(call->num_slots <= batch->num_total_slots);
703 tc_assert((uint64_t *)call == &batch->slots[batch->num_total_slots - call->num_slots]);
704 batch->last_mergeable_call = call;
705 }
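/* Expected merge flow for callers (such as the buffer subdata path): check
 * tc_get_last_mergeable_call() for a compatible trailing call, try to grow it
 * in place with tc_enlarge_last_mergeable_call(), and otherwise add a fresh
 * call and register it with tc_mark_call_mergeable() so the next addition can
 * merge into it.
 */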
706
707 static bool
708 tc_is_sync(struct threaded_context *tc)
709 {
710 struct tc_batch *last = &tc->batch_slots[tc->last];
711 struct tc_batch *next = &tc->batch_slots[tc->next];
712
713 return util_queue_fence_is_signalled(&last->fence) &&
714 !next->num_total_slots;
715 }
716
717 static void
718 _tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char *func)
719 {
720 struct tc_batch *last = &tc->batch_slots[tc->last];
721 struct tc_batch *next = &tc->batch_slots[tc->next];
722 bool synced = false;
723
724 MESA_TRACE_SCOPE(func);
725
726 tc_debug_check(tc);
727
728 if (tc->options.parse_renderpass_info && tc->in_renderpass && !tc->flushing) {
729 /* corner case: if tc syncs for any reason but a driver flush during a renderpass,
730 * then the current renderpass info MUST be signaled to avoid deadlocking the driver
731 *
732 * this is not a "complete" signal operation, however, as it's unknown what calls may
733 * come after this one, which means that framebuffer attachment data is unreliable
734 *
735 * to avoid erroneously passing bad state to the driver (e.g., allowing zsbuf elimination),
736 * force all attachments active and assume the app was going to get bad perf here anyway
737 */
738 tc_sanitize_renderpass_info(tc);
739 }
740 tc_signal_renderpass_info_ready(tc);
741
742 /* Only wait for queued calls... */
743 if (!util_queue_fence_is_signalled(&last->fence)) {
744 util_queue_fence_wait(&last->fence);
745 synced = true;
746 }
747
748 tc_debug_check(tc);
749
750 if (next->token) {
751 next->token->tc = NULL;
752 tc_unflushed_batch_token_reference(&next->token, NULL);
753 }
754
755 /* .. and execute unflushed calls directly. */
756 if (next->num_total_slots) {
757 p_atomic_add(&tc->num_direct_slots, next->num_total_slots);
758 tc->bytes_mapped_estimate = 0;
759 tc->bytes_replaced_estimate = 0;
760 tc_add_call_end(next);
761 tc_batch_execute(next, NULL, 0);
762 tc_begin_next_buffer_list(tc);
763 synced = true;
764 }
765
766 if (synced) {
767 p_atomic_inc(&tc->num_syncs);
768
769 if (tc_strcmp(func, "tc_destroy") != 0) {
770 tc_printf("sync %s %s", func, info);
771 }
772 }
773
774 tc_debug_check(tc);
775
776 if (tc->options.parse_renderpass_info) {
777 int renderpass_info_idx = next->renderpass_info_idx;
778 if (renderpass_info_idx > 0) {
779 /* don't reset if fb state is unflushed */
780 bool fb_no_draw = tc->seen_fb_state && !tc->renderpass_info_recording->has_draw;
781 uint32_t fb_info = tc->renderpass_info_recording->data32[0];
782 next->renderpass_info_idx = -1;
783 tc_batch_increment_renderpass_info(tc, tc->next, false);
784 if (fb_no_draw)
785 tc->renderpass_info_recording->data32[0] = fb_info;
786 } else if (tc->renderpass_info_recording->has_draw) {
787 tc->renderpass_info_recording->data32[0] = 0;
788 }
789 tc->seen_fb_state = false;
790 tc->query_ended = false;
791 }
792 }
793
794 #define tc_sync(tc) _tc_sync(tc, "", __func__)
795 #define tc_sync_msg(tc, info) _tc_sync(tc, info, __func__)
796
797 /**
798 * Call this from fence_finish for same-context fence waits of deferred fences
799 * that haven't been flushed yet.
800 *
801 * The passed pipe_context must be the one passed to pipe_screen::fence_finish,
802 * i.e., the wrapped one.
803 */
804 void
805 threaded_context_flush(struct pipe_context *_pipe,
806 struct tc_unflushed_batch_token *token,
807 bool prefer_async)
808 {
809 struct threaded_context *tc = threaded_context(_pipe);
810
811 /* This is called from the gallium frontend / application thread. */
812 if (token->tc && token->tc == tc) {
813 struct tc_batch *last = &tc->batch_slots[tc->last];
814
815 /* Prefer to do the flush in the driver thread if it is already
816 * running. That should be better for cache locality.
817 */
818 if (prefer_async || !util_queue_fence_is_signalled(&last->fence))
819 tc_batch_flush(tc, false);
820 else
821 tc_sync(token->tc);
822 }
823 }
824
825 static void
826 tc_add_to_buffer_list(struct tc_buffer_list *next, struct pipe_resource *buf)
827 {
828 uint32_t id = threaded_resource(buf)->buffer_id_unique;
829 BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK);
830 }
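/* Note that buffer IDs are hashed into a fixed-size bitset via
 * TC_BUFFER_ID_MASK, so two buffers can map to the same bit. That only causes
 * false positives: tc_is_buffer_busy() may then report an idle buffer as busy,
 * which is safe, just conservative.
 */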
831
832 /* Reset a range of buffer binding slots. */
833 static void
834 tc_unbind_buffers(uint32_t *binding, unsigned count)
835 {
836 if (count)
837 memset(binding, 0, sizeof(*binding) * count);
838 }
839
840 static void
841 tc_add_bindings_to_buffer_list(BITSET_WORD *buffer_list, const uint32_t *bindings,
842 unsigned count)
843 {
844 for (unsigned i = 0; i < count; i++) {
845 if (bindings[i])
846 BITSET_SET(buffer_list, bindings[i] & TC_BUFFER_ID_MASK);
847 }
848 }
849
850 static bool
851 tc_rebind_bindings(uint32_t old_id, uint32_t new_id, uint32_t *bindings,
852 unsigned count)
853 {
854 unsigned rebind_count = 0;
855
856 for (unsigned i = 0; i < count; i++) {
857 if (bindings[i] == old_id) {
858 bindings[i] = new_id;
859 rebind_count++;
860 }
861 }
862 return rebind_count;
863 }
864
865 static void
866 tc_add_shader_bindings_to_buffer_list(struct threaded_context *tc,
867 BITSET_WORD *buffer_list,
868 enum pipe_shader_type shader)
869 {
870 tc_add_bindings_to_buffer_list(buffer_list, tc->const_buffers[shader],
871 tc->max_const_buffers);
872 if (tc->seen_shader_buffers[shader]) {
873 tc_add_bindings_to_buffer_list(buffer_list, tc->shader_buffers[shader],
874 tc->max_shader_buffers);
875 }
876 if (tc->seen_image_buffers[shader]) {
877 tc_add_bindings_to_buffer_list(buffer_list, tc->image_buffers[shader],
878 tc->max_images);
879 }
880 if (tc->seen_sampler_buffers[shader]) {
881 tc_add_bindings_to_buffer_list(buffer_list, tc->sampler_buffers[shader],
882 tc->max_samplers);
883 }
884 }
885
886 static unsigned
887 tc_rebind_shader_bindings(struct threaded_context *tc, uint32_t old_id,
888 uint32_t new_id, enum pipe_shader_type shader, uint32_t *rebind_mask)
889 {
890 unsigned ubo = 0, ssbo = 0, img = 0, sampler = 0;
891
892 ubo = tc_rebind_bindings(old_id, new_id, tc->const_buffers[shader],
893 tc->max_const_buffers);
894 if (ubo)
895 *rebind_mask |= BITFIELD_BIT(TC_BINDING_UBO_VS) << shader;
896 if (tc->seen_shader_buffers[shader]) {
897 ssbo = tc_rebind_bindings(old_id, new_id, tc->shader_buffers[shader],
898 tc->max_shader_buffers);
899 if (ssbo)
900 *rebind_mask |= BITFIELD_BIT(TC_BINDING_SSBO_VS) << shader;
901 }
902 if (tc->seen_image_buffers[shader]) {
903 img = tc_rebind_bindings(old_id, new_id, tc->image_buffers[shader],
904 tc->max_images);
905 if (img)
906 *rebind_mask |= BITFIELD_BIT(TC_BINDING_IMAGE_VS) << shader;
907 }
908 if (tc->seen_sampler_buffers[shader]) {
909 sampler = tc_rebind_bindings(old_id, new_id, tc->sampler_buffers[shader],
910 tc->max_samplers);
911 if (sampler)
912 *rebind_mask |= BITFIELD_BIT(TC_BINDING_SAMPLERVIEW_VS) << shader;
913 }
914 return ubo + ssbo + img + sampler;
915 }
916
917 /* Add all bound buffers used by VS/TCS/TES/GS/FS to the buffer list.
918 * This is called by the first draw call in a batch when we want to inherit
919 * all bindings set by the previous batch.
920 */
921 static void
922 tc_add_all_gfx_bindings_to_buffer_list(struct threaded_context *tc)
923 {
924 BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;
925
926 tc_add_bindings_to_buffer_list(buffer_list, tc->vertex_buffers, tc->num_vertex_buffers);
927 if (tc->seen_streamout_buffers)
928 tc_add_bindings_to_buffer_list(buffer_list, tc->streamout_buffers, PIPE_MAX_SO_BUFFERS);
929
930 tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_VERTEX);
931 tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_FRAGMENT);
932
933 if (tc->seen_tcs)
934 tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_CTRL);
935 if (tc->seen_tes)
936 tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_EVAL);
937 if (tc->seen_gs)
938 tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_GEOMETRY);
939
940 tc->add_all_gfx_bindings_to_buffer_list = false;
941 }
942
943 /* Add all bound buffers used by compute to the buffer list.
944 * This is called by the first compute call in a batch when we want to inherit
945 * all bindings set by the previous batch.
946 */
947 static void
948 tc_add_all_compute_bindings_to_buffer_list(struct threaded_context *tc)
949 {
950 BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;
951
952 tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_COMPUTE);
953 tc->add_all_compute_bindings_to_buffer_list = false;
954 }
955
956 static unsigned
957 tc_rebind_buffer(struct threaded_context *tc, uint32_t old_id, uint32_t new_id, uint32_t *rebind_mask)
958 {
959 unsigned vbo = 0, so = 0;
960
961 vbo = tc_rebind_bindings(old_id, new_id, tc->vertex_buffers,
962 tc->num_vertex_buffers);
963 if (vbo)
964 *rebind_mask |= BITFIELD_BIT(TC_BINDING_VERTEX_BUFFER);
965
966 if (tc->seen_streamout_buffers) {
967 so = tc_rebind_bindings(old_id, new_id, tc->streamout_buffers,
968 PIPE_MAX_SO_BUFFERS);
969 if (so)
970 *rebind_mask |= BITFIELD_BIT(TC_BINDING_STREAMOUT_BUFFER);
971 }
972 unsigned rebound = vbo + so;
973
974 rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_VERTEX, rebind_mask);
975 rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_FRAGMENT, rebind_mask);
976
977 if (tc->seen_tcs)
978 rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_CTRL, rebind_mask);
979 if (tc->seen_tes)
980 rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_EVAL, rebind_mask);
981 if (tc->seen_gs)
982 rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_GEOMETRY, rebind_mask);
983
984 rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_COMPUTE, rebind_mask);
985
986 if (rebound)
987 BITSET_SET(tc->buffer_lists[tc->next_buf_list].buffer_list, new_id & TC_BUFFER_ID_MASK);
988 return rebound;
989 }
990
991 static bool
992 tc_is_buffer_bound_with_mask(uint32_t id, uint32_t *bindings, unsigned binding_mask)
993 {
994 while (binding_mask) {
995 if (bindings[u_bit_scan(&binding_mask)] == id)
996 return true;
997 }
998 return false;
999 }
1000
1001 static bool
1002 tc_is_buffer_shader_bound_for_write(struct threaded_context *tc, uint32_t id,
1003 enum pipe_shader_type shader)
1004 {
1005 if (tc->seen_shader_buffers[shader] &&
1006 tc_is_buffer_bound_with_mask(id, tc->shader_buffers[shader],
1007 tc->shader_buffers_writeable_mask[shader]))
1008 return true;
1009
1010 if (tc->seen_image_buffers[shader] &&
1011 tc_is_buffer_bound_with_mask(id, tc->image_buffers[shader],
1012 tc->image_buffers_writeable_mask[shader]))
1013 return true;
1014
1015 return false;
1016 }
1017
1018 static bool
1019 tc_is_buffer_bound_for_write(struct threaded_context *tc, uint32_t id)
1020 {
1021 if (tc->seen_streamout_buffers &&
1022 tc_is_buffer_bound_with_mask(id, tc->streamout_buffers,
1023 BITFIELD_MASK(PIPE_MAX_SO_BUFFERS)))
1024 return true;
1025
1026 if (tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_VERTEX) ||
1027 tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_FRAGMENT) ||
1028 tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_COMPUTE))
1029 return true;
1030
1031 if (tc->seen_tcs &&
1032 tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_CTRL))
1033 return true;
1034
1035 if (tc->seen_tes &&
1036 tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_EVAL))
1037 return true;
1038
1039 if (tc->seen_gs &&
1040 tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_GEOMETRY))
1041 return true;
1042
1043 return false;
1044 }
1045
1046 static bool
1047 tc_is_buffer_busy(struct threaded_context *tc, struct threaded_resource *tbuf,
1048 unsigned map_usage)
1049 {
1050 if (!tc->options.is_resource_busy)
1051 return true;
1052
1053 uint32_t id_hash = tbuf->buffer_id_unique & TC_BUFFER_ID_MASK;
1054
1055 for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) {
1056 struct tc_buffer_list *buf_list = &tc->buffer_lists[i];
1057
1058 /* If the buffer is referenced by a batch that hasn't been flushed (by tc or the driver),
1059 * then the buffer is considered busy. */
1060 if (!util_queue_fence_is_signalled(&buf_list->driver_flushed_fence) &&
1061 BITSET_TEST(buf_list->buffer_list, id_hash))
1062 return true;
1063 }
1064
1065 /* The buffer isn't referenced by any unflushed batch: we can safely ask the driver whether
1066 * this buffer is busy or not. */
1067 return tc->options.is_resource_busy(tc->pipe->screen, tbuf->latest, map_usage);
1068 }
1069
1070 /**
1071 * allow_cpu_storage should be false for user memory and imported buffers.
1072 */
1073 void
1074 threaded_resource_init(struct pipe_resource *res, bool allow_cpu_storage)
1075 {
1076 struct threaded_resource *tres = threaded_resource(res);
1077
1078 tres->latest = &tres->b;
1079 tres->cpu_storage = NULL;
1080 util_range_init(&tres->valid_buffer_range);
1081 tres->is_shared = false;
1082 tres->is_user_ptr = false;
1083 tres->buffer_id_unique = 0;
1084 tres->pending_staging_uploads = 0;
1085 tres->last_batch_usage = -1;
1086 util_range_init(&tres->pending_staging_uploads_range);
1087
1088 if (allow_cpu_storage &&
1089 !(res->flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
1090 PIPE_RESOURCE_FLAG_SPARSE |
1091 PIPE_RESOURCE_FLAG_ENCRYPTED)) &&
1092 /* We need buffer invalidation and buffer busyness tracking for the CPU
1093 * storage, which aren't supported with pipe_vertex_state. */
1094 !(res->bind & PIPE_BIND_VERTEX_STATE))
1095 tres->allow_cpu_storage = true;
1096 else
1097 tres->allow_cpu_storage = false;
1098 }
1099
1100 void
1101 threaded_resource_deinit(struct pipe_resource *res)
1102 {
1103 struct threaded_resource *tres = threaded_resource(res);
1104
1105 if (tres->latest != &tres->b)
1106 pipe_resource_reference(&tres->latest, NULL);
1107 util_range_destroy(&tres->valid_buffer_range);
1108 util_range_destroy(&tres->pending_staging_uploads_range);
1109 align_free(tres->cpu_storage);
1110 }
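/* A driver that wraps its resources with threaded_resource is expected to call
 * threaded_resource_init() when the resource is created (passing
 * allow_cpu_storage = false for user memory and imported buffers, as noted
 * above) and threaded_resource_deinit() when it is destroyed.
 */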
1111
1112 struct pipe_context *
1113 threaded_context_unwrap_sync(struct pipe_context *pipe)
1114 {
1115 if (!pipe || !pipe->priv)
1116 return pipe;
1117
1118 tc_sync(threaded_context(pipe));
1119 return (struct pipe_context*)pipe->priv;
1120 }
1121
1122
1123 /********************************************************************
1124 * simple functions
1125 */
1126
1127 #define TC_FUNC1(func, qualifier, type, deref, addr, ...) \
1128 struct tc_call_##func { \
1129 struct tc_call_base base; \
1130 type state; \
1131 }; \
1132 \
1133 static uint16_t \
1134 tc_call_##func(struct pipe_context *pipe, void *call) \
1135 { \
1136 pipe->func(pipe, addr(to_call(call, tc_call_##func)->state)); \
1137 return call_size(tc_call_##func); \
1138 } \
1139 \
1140 static void \
1141 tc_##func(struct pipe_context *_pipe, qualifier type deref param) \
1142 { \
1143 struct threaded_context *tc = threaded_context(_pipe); \
1144 struct tc_call_##func *p = (struct tc_call_##func*) \
1145 tc_add_call(tc, TC_CALL_##func, tc_call_##func); \
1146 p->state = deref(param); \
1147 __VA_ARGS__; \
1148 }
1149
1150 TC_FUNC1(set_active_query_state, , bool, , )
1151
1152 TC_FUNC1(set_blend_color, const, struct pipe_blend_color, *, &)
1153 TC_FUNC1(set_stencil_ref, const, struct pipe_stencil_ref, , )
1154 TC_FUNC1(set_clip_state, const, struct pipe_clip_state, *, &)
1155 TC_FUNC1(set_sample_mask, , unsigned, , )
1156 TC_FUNC1(set_min_samples, , unsigned, , )
1157 TC_FUNC1(set_polygon_stipple, const, struct pipe_poly_stipple, *, &)
1158
1159 TC_FUNC1(texture_barrier, , unsigned, , )
1160 TC_FUNC1(memory_barrier, , unsigned, , )
1161 TC_FUNC1(delete_texture_handle, , uint64_t, , )
1162 TC_FUNC1(delete_image_handle, , uint64_t, , )
1163 TC_FUNC1(set_frontend_noop, , bool, , )
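/* For illustration, the TC_FUNC1(set_sample_mask, , unsigned, , ) instantiation
 * above expands to roughly:
 *
 *    struct tc_call_set_sample_mask { struct tc_call_base base; unsigned state; };
 *
 *    static uint16_t tc_call_set_sample_mask(struct pipe_context *pipe, void *call)
 *    {
 *       pipe->set_sample_mask(pipe, to_call(call, tc_call_set_sample_mask)->state);
 *       return call_size(tc_call_set_sample_mask);
 *    }
 *
 *    static void tc_set_sample_mask(struct pipe_context *_pipe, unsigned param)
 *    {
 *       struct threaded_context *tc = threaded_context(_pipe);
 *       struct tc_call_set_sample_mask *p = (struct tc_call_set_sample_mask *)
 *          tc_add_call(tc, TC_CALL_set_sample_mask, tc_call_set_sample_mask);
 *       p->state = param;
 *    }
 */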
1164
1165
1166 /********************************************************************
1167 * queries
1168 */
1169
1170 static struct pipe_query *
1171 tc_create_query(struct pipe_context *_pipe, unsigned query_type,
1172 unsigned index)
1173 {
1174 struct threaded_context *tc = threaded_context(_pipe);
1175 struct pipe_context *pipe = tc->pipe;
1176
1177 return pipe->create_query(pipe, query_type, index);
1178 }
1179
1180 static struct pipe_query *
1181 tc_create_batch_query(struct pipe_context *_pipe, unsigned num_queries,
1182 unsigned *query_types)
1183 {
1184 struct threaded_context *tc = threaded_context(_pipe);
1185 struct pipe_context *pipe = tc->pipe;
1186
1187 return pipe->create_batch_query(pipe, num_queries, query_types);
1188 }
1189
1190 struct tc_query_call {
1191 struct tc_call_base base;
1192 struct pipe_query *query;
1193 };
1194
1195 static uint16_t
1196 tc_call_destroy_query(struct pipe_context *pipe, void *call)
1197 {
1198 struct pipe_query *query = to_call(call, tc_query_call)->query;
1199 struct threaded_query *tq = threaded_query(query);
1200
1201 if (list_is_linked(&tq->head_unflushed))
1202 list_del(&tq->head_unflushed);
1203
1204 pipe->destroy_query(pipe, query);
1205 return call_size(tc_query_call);
1206 }
1207
1208 static void
1209 tc_destroy_query(struct pipe_context *_pipe, struct pipe_query *query)
1210 {
1211 struct threaded_context *tc = threaded_context(_pipe);
1212
1213 tc_add_call(tc, TC_CALL_destroy_query, tc_query_call)->query = query;
1214 }
1215
1216 static uint16_t
1217 tc_call_begin_query(struct pipe_context *pipe, void *call)
1218 {
1219 pipe->begin_query(pipe, to_call(call, tc_query_call)->query);
1220 return call_size(tc_query_call);
1221 }
1222
1223 static bool
1224 tc_begin_query(struct pipe_context *_pipe, struct pipe_query *query)
1225 {
1226 struct threaded_context *tc = threaded_context(_pipe);
1227 tc->num_queries_active++;
1228
1229 tc_add_call(tc, TC_CALL_begin_query, tc_query_call)->query = query;
1230 return true; /* we don't care about the return value for this call */
1231 }
1232
1233 struct tc_end_query_call {
1234 struct tc_call_base base;
1235 struct threaded_context *tc;
1236 struct pipe_query *query;
1237 };
1238
1239 static uint16_t
1240 tc_call_end_query(struct pipe_context *pipe, void *call)
1241 {
1242 struct tc_end_query_call *p = to_call(call, tc_end_query_call);
1243 struct threaded_query *tq = threaded_query(p->query);
1244
1245 if (!list_is_linked(&tq->head_unflushed))
1246 list_add(&tq->head_unflushed, &p->tc->unflushed_queries);
1247
1248 pipe->end_query(pipe, p->query);
1249 return call_size(tc_end_query_call);
1250 }
1251
1252 static bool
1253 tc_end_query(struct pipe_context *_pipe, struct pipe_query *query)
1254 {
1255 struct threaded_context *tc = threaded_context(_pipe);
1256 struct threaded_query *tq = threaded_query(query);
1257 struct tc_end_query_call *call =
1258 tc_add_call(tc, TC_CALL_end_query, tc_end_query_call);
1259 tc->num_queries_active--;
1260
1261 call->tc = tc;
1262 call->query = query;
1263
1264 tq->flushed = false;
1265 tc->query_ended = true;
1266
1267 return true; /* we don't care about the return value for this call */
1268 }
1269
1270 static bool
1271 tc_get_query_result(struct pipe_context *_pipe,
1272 struct pipe_query *query, bool wait,
1273 union pipe_query_result *result)
1274 {
1275 struct threaded_context *tc = threaded_context(_pipe);
1276 struct threaded_query *tq = threaded_query(query);
1277 struct pipe_context *pipe = tc->pipe;
1278 bool flushed = tq->flushed;
1279
1280 if (!flushed) {
1281 tc_sync_msg(tc, wait ? "wait" : "nowait");
1282 tc_set_driver_thread(tc);
1283 }
1284
1285 bool success = pipe->get_query_result(pipe, query, wait, result);
1286
1287 if (!flushed)
1288 tc_clear_driver_thread(tc);
1289
1290 if (success) {
1291 tq->flushed = true;
1292 if (list_is_linked(&tq->head_unflushed)) {
1293 /* This is safe because it can only happen after we sync'd. */
1294 list_del(&tq->head_unflushed);
1295 }
1296 }
1297 return success;
1298 }
1299
1300 struct tc_query_result_resource {
1301 struct tc_call_base base;
1302 enum pipe_query_flags flags:8;
1303 enum pipe_query_value_type result_type:8;
1304 int8_t index; /* it can be -1 */
1305 unsigned offset;
1306 struct pipe_query *query;
1307 struct pipe_resource *resource;
1308 };
1309
1310 static uint16_t
1311 tc_call_get_query_result_resource(struct pipe_context *pipe, void *call)
1312 {
1313 struct tc_query_result_resource *p = to_call(call, tc_query_result_resource);
1314
1315 pipe->get_query_result_resource(pipe, p->query, p->flags, p->result_type,
1316 p->index, p->resource, p->offset);
1317 tc_drop_resource_reference(p->resource);
1318 return call_size(tc_query_result_resource);
1319 }
1320
1321 static void
1322 tc_get_query_result_resource(struct pipe_context *_pipe,
1323 struct pipe_query *query,
1324 enum pipe_query_flags flags,
1325 enum pipe_query_value_type result_type, int index,
1326 struct pipe_resource *resource, unsigned offset)
1327 {
1328 struct threaded_context *tc = threaded_context(_pipe);
1329
1330 tc_buffer_disable_cpu_storage(resource);
1331
1332 struct tc_query_result_resource *p =
1333 tc_add_call(tc, TC_CALL_get_query_result_resource,
1334 tc_query_result_resource);
1335 p->query = query;
1336 p->flags = flags;
1337 p->result_type = result_type;
1338 p->index = index;
1339 tc_set_resource_reference(&p->resource, resource);
1340 tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], resource);
1341 p->offset = offset;
1342 }
1343
1344 struct tc_render_condition {
1345 struct tc_call_base base;
1346 bool condition;
1347 unsigned mode;
1348 struct pipe_query *query;
1349 };
1350
1351 static uint16_t
1352 tc_call_render_condition(struct pipe_context *pipe, void *call)
1353 {
1354 struct tc_render_condition *p = to_call(call, tc_render_condition);
1355 pipe->render_condition(pipe, p->query, p->condition, p->mode);
1356 return call_size(tc_render_condition);
1357 }
1358
1359 static void
1360 tc_render_condition(struct pipe_context *_pipe,
1361 struct pipe_query *query, bool condition,
1362 enum pipe_render_cond_flag mode)
1363 {
1364 struct threaded_context *tc = threaded_context(_pipe);
1365 struct tc_render_condition *p =
1366 tc_add_call(tc, TC_CALL_render_condition, tc_render_condition);
1367
1368 p->query = query;
1369 p->condition = condition;
1370 p->mode = mode;
1371 }
1372
1373
1374 /********************************************************************
1375 * constant (immutable) states
1376 */
1377
1378 #define TC_CSO_CREATE(name, sname) \
1379 static void * \
1380 tc_create_##name##_state(struct pipe_context *_pipe, \
1381 const struct pipe_##sname##_state *state) \
1382 { \
1383 struct pipe_context *pipe = threaded_context(_pipe)->pipe; \
1384 return pipe->create_##name##_state(pipe, state); \
1385 }
1386
1387 #define TC_CSO_BIND(name, ...) TC_FUNC1(bind_##name##_state, , void *, , , ##__VA_ARGS__)
1388 #define TC_CSO_DELETE(name) TC_FUNC1(delete_##name##_state, , void *, , )
1389
1390 #define TC_CSO(name, sname, ...) \
1391 TC_CSO_CREATE(name, sname) \
1392 TC_CSO_BIND(name, ##__VA_ARGS__) \
1393 TC_CSO_DELETE(name)
1394
1395 #define TC_CSO_WHOLE(name) TC_CSO(name, name)
1396 #define TC_CSO_SHADER(name) TC_CSO(name, shader)
1397 #define TC_CSO_SHADER_TRACK(name) TC_CSO(name, shader, tc->seen_##name = true;)
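/* As an illustration, TC_CSO_WHOLE(blend) below expands into three wrappers:
 * tc_create_blend_state() (synchronous, forwarded directly to the driver),
 * tc_bind_blend_state() (queued via TC_FUNC1), and tc_delete_blend_state()
 * (also queued). TC_CSO_SHADER_TRACK(gs) additionally sets tc->seen_gs when the
 * shader is bound, which the buffer-list code uses to skip unused stages.
 */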
1398
1399 TC_CSO_WHOLE(blend)
1400 TC_CSO_WHOLE(rasterizer)
1401 TC_CSO_CREATE(depth_stencil_alpha, depth_stencil_alpha)
1402 TC_CSO_BIND(depth_stencil_alpha,
1403 if (param && tc->options.parse_renderpass_info) {
1404 /* dsa info is only ever added during a renderpass;
1405 * changes outside of a renderpass reset the data
1406 */
1407 if (!tc->in_renderpass) {
1408 tc_get_renderpass_info(tc)->zsbuf_write_dsa = 0;
1409 tc_get_renderpass_info(tc)->zsbuf_read_dsa = 0;
1410 }
1411 /* let the driver parse its own state */
1412 tc->options.dsa_parse(param, tc_get_renderpass_info(tc));
1413 }
1414 )
1415 TC_CSO_DELETE(depth_stencil_alpha)
1416 TC_CSO_WHOLE(compute)
1417 TC_CSO_CREATE(fs, shader)
1418 TC_CSO_BIND(fs,
1419 if (param && tc->options.parse_renderpass_info) {
1420 /* fs info is only ever added during a renderpass;
1421 * changes outside of a renderpass reset the data
1422 */
1423 if (!tc->in_renderpass) {
1424 tc_get_renderpass_info(tc)->cbuf_fbfetch = 0;
1425 tc_get_renderpass_info(tc)->zsbuf_write_fs = 0;
1426 }
1427 /* let the driver parse its own state */
1428 tc->options.fs_parse(param, tc_get_renderpass_info(tc));
1429 }
1430 )
1431 TC_CSO_DELETE(fs)
1432 TC_CSO_SHADER(vs)
1433 TC_CSO_SHADER_TRACK(gs)
1434 TC_CSO_SHADER_TRACK(tcs)
1435 TC_CSO_SHADER_TRACK(tes)
1436 TC_CSO_CREATE(sampler, sampler)
1437 TC_CSO_DELETE(sampler)
1438 TC_CSO_BIND(vertex_elements)
1439 TC_CSO_DELETE(vertex_elements)
1440
1441 static void *
1442 tc_create_vertex_elements_state(struct pipe_context *_pipe, unsigned count,
1443 const struct pipe_vertex_element *elems)
1444 {
1445 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1446
1447 return pipe->create_vertex_elements_state(pipe, count, elems);
1448 }
1449
1450 struct tc_sampler_states {
1451 struct tc_call_base base;
1452 uint8_t shader, start, count;
1453 void *slot[0]; /* more will be allocated if needed */
1454 };
1455
1456 static uint16_t
1457 tc_call_bind_sampler_states(struct pipe_context *pipe, void *call)
1458 {
1459 struct tc_sampler_states *p = (struct tc_sampler_states *)call;
1460
1461 pipe->bind_sampler_states(pipe, p->shader, p->start, p->count, p->slot);
1462 return p->base.num_slots;
1463 }
1464
1465 static void
1466 tc_bind_sampler_states(struct pipe_context *_pipe,
1467 enum pipe_shader_type shader,
1468 unsigned start, unsigned count, void **states)
1469 {
1470 if (!count)
1471 return;
1472
1473 struct threaded_context *tc = threaded_context(_pipe);
1474 struct tc_sampler_states *p =
1475 tc_add_slot_based_call(tc, TC_CALL_bind_sampler_states, tc_sampler_states, count);
1476
1477 p->shader = shader;
1478 p->start = start;
1479 p->count = count;
1480 memcpy(p->slot, states, count * sizeof(states[0]));
1481 }
1482
1483 static void
1484 tc_link_shader(struct pipe_context *_pipe, void **shaders)
1485 {
1486 struct threaded_context *tc = threaded_context(_pipe);
1487 tc->pipe->link_shader(tc->pipe, shaders);
1488 }
1489 /********************************************************************
1490 * immediate states
1491 */
1492
1493 struct tc_framebuffer {
1494 struct tc_call_base base;
1495 struct pipe_framebuffer_state state;
1496 };
1497
1498 static uint16_t
1499 tc_call_set_framebuffer_state(struct pipe_context *pipe, void *call)
1500 {
1501 struct pipe_framebuffer_state *p = &to_call(call, tc_framebuffer)->state;
1502
1503 pipe->set_framebuffer_state(pipe, p);
1504
1505 unsigned nr_cbufs = p->nr_cbufs;
1506 for (unsigned i = 0; i < nr_cbufs; i++)
1507 tc_drop_surface_reference(p->cbufs[i]);
1508 tc_drop_surface_reference(p->zsbuf);
1509 tc_drop_resource_reference(p->resolve);
1510 return call_size(tc_framebuffer);
1511 }
1512
1513 static void
1514 tc_set_framebuffer_state(struct pipe_context *_pipe,
1515 const struct pipe_framebuffer_state *fb)
1516 {
1517 struct threaded_context *tc = threaded_context(_pipe);
1518 struct tc_framebuffer *p =
1519 tc_add_call(tc, TC_CALL_set_framebuffer_state, tc_framebuffer);
1520 unsigned nr_cbufs = fb->nr_cbufs;
1521
1522 p->state.width = fb->width;
1523 p->state.height = fb->height;
1524 p->state.samples = fb->samples;
1525 p->state.layers = fb->layers;
1526 p->state.nr_cbufs = nr_cbufs;
1527 p->state.viewmask = fb->viewmask;
1528
1529 /* when unbinding, mark attachments as used for the current batch */
1530 for (unsigned i = 0; i < tc->nr_cbufs; i++) {
1531 tc_set_resource_batch_usage_persistent(tc, tc->fb_resources[i], false);
1532 pipe_resource_reference(&tc->fb_resources[i], NULL);
1533 }
1534 tc_set_resource_batch_usage_persistent(tc, tc->fb_resources[PIPE_MAX_COLOR_BUFS], false);
1535 tc_set_resource_batch_usage_persistent(tc, tc->fb_resolve, false);
1536
1537 for (unsigned i = 0; i < nr_cbufs; i++) {
1538 p->state.cbufs[i] = NULL;
1539 pipe_surface_reference(&p->state.cbufs[i], fb->cbufs[i]);
1540 /* full tracking requires storing the fb attachment resources */
1541 if (fb->cbufs[i])
1542 pipe_resource_reference(&tc->fb_resources[i], fb->cbufs[i]->texture);
1543 tc_set_resource_batch_usage_persistent(tc, tc->fb_resources[i], true);
1544 }
1545 tc->nr_cbufs = nr_cbufs;
1546 if (tc->options.parse_renderpass_info) {
1547 /* ensure this is treated as the first fb set if no fb activity has occurred */
1548 if (!tc->renderpass_info_recording->has_draw &&
1549 !tc->renderpass_info_recording->cbuf_clear &&
1550 !tc->renderpass_info_recording->cbuf_load &&
1551 !tc->renderpass_info_recording->zsbuf_load &&
1552 !tc->renderpass_info_recording->zsbuf_clear_partial)
1553 tc->batch_slots[tc->next].first_set_fb = false;
1554 /* store existing zsbuf data for possible persistence */
1555 uint8_t zsbuf = tc->renderpass_info_recording->has_draw ?
1556 0 :
1557 tc->renderpass_info_recording->data8[3];
1558 bool zsbuf_changed = tc->fb_resources[PIPE_MAX_COLOR_BUFS] !=
1559 (fb->zsbuf ? fb->zsbuf->texture : NULL);
1560
1561 if (tc->seen_fb_state) {
1562 /* this is the end of a renderpass, so increment the renderpass info */
1563 tc_batch_increment_renderpass_info(tc, tc->next, false);
1564 /* if zsbuf hasn't changed (i.e., possibly just adding a color buffer):
1565 * keep zsbuf usage data
1566 */
1567 if (!zsbuf_changed)
1568 tc->renderpass_info_recording->data8[3] = zsbuf;
1569 } else {
1570 /* this is the first time a set_framebuffer_call is triggered;
1571 * just increment the index and keep using the existing info for recording
1572 */
1573 tc->batch_slots[tc->next].renderpass_info_idx = 0;
1574 }
1575 /* future fb state changes will increment the index */
1576 tc->seen_fb_state = true;
1577 }
1578 pipe_resource_reference(&tc->fb_resources[PIPE_MAX_COLOR_BUFS],
1579 fb->zsbuf ? fb->zsbuf->texture : NULL);
1580 pipe_resource_reference(&tc->fb_resolve, fb->resolve);
1581 tc_set_resource_batch_usage_persistent(tc, tc->fb_resources[PIPE_MAX_COLOR_BUFS], true);
1582 tc_set_resource_batch_usage_persistent(tc, tc->fb_resolve, true);
1583 tc->in_renderpass = false;
1584 p->state.zsbuf = NULL;
1585 pipe_surface_reference(&p->state.zsbuf, fb->zsbuf);
1586 p->state.resolve = NULL;
1587 pipe_resource_reference(&p->state.resolve, fb->resolve);
1588 }
1589
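/* state[0..3] are the default outer levels and state[4..5] the default inner
 * levels, matching the memcpy in tc_set_tess_state and the "p + 4" split in
 * tc_call_set_tess_state.
 */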
1590 struct tc_tess_state {
1591 struct tc_call_base base;
1592 float state[6];
1593 };
1594
1595 static uint16_t
1596 tc_call_set_tess_state(struct pipe_context *pipe, void *call)
1597 {
1598 float *p = to_call(call, tc_tess_state)->state;
1599
1600 pipe->set_tess_state(pipe, p, p + 4);
1601 return call_size(tc_tess_state);
1602 }
1603
1604 static void
1605 tc_set_tess_state(struct pipe_context *_pipe,
1606 const float default_outer_level[4],
1607 const float default_inner_level[2])
1608 {
1609 struct threaded_context *tc = threaded_context(_pipe);
1610 float *p = tc_add_call(tc, TC_CALL_set_tess_state, tc_tess_state)->state;
1611
1612 memcpy(p, default_outer_level, 4 * sizeof(float));
1613 memcpy(p + 4, default_inner_level, 2 * sizeof(float));
1614 }
1615
1616 struct tc_patch_vertices {
1617 struct tc_call_base base;
1618 uint8_t patch_vertices;
1619 };
1620
1621 static uint16_t
1622 tc_call_set_patch_vertices(struct pipe_context *pipe, void *call)
1623 {
1624 uint8_t patch_vertices = to_call(call, tc_patch_vertices)->patch_vertices;
1625
1626 pipe->set_patch_vertices(pipe, patch_vertices);
1627 return call_size(tc_patch_vertices);
1628 }
1629
1630 static void
1631 tc_set_patch_vertices(struct pipe_context *_pipe, uint8_t patch_vertices)
1632 {
1633 struct threaded_context *tc = threaded_context(_pipe);
1634
1635 tc_add_call(tc, TC_CALL_set_patch_vertices,
1636 tc_patch_vertices)->patch_vertices = patch_vertices;
1637 }
1638
1639 struct tc_constant_buffer_base {
1640 struct tc_call_base base;
1641 uint8_t shader, index;
1642 bool is_null;
1643 };
1644
1645 struct tc_constant_buffer {
1646 struct tc_constant_buffer_base base;
1647 struct pipe_constant_buffer cb;
1648 };
1649
1650 static uint16_t
1651 tc_call_set_constant_buffer(struct pipe_context *pipe, void *call)
1652 {
1653 struct tc_constant_buffer *p = (struct tc_constant_buffer *)call;
1654
1655 if (unlikely(p->base.is_null)) {
1656 pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, false, NULL);
1657 return call_size(tc_constant_buffer_base);
1658 }
1659
1660 pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, true, &p->cb);
1661 return call_size(tc_constant_buffer);
1662 }
1663
1664 static void
1665 tc_set_constant_buffer(struct pipe_context *_pipe,
1666 enum pipe_shader_type shader, uint index,
1667 bool take_ownership,
1668 const struct pipe_constant_buffer *cb)
1669 {
1670 struct threaded_context *tc = threaded_context(_pipe);
1671
1672 if (unlikely(!cb || (!cb->buffer && !cb->user_buffer))) {
1673 struct tc_constant_buffer_base *p =
1674 tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer_base);
1675 p->shader = shader;
1676 p->index = index;
1677 p->is_null = true;
1678 tc_unbind_buffer(&tc->const_buffers[shader][index]);
1679 return;
1680 }
1681
1682 struct pipe_resource *buffer;
1683 unsigned offset;
1684
1685 if (cb->user_buffer) {
1686 /* This must be done before adding the set_constant_buffer call, because
1687 * the upload could generate e.g. transfer_unmap and flush a partially-
1688 * uninitialized set_constant_buffer to the driver if it were done
1689 * afterwards. */
1690 buffer = NULL;
1691 u_upload_data(tc->base.const_uploader, 0, cb->buffer_size,
1692 tc->ubo_alignment, cb->user_buffer, &offset, &buffer);
1693 u_upload_unmap(tc->base.const_uploader);
1694 take_ownership = true;
1695 } else {
1696 buffer = cb->buffer;
1697 offset = cb->buffer_offset;
1698 }
1699
1700 struct tc_constant_buffer *p =
1701 tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer);
1702 p->base.shader = shader;
1703 p->base.index = index;
1704 p->base.is_null = false;
1705 p->cb.user_buffer = NULL;
1706 p->cb.buffer_offset = offset;
1707 p->cb.buffer_size = cb->buffer_size;
1708
1709 if (take_ownership)
1710 p->cb.buffer = buffer;
1711 else
1712 tc_set_resource_reference(&p->cb.buffer, buffer);
1713
1714 if (buffer) {
1715 tc_bind_buffer(&tc->const_buffers[shader][index],
1716 &tc->buffer_lists[tc->next_buf_list], buffer);
1717 } else {
1718 tc_unbind_buffer(&tc->const_buffers[shader][index]);
1719 }
1720 }
1721
1722 struct tc_inlinable_constants {
1723 struct tc_call_base base;
1724 uint8_t shader;
1725 uint8_t num_values;
1726 uint32_t values[MAX_INLINABLE_UNIFORMS];
1727 };
1728
1729 static uint16_t
1730 tc_call_set_inlinable_constants(struct pipe_context *pipe, void *call)
1731 {
1732 struct tc_inlinable_constants *p = to_call(call, tc_inlinable_constants);
1733
1734 pipe->set_inlinable_constants(pipe, p->shader, p->num_values, p->values);
1735 return call_size(tc_inlinable_constants);
1736 }
1737
1738 static void
1739 tc_set_inlinable_constants(struct pipe_context *_pipe,
1740 enum pipe_shader_type shader,
1741 uint num_values, uint32_t *values)
1742 {
1743 struct threaded_context *tc = threaded_context(_pipe);
1744 struct tc_inlinable_constants *p =
1745 tc_add_call(tc, TC_CALL_set_inlinable_constants, tc_inlinable_constants);
1746 p->shader = shader;
1747 p->num_values = num_values;
1748 memcpy(p->values, values, num_values * 4);
1749 }
1750
1751 struct tc_sample_locations {
1752 struct tc_call_base base;
1753 uint16_t size;
1754 uint8_t slot[0];
1755 };
1756
1757
1758 static uint16_t
1759 tc_call_set_sample_locations(struct pipe_context *pipe, void *call)
1760 {
1761 struct tc_sample_locations *p = (struct tc_sample_locations *)call;
1762
1763 pipe->set_sample_locations(pipe, p->size, p->slot);
1764 return p->base.num_slots;
1765 }
1766
1767 static void
1768 tc_set_sample_locations(struct pipe_context *_pipe, size_t size, const uint8_t *locations)
1769 {
1770 struct threaded_context *tc = threaded_context(_pipe);
1771 struct tc_sample_locations *p =
1772 tc_add_slot_based_call(tc, TC_CALL_set_sample_locations,
1773 tc_sample_locations, size);
1774
1775 p->size = size;
1776 memcpy(p->slot, locations, size);
1777 }
1778
1779 struct tc_scissors {
1780 struct tc_call_base base;
1781 uint8_t start, count;
1782 struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
1783 };
1784
1785 static uint16_t
1786 tc_call_set_scissor_states(struct pipe_context *pipe, void *call)
1787 {
1788 struct tc_scissors *p = (struct tc_scissors *)call;
1789
1790 pipe->set_scissor_states(pipe, p->start, p->count, p->slot);
1791 return p->base.num_slots;
1792 }
1793
1794 static void
1795 tc_set_scissor_states(struct pipe_context *_pipe,
1796 unsigned start, unsigned count,
1797 const struct pipe_scissor_state *states)
1798 {
1799 struct threaded_context *tc = threaded_context(_pipe);
1800 struct tc_scissors *p =
1801 tc_add_slot_based_call(tc, TC_CALL_set_scissor_states, tc_scissors, count);
1802
1803 p->start = start;
1804 p->count = count;
1805 memcpy(&p->slot, states, count * sizeof(states[0]));
1806 }
1807
1808 struct tc_viewports {
1809 struct tc_call_base base;
1810 uint8_t start, count;
1811 struct pipe_viewport_state slot[0]; /* more will be allocated if needed */
1812 };
1813
1814 static uint16_t
1815 tc_call_set_viewport_states(struct pipe_context *pipe, void *call)
1816 {
1817 struct tc_viewports *p = (struct tc_viewports *)call;
1818
1819 pipe->set_viewport_states(pipe, p->start, p->count, p->slot);
1820 return p->base.num_slots;
1821 }
1822
1823 static void
1824 tc_set_viewport_states(struct pipe_context *_pipe,
1825 unsigned start, unsigned count,
1826 const struct pipe_viewport_state *states)
1827 {
1828 if (!count)
1829 return;
1830
1831 struct threaded_context *tc = threaded_context(_pipe);
1832 struct tc_viewports *p =
1833 tc_add_slot_based_call(tc, TC_CALL_set_viewport_states, tc_viewports, count);
1834
1835 p->start = start;
1836 p->count = count;
1837 memcpy(&p->slot, states, count * sizeof(states[0]));
1838 }
1839
1840 struct tc_window_rects {
1841 struct tc_call_base base;
1842 bool include;
1843 uint8_t count;
1844 struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
1845 };
1846
1847 static uint16_t
1848 tc_call_set_window_rectangles(struct pipe_context *pipe, void *call)
1849 {
1850 struct tc_window_rects *p = (struct tc_window_rects *)call;
1851
1852 pipe->set_window_rectangles(pipe, p->include, p->count, p->slot);
1853 return p->base.num_slots;
1854 }
1855
1856 static void
1857 tc_set_window_rectangles(struct pipe_context *_pipe, bool include,
1858 unsigned count,
1859 const struct pipe_scissor_state *rects)
1860 {
1861 struct threaded_context *tc = threaded_context(_pipe);
1862 struct tc_window_rects *p =
1863 tc_add_slot_based_call(tc, TC_CALL_set_window_rectangles, tc_window_rects, count);
1864
1865 p->include = include;
1866 p->count = count;
1867 memcpy(p->slot, rects, count * sizeof(rects[0]));
1868 }
1869
1870 struct tc_sampler_views {
1871 struct tc_call_base base;
1872 uint8_t shader, start, count, unbind_num_trailing_slots;
1873 struct pipe_sampler_view *slot[0]; /* more will be allocated if needed */
1874 };
1875
1876 static uint16_t
1877 tc_call_set_sampler_views(struct pipe_context *pipe, void *call)
1878 {
1879 struct tc_sampler_views *p = (struct tc_sampler_views *)call;
1880
1881 pipe->set_sampler_views(pipe, p->shader, p->start, p->count,
1882 p->unbind_num_trailing_slots, true, p->slot);
1883 return p->base.num_slots;
1884 }
1885
1886 static void
1887 tc_set_sampler_views(struct pipe_context *_pipe,
1888 enum pipe_shader_type shader,
1889 unsigned start, unsigned count,
1890 unsigned unbind_num_trailing_slots, bool take_ownership,
1891 struct pipe_sampler_view **views)
1892 {
1893 if (!count && !unbind_num_trailing_slots)
1894 return;
1895
1896 struct threaded_context *tc = threaded_context(_pipe);
1897 struct tc_sampler_views *p =
1898 tc_add_slot_based_call(tc, TC_CALL_set_sampler_views, tc_sampler_views,
1899 views ? count : 0);
1900
1901 p->shader = shader;
1902 p->start = start;
1903
1904 if (views) {
1905 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1906
1907 p->count = count;
1908 p->unbind_num_trailing_slots = unbind_num_trailing_slots;
1909
1910 if (take_ownership) {
1911 memcpy(p->slot, views, sizeof(*views) * count);
1912
1913 for (unsigned i = 0; i < count; i++) {
1914 if (views[i]) {
1915 if (views[i]->target == PIPE_BUFFER)
1916 tc_bind_buffer(&tc->sampler_buffers[shader][start + i], next,
1917 views[i]->texture);
1918 else
1919 tc_set_resource_batch_usage(tc, views[i]->texture);
1920 } else {
1921 tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]);
1922 }
1923 }
1924 } else {
1925 for (unsigned i = 0; i < count; i++) {
1926 p->slot[i] = NULL;
1927 pipe_sampler_view_reference(&p->slot[i], views[i]);
1928
1929 if (views[i]) {
1930 if (views[i]->target == PIPE_BUFFER)
1931 tc_bind_buffer(&tc->sampler_buffers[shader][start + i], next,
1932 views[i]->texture);
1933 else
1934 tc_set_resource_batch_usage(tc, views[i]->texture);
1935 } else {
1936 tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]);
1937 }
1938 }
1939 }
1940
1941 tc_unbind_buffers(&tc->sampler_buffers[shader][start + count],
1942 unbind_num_trailing_slots);
1943 tc->seen_sampler_buffers[shader] = true;
1944 } else {
1945 p->count = 0;
1946 p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
1947
1948 tc_unbind_buffers(&tc->sampler_buffers[shader][start],
1949 count + unbind_num_trailing_slots);
1950 }
1951 }
1952
1953 struct tc_shader_images {
1954 struct tc_call_base base;
1955 uint8_t shader, start, count;
1956 uint8_t unbind_num_trailing_slots;
1957 struct pipe_image_view slot[0]; /* more will be allocated if needed */
1958 };
1959
1960 static uint16_t
1961 tc_call_set_shader_images(struct pipe_context *pipe, void *call)
1962 {
1963 struct tc_shader_images *p = (struct tc_shader_images *)call;
1964 unsigned count = p->count;
1965
1966 if (!p->count) {
1967 pipe->set_shader_images(pipe, p->shader, p->start, 0,
1968 p->unbind_num_trailing_slots, NULL);
1969 return call_size(tc_shader_images);
1970 }
1971
1972 pipe->set_shader_images(pipe, p->shader, p->start, p->count,
1973 p->unbind_num_trailing_slots, p->slot);
1974
1975 for (unsigned i = 0; i < count; i++)
1976 tc_drop_resource_reference(p->slot[i].resource);
1977
1978 return p->base.num_slots;
1979 }
1980
1981 static void
1982 tc_set_shader_images(struct pipe_context *_pipe,
1983 enum pipe_shader_type shader,
1984 unsigned start, unsigned count,
1985 unsigned unbind_num_trailing_slots,
1986 const struct pipe_image_view *images)
1987 {
1988 if (!count && !unbind_num_trailing_slots)
1989 return;
1990
1991 struct threaded_context *tc = threaded_context(_pipe);
1992 struct tc_shader_images *p =
1993 tc_add_slot_based_call(tc, TC_CALL_set_shader_images, tc_shader_images,
1994 images ? count : 0);
1995 unsigned writable_buffers = 0;
1996
1997 p->shader = shader;
1998 p->start = start;
1999
2000 if (images) {
2001 p->count = count;
2002 p->unbind_num_trailing_slots = unbind_num_trailing_slots;
2003
2004 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
2005
2006 for (unsigned i = 0; i < count; i++) {
2007 struct pipe_resource *resource = images[i].resource;
2008
2009 tc_set_resource_reference(&p->slot[i].resource, resource);
2010
2011 if (resource) {
2012 if (resource->target == PIPE_BUFFER) {
2013 tc_bind_buffer(&tc->image_buffers[shader][start + i], next, resource);
2014
2015 if (images[i].access & PIPE_IMAGE_ACCESS_WRITE) {
2016 struct threaded_resource *tres = threaded_resource(resource);
2017
2018 tc_buffer_disable_cpu_storage(resource);
2019 util_range_add(&tres->b, &tres->valid_buffer_range,
2020 images[i].u.buf.offset,
2021 images[i].u.buf.offset + images[i].u.buf.size);
2022 writable_buffers |= BITFIELD_BIT(start + i);
2023 }
2024 } else {
2025 tc_set_resource_batch_usage(tc, resource);
2026 }
2027 } else {
2028 tc_unbind_buffer(&tc->image_buffers[shader][start + i]);
2029 }
2030 }
2031 memcpy(p->slot, images, count * sizeof(images[0]));
2032
2033 tc_unbind_buffers(&tc->image_buffers[shader][start + count],
2034 unbind_num_trailing_slots);
2035 tc->seen_image_buffers[shader] = true;
2036 } else {
2037 p->count = 0;
2038 p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
2039
2040 tc_unbind_buffers(&tc->image_buffers[shader][start],
2041 count + unbind_num_trailing_slots);
2042 }
2043
2044 tc->image_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count);
2045 tc->image_buffers_writeable_mask[shader] |= writable_buffers;
2046 }
2047
2048 struct tc_shader_buffers {
2049 struct tc_call_base base;
2050 uint8_t shader, start, count;
2051 bool unbind;
2052 unsigned writable_bitmask;
2053 struct pipe_shader_buffer slot[0]; /* more will be allocated if needed */
2054 };
2055
2056 static uint16_t
2057 tc_call_set_shader_buffers(struct pipe_context *pipe, void *call)
2058 {
2059 struct tc_shader_buffers *p = (struct tc_shader_buffers *)call;
2060 unsigned count = p->count;
2061
2062 if (p->unbind) {
2063 pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, NULL, 0);
2064 return call_size(tc_shader_buffers);
2065 }
2066
2067 pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, p->slot,
2068 p->writable_bitmask);
2069
2070 for (unsigned i = 0; i < count; i++)
2071 tc_drop_resource_reference(p->slot[i].buffer);
2072
2073 return p->base.num_slots;
2074 }
2075
2076 static void
2077 tc_set_shader_buffers(struct pipe_context *_pipe,
2078 enum pipe_shader_type shader,
2079 unsigned start, unsigned count,
2080 const struct pipe_shader_buffer *buffers,
2081 unsigned writable_bitmask)
2082 {
2083 if (!count)
2084 return;
2085
2086 struct threaded_context *tc = threaded_context(_pipe);
2087 struct tc_shader_buffers *p =
2088 tc_add_slot_based_call(tc, TC_CALL_set_shader_buffers, tc_shader_buffers,
2089 buffers ? count : 0);
2090
2091 p->shader = shader;
2092 p->start = start;
2093 p->count = count;
2094 p->unbind = buffers == NULL;
2095 p->writable_bitmask = writable_bitmask;
2096
2097 if (buffers) {
2098 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
2099
2100 for (unsigned i = 0; i < count; i++) {
2101 struct pipe_shader_buffer *dst = &p->slot[i];
2102 const struct pipe_shader_buffer *src = buffers + i;
2103
2104 tc_set_resource_reference(&dst->buffer, src->buffer);
2105 dst->buffer_offset = src->buffer_offset;
2106 dst->buffer_size = src->buffer_size;
2107
2108 if (src->buffer) {
2109 struct threaded_resource *tres = threaded_resource(src->buffer);
2110
2111 tc_bind_buffer(&tc->shader_buffers[shader][start + i], next, &tres->b);
2112
2113 if (writable_bitmask & BITFIELD_BIT(i)) {
2114 tc_buffer_disable_cpu_storage(src->buffer);
2115 util_range_add(&tres->b, &tres->valid_buffer_range,
2116 src->buffer_offset,
2117 src->buffer_offset + src->buffer_size);
2118 }
2119 } else {
2120 tc_unbind_buffer(&tc->shader_buffers[shader][start + i]);
2121 }
2122 }
2123 tc->seen_shader_buffers[shader] = true;
2124 } else {
2125 tc_unbind_buffers(&tc->shader_buffers[shader][start], count);
2126 }
2127
2128 tc->shader_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count);
2129 tc->shader_buffers_writeable_mask[shader] |= writable_bitmask << start;
2130 }
2131
2132 static uint16_t
2133 tc_call_set_vertex_buffers(struct pipe_context *pipe, void *call)
2134 {
2135 struct tc_vertex_buffers *p = (struct tc_vertex_buffers *)call;
2136 unsigned count = p->count;
2137
2138 for (unsigned i = 0; i < count; i++)
2139 tc_assert(!p->slot[i].is_user_buffer);
2140
2141 pipe->set_vertex_buffers(pipe, count, p->slot);
2142 return p->base.num_slots;
2143 }
2144
2145 static void
2146 tc_set_vertex_buffers(struct pipe_context *_pipe, unsigned count,
2147 const struct pipe_vertex_buffer *buffers)
2148 {
2149 struct threaded_context *tc = threaded_context(_pipe);
2150
2151 assert(!count || buffers);
2152
2153 if (count) {
2154 struct tc_vertex_buffers *p =
2155 tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, count);
2156 p->count = count;
2157
2158 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
2159
2160 memcpy(p->slot, buffers, count * sizeof(struct pipe_vertex_buffer));
2161
2162 for (unsigned i = 0; i < count; i++) {
2163 struct pipe_resource *buf = buffers[i].buffer.resource;
2164
2165 if (buf) {
2166 tc_bind_buffer(&tc->vertex_buffers[i], next, buf);
2167 } else {
2168 tc_unbind_buffer(&tc->vertex_buffers[i]);
2169 }
2170 }
2171 } else {
2172 struct tc_vertex_buffers *p =
2173 tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, 0);
2174 p->count = 0;
2175 }
2176
2177 /* We don't need to unbind trailing buffers because we never touch bindings
2178 * after num_vertex_buffers.
2179 */
2180 tc->num_vertex_buffers = count;
2181 }
2182
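/* Same as tc_set_vertex_buffers, but the caller writes the vertex buffers
 * directly into the returned call payload (p->slot), which avoids the extra
 * memcpy done above; the caller fills all `count` slots.
 */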
2183 struct pipe_vertex_buffer *
2184 tc_add_set_vertex_buffers_call(struct pipe_context *_pipe, unsigned count)
2185 {
2186 struct threaded_context *tc = threaded_context(_pipe);
2187
2188 /* We don't need to unbind trailing buffers because we never touch bindings
2189 * after num_vertex_buffers.
2190 */
2191 tc->num_vertex_buffers = count;
2192
2193 struct tc_vertex_buffers *p =
2194 tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, count);
2195 p->count = count;
2196 return p->slot;
2197 }
2198
2199 struct tc_stream_outputs {
2200 struct tc_call_base base;
2201 unsigned count;
2202 struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
2203 unsigned offsets[PIPE_MAX_SO_BUFFERS];
2204 };
2205
2206 static uint16_t
2207 tc_call_set_stream_output_targets(struct pipe_context *pipe, void *call)
2208 {
2209 struct tc_stream_outputs *p = to_call(call, tc_stream_outputs);
2210 unsigned count = p->count;
2211
2212 pipe->set_stream_output_targets(pipe, count, p->targets, p->offsets);
2213 for (unsigned i = 0; i < count; i++)
2214 tc_drop_so_target_reference(p->targets[i]);
2215
2216 return call_size(tc_stream_outputs);
2217 }
2218
2219 static void
2220 tc_set_stream_output_targets(struct pipe_context *_pipe,
2221 unsigned count,
2222 struct pipe_stream_output_target **tgs,
2223 const unsigned *offsets)
2224 {
2225 struct threaded_context *tc = threaded_context(_pipe);
2226 struct tc_stream_outputs *p =
2227 tc_add_call(tc, TC_CALL_set_stream_output_targets, tc_stream_outputs);
2228 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
2229
2230 for (unsigned i = 0; i < count; i++) {
2231 p->targets[i] = NULL;
2232 pipe_so_target_reference(&p->targets[i], tgs[i]);
2233 if (tgs[i]) {
2234 tc_buffer_disable_cpu_storage(tgs[i]->buffer);
2235 tc_bind_buffer(&tc->streamout_buffers[i], next, tgs[i]->buffer);
2236 } else {
2237 tc_unbind_buffer(&tc->streamout_buffers[i]);
2238 }
2239 }
2240 p->count = count;
2241 memcpy(p->offsets, offsets, count * sizeof(unsigned));
2242
2243 tc_unbind_buffers(&tc->streamout_buffers[count], PIPE_MAX_SO_BUFFERS - count);
2244 if (count)
2245 tc->seen_streamout_buffers = true;
2246 }
2247
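/* The two compute-related entry points below are rare, so they take the
 * synchronous path: tc_sync() followed by a direct driver call instead of a
 * recorded batch call.
 */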
2248 static void
2249 tc_set_compute_resources(struct pipe_context *_pipe, unsigned start,
2250 unsigned count, struct pipe_surface **resources)
2251 {
2252 struct threaded_context *tc = threaded_context(_pipe);
2253 struct pipe_context *pipe = tc->pipe;
2254
2255 tc_sync(tc);
2256 pipe->set_compute_resources(pipe, start, count, resources);
2257 }
2258
2259 static void
2260 tc_set_global_binding(struct pipe_context *_pipe, unsigned first,
2261 unsigned count, struct pipe_resource **resources,
2262 uint32_t **handles)
2263 {
2264 struct threaded_context *tc = threaded_context(_pipe);
2265 struct pipe_context *pipe = tc->pipe;
2266
2267 tc_sync(tc);
2268 pipe->set_global_binding(pipe, first, count, resources, handles);
2269 }
2270
2271
2272 /********************************************************************
2273 * views
2274 */
2275
2276 static struct pipe_surface *
2277 tc_create_surface(struct pipe_context *_pipe,
2278 struct pipe_resource *resource,
2279 const struct pipe_surface *surf_tmpl)
2280 {
2281 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2282 struct pipe_surface *view =
2283 pipe->create_surface(pipe, resource, surf_tmpl);
2284
2285 if (view)
2286 view->context = _pipe;
2287 return view;
2288 }
2289
2290 static void
2291 tc_surface_destroy(struct pipe_context *_pipe,
2292 struct pipe_surface *surf)
2293 {
2294 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2295
2296 pipe->surface_destroy(pipe, surf);
2297 }
2298
2299 static struct pipe_sampler_view *
2300 tc_create_sampler_view(struct pipe_context *_pipe,
2301 struct pipe_resource *resource,
2302 const struct pipe_sampler_view *templ)
2303 {
2304 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2305 struct pipe_sampler_view *view =
2306 pipe->create_sampler_view(pipe, resource, templ);
2307
2308 if (view)
2309 view->context = _pipe;
2310 return view;
2311 }
2312
2313 static void
2314 tc_sampler_view_destroy(struct pipe_context *_pipe,
2315 struct pipe_sampler_view *view)
2316 {
2317 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2318
2319 pipe->sampler_view_destroy(pipe, view);
2320 }
2321
2322 static struct pipe_stream_output_target *
2323 tc_create_stream_output_target(struct pipe_context *_pipe,
2324 struct pipe_resource *res,
2325 unsigned buffer_offset,
2326 unsigned buffer_size)
2327 {
2328 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2329 struct threaded_resource *tres = threaded_resource(res);
2330 struct pipe_stream_output_target *view;
2331
2332 util_range_add(&tres->b, &tres->valid_buffer_range, buffer_offset,
2333 buffer_offset + buffer_size);
2334
2335 view = pipe->create_stream_output_target(pipe, res, buffer_offset,
2336 buffer_size);
2337 if (view)
2338 view->context = _pipe;
2339 return view;
2340 }
2341
2342 static void
2343 tc_stream_output_target_destroy(struct pipe_context *_pipe,
2344 struct pipe_stream_output_target *target)
2345 {
2346 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2347
2348 pipe->stream_output_target_destroy(pipe, target);
2349 }
2350
2351
2352 /********************************************************************
2353 * bindless
2354 */
2355
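/* Handle creation has to return a value, so tc_create_texture_handle and
 * tc_create_image_handle synchronize and call the driver directly; the
 * make_*_handle_resident entry points return nothing and are recorded as
 * batch calls instead.
 */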
2356 static uint64_t
2357 tc_create_texture_handle(struct pipe_context *_pipe,
2358 struct pipe_sampler_view *view,
2359 const struct pipe_sampler_state *state)
2360 {
2361 struct threaded_context *tc = threaded_context(_pipe);
2362 struct pipe_context *pipe = tc->pipe;
2363
2364 tc_sync(tc);
2365 return pipe->create_texture_handle(pipe, view, state);
2366 }
2367
2368 struct tc_make_texture_handle_resident {
2369 struct tc_call_base base;
2370 bool resident;
2371 uint64_t handle;
2372 };
2373
2374 static uint16_t
2375 tc_call_make_texture_handle_resident(struct pipe_context *pipe, void *call)
2376 {
2377 struct tc_make_texture_handle_resident *p =
2378 to_call(call, tc_make_texture_handle_resident);
2379
2380 pipe->make_texture_handle_resident(pipe, p->handle, p->resident);
2381 return call_size(tc_make_texture_handle_resident);
2382 }
2383
2384 static void
2385 tc_make_texture_handle_resident(struct pipe_context *_pipe, uint64_t handle,
2386 bool resident)
2387 {
2388 struct threaded_context *tc = threaded_context(_pipe);
2389 struct tc_make_texture_handle_resident *p =
2390 tc_add_call(tc, TC_CALL_make_texture_handle_resident,
2391 tc_make_texture_handle_resident);
2392
2393 p->handle = handle;
2394 p->resident = resident;
2395 }
2396
2397 static uint64_t
2398 tc_create_image_handle(struct pipe_context *_pipe,
2399 const struct pipe_image_view *image)
2400 {
2401 struct threaded_context *tc = threaded_context(_pipe);
2402 struct pipe_context *pipe = tc->pipe;
2403
2404 if (image->resource->target == PIPE_BUFFER)
2405 tc_buffer_disable_cpu_storage(image->resource);
2406
2407 tc_sync(tc);
2408 return pipe->create_image_handle(pipe, image);
2409 }
2410
2411 struct tc_make_image_handle_resident {
2412 struct tc_call_base base;
2413 bool resident;
2414 unsigned access;
2415 uint64_t handle;
2416 };
2417
2418 static uint16_t
2419 tc_call_make_image_handle_resident(struct pipe_context *pipe, void *call)
2420 {
2421 struct tc_make_image_handle_resident *p =
2422 to_call(call, tc_make_image_handle_resident);
2423
2424 pipe->make_image_handle_resident(pipe, p->handle, p->access, p->resident);
2425 return call_size(tc_make_image_handle_resident);
2426 }
2427
2428 static void
2429 tc_make_image_handle_resident(struct pipe_context *_pipe, uint64_t handle,
2430 unsigned access, bool resident)
2431 {
2432 struct threaded_context *tc = threaded_context(_pipe);
2433 struct tc_make_image_handle_resident *p =
2434 tc_add_call(tc, TC_CALL_make_image_handle_resident,
2435 tc_make_image_handle_resident);
2436
2437 p->handle = handle;
2438 p->access = access;
2439 p->resident = resident;
2440 }
2441
2442
2443 /********************************************************************
2444 * transfer
2445 */
2446
2447 static void
2448 tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
2449 unsigned flags);
2450
2451 struct tc_replace_buffer_storage {
2452 struct tc_call_base base;
2453 uint16_t num_rebinds;
2454 uint32_t rebind_mask;
2455 uint32_t delete_buffer_id;
2456 struct pipe_resource *dst;
2457 struct pipe_resource *src;
2458 tc_replace_buffer_storage_func func;
2459 };
2460
2461 static uint16_t
2462 tc_call_replace_buffer_storage(struct pipe_context *pipe, void *call)
2463 {
2464 struct tc_replace_buffer_storage *p = to_call(call, tc_replace_buffer_storage);
2465
2466 p->func(pipe, p->dst, p->src, p->num_rebinds, p->rebind_mask, p->delete_buffer_id);
2467
2468 tc_drop_resource_reference(p->dst);
2469 tc_drop_resource_reference(p->src);
2470 return call_size(tc_replace_buffer_storage);
2471 }
2472
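/* tc_invalidate_buffer implements buffer storage replacement; in this file
 * it is used by tc_improve_map_buffer_flags when a map discards the whole
 * resource and by tc_buffer_unmap when re-uploading the CPU storage.
 */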
2473 /* Return true if the buffer has been invalidated or is idle. */
2474 static bool
2475 tc_invalidate_buffer(struct threaded_context *tc,
2476 struct threaded_resource *tbuf)
2477 {
2478 if (!tc_is_buffer_busy(tc, tbuf, PIPE_MAP_READ_WRITE)) {
2479 /* It's idle, so invalidation would be a no-op; we still clear the valid
2480 * range because this is technically an invalidation, we just skip the
2481 * useless reallocation.
2482 *
2483 * If the buffer is bound for write, we can't clear the valid range.
2484 */
2485 if (!tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique))
2486 util_range_set_empty(&tbuf->valid_buffer_range);
2487 return true;
2488 }
2489
2490 struct pipe_screen *screen = tc->base.screen;
2491 struct pipe_resource *new_buf;
2492
2493 /* Shared, pinned, and sparse buffers can't be reallocated. */
2494 if (tbuf->is_shared ||
2495 tbuf->is_user_ptr ||
2496 tbuf->b.flags & (PIPE_RESOURCE_FLAG_SPARSE | PIPE_RESOURCE_FLAG_UNMAPPABLE))
2497 return false;
2498
2499 assert(tbuf->b.target == PIPE_BUFFER);
2500 tc->bytes_replaced_estimate += tbuf->b.width0;
2501
2502 if (tc->bytes_replaced_limit && (tc->bytes_replaced_estimate > tc->bytes_replaced_limit)) {
2503 tc_flush(&tc->base, NULL, PIPE_FLUSH_ASYNC);
2504 }
2505
2506 /* Allocate a new one. */
2507 new_buf = screen->resource_create(screen, &tbuf->b);
2508 if (!new_buf)
2509 return false;
2510
2511 /* Replace the "latest" pointer. */
2512 if (tbuf->latest != &tbuf->b)
2513 pipe_resource_reference(&tbuf->latest, NULL);
2514
2515 tbuf->latest = new_buf;
2516
2517 uint32_t delete_buffer_id = tbuf->buffer_id_unique;
2518
2519 /* Enqueue storage replacement of the original buffer. */
2520 struct tc_replace_buffer_storage *p =
2521 tc_add_call(tc, TC_CALL_replace_buffer_storage,
2522 tc_replace_buffer_storage);
2523
2524 p->func = tc->replace_buffer_storage;
2525 tc_set_resource_reference(&p->dst, &tbuf->b);
2526 tc_set_resource_reference(&p->src, new_buf);
2527 p->delete_buffer_id = delete_buffer_id;
2528 p->rebind_mask = 0;
2529
2530 /* Treat the current buffer as the new buffer. */
2531 bool bound_for_write = tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique);
2532 p->num_rebinds = tc_rebind_buffer(tc, tbuf->buffer_id_unique,
2533 threaded_resource(new_buf)->buffer_id_unique,
2534 &p->rebind_mask);
2535
2536 /* If the buffer is not bound for write, clear the valid range. */
2537 if (!bound_for_write)
2538 util_range_set_empty(&tbuf->valid_buffer_range);
2539
2540 tbuf->buffer_id_unique = threaded_resource(new_buf)->buffer_id_unique;
2541 threaded_resource(new_buf)->buffer_id_unique = 0;
2542
2543 return true;
2544 }
2545
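/* Adjust map flags on the application thread so that, whenever possible, the
 * mapping can proceed without synchronizing with the driver thread. As a
 * rough example of the common write path: a DISCARD_RANGE write into an
 * idle, non-shared, non-sparse buffer typically comes back as
 *
 *   PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED |
 *   TC_TRANSFER_MAP_THREADED_UNSYNC | TC_TRANSFER_MAP_NO_INVALIDATE |
 *   TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED
 *
 * with DISCARD_RANGE dropped because no synchronization is needed.
 */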
2546 static unsigned
2547 tc_improve_map_buffer_flags(struct threaded_context *tc,
2548 struct threaded_resource *tres, unsigned usage,
2549 unsigned offset, unsigned size)
2550 {
2551 /* Never invalidate inside the driver and never infer "unsynchronized". */
2552 unsigned tc_flags = TC_TRANSFER_MAP_NO_INVALIDATE |
2553 TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED;
2554
2555 /* Prevent a reentry. */
2556 if (usage & tc_flags)
2557 return usage;
2558
2559 /* Use the staging upload if it's preferred. */
2560 if (usage & (PIPE_MAP_DISCARD_RANGE |
2561 PIPE_MAP_DISCARD_WHOLE_RESOURCE) &&
2562 !(usage & PIPE_MAP_PERSISTENT) &&
2563 tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY &&
2564 tc->use_forced_staging_uploads) {
2565 usage &= ~(PIPE_MAP_DISCARD_WHOLE_RESOURCE |
2566 PIPE_MAP_UNSYNCHRONIZED);
2567
2568 return usage | tc_flags | PIPE_MAP_DISCARD_RANGE;
2569 }
2570
2571 /* Sparse buffers can't be mapped directly and can't be reallocated
2572 * (fully invalidated). That may just be a radeonsi limitation, but
2573 * the threaded context must obey it with radeonsi.
2574 */
2575 if (tres->b.flags & (PIPE_RESOURCE_FLAG_SPARSE | PIPE_RESOURCE_FLAG_UNMAPPABLE)) {
2576 /* We can use DISCARD_RANGE instead of full discard. This is the only
2577 * fast path for sparse buffers that doesn't need thread synchronization.
2578 */
2579 if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE)
2580 usage |= PIPE_MAP_DISCARD_RANGE;
2581
2582 /* Allow DISCARD_WHOLE_RESOURCE and inferring UNSYNCHRONIZED in drivers.
2583 * The threaded context doesn't do unsynchronized mappings and
2584 * invalidations of sparse buffers, so correct driver behavior won't
2585 * result in incorrect behavior with the threaded context.
2586 */
2587 return usage;
2588 }
2589
2590 usage |= tc_flags;
2591
2592 /* Handle CPU reads trivially. */
2593 if (usage & PIPE_MAP_READ) {
2594 if (usage & PIPE_MAP_UNSYNCHRONIZED)
2595 usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* don't sync */
2596
2597 /* Drivers aren't allowed to do buffer invalidations. */
2598 return usage & ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2599 }
2600
2601 /* Check whether the mapped buffer range has never been initialized or
2602 * the buffer is idle; in either case it can be mapped unsynchronized. */
2603 if (!(usage & PIPE_MAP_UNSYNCHRONIZED) &&
2604 ((!tres->is_shared &&
2605 !util_ranges_intersect(&tres->valid_buffer_range, offset, offset + size)) ||
2606 !tc_is_buffer_busy(tc, tres, usage)))
2607 usage |= PIPE_MAP_UNSYNCHRONIZED;
2608
2609 if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
2610 /* If discarding the entire valid range, discard the whole resource instead. */
2611 if (usage & PIPE_MAP_DISCARD_RANGE &&
2612 util_ranges_covered(&tres->valid_buffer_range, offset, offset + size))
2613 usage |= PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2614
2615 /* Discard the whole resource if needed. */
2616 if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) {
2617 if (tc_invalidate_buffer(tc, tres))
2618 usage |= PIPE_MAP_UNSYNCHRONIZED;
2619 else
2620 usage |= PIPE_MAP_DISCARD_RANGE; /* fallback */
2621 }
2622 }
2623
2624 /* We won't need this flag anymore. */
2625 /* TODO: We might not need TC_TRANSFER_MAP_NO_INVALIDATE with this. */
2626 usage &= ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2627
2628 /* GL_AMD_pinned_memory and persistent mappings can't use staging
2629 * buffers. */
2630 if (usage & (PIPE_MAP_UNSYNCHRONIZED |
2631 PIPE_MAP_PERSISTENT) ||
2632 tres->is_user_ptr)
2633 usage &= ~PIPE_MAP_DISCARD_RANGE;
2634
2635 /* Unsynchronized buffer mappings don't have to synchronize the thread. */
2636 if (usage & PIPE_MAP_UNSYNCHRONIZED) {
2637 usage &= ~PIPE_MAP_DISCARD_RANGE;
2638 usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* notify the driver */
2639 }
2640
2641 return usage;
2642 }
2643
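/* Buffer mapping takes one of three paths below, tried in this order:
 *  1. the per-resource CPU storage, returned without involving the driver;
 *  2. for DISCARD_RANGE maps, a staging allocation from stream_uploader,
 *     copied into the real buffer via resource_copy_region at flush/unmap;
 *  3. a direct driver buffer_map, synchronizing with the driver thread
 *     unless the mapping ended up unsynchronized.
 */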
2644 static void *
2645 tc_buffer_map(struct pipe_context *_pipe,
2646 struct pipe_resource *resource, unsigned level,
2647 unsigned usage, const struct pipe_box *box,
2648 struct pipe_transfer **transfer)
2649 {
2650 struct threaded_context *tc = threaded_context(_pipe);
2651 struct threaded_resource *tres = threaded_resource(resource);
2652 struct pipe_context *pipe = tc->pipe;
2653
2654 /* PIPE_MAP_THREAD_SAFE is for glthread, which shouldn't use the CPU storage;
2655 * this shouldn't normally matter because glthread only uses large buffers.
2656 */
2657 if (usage & PIPE_MAP_THREAD_SAFE)
2658 tc_buffer_disable_cpu_storage(resource);
2659
2660 usage = tc_improve_map_buffer_flags(tc, tres, usage, box->x, box->width);
2661
2662 /* If the CPU storage is enabled, return it directly. */
2663 if (tres->allow_cpu_storage && !(usage & TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE)) {
2664 /* We can't let resource_copy_region disable the CPU storage. */
2665 assert(!(tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY));
2666
2667 if (!tres->cpu_storage) {
2668 tres->cpu_storage = align_malloc(resource->width0, tc->map_buffer_alignment);
2669
2670 if (tres->cpu_storage && tres->valid_buffer_range.end) {
2671 /* The GPU buffer contains valid data. Copy them to the CPU storage. */
2672 struct pipe_box box2;
2673 struct pipe_transfer *transfer2;
2674
2675 unsigned valid_range_len = tres->valid_buffer_range.end - tres->valid_buffer_range.start;
2676 u_box_1d(tres->valid_buffer_range.start, valid_range_len, &box2);
2677
2678 tc_sync_msg(tc, "cpu storage GPU -> CPU copy");
2679 tc_set_driver_thread(tc);
2680
2681 void *ret = pipe->buffer_map(pipe, tres->latest ? tres->latest : resource,
2682 0, PIPE_MAP_READ, &box2, &transfer2);
2683 memcpy(&((uint8_t*)tres->cpu_storage)[tres->valid_buffer_range.start],
2684 ret,
2685 valid_range_len);
2686 pipe->buffer_unmap(pipe, transfer2);
2687
2688 tc_clear_driver_thread(tc);
2689 }
2690 }
2691
2692 if (tres->cpu_storage) {
2693 struct threaded_transfer *ttrans = slab_zalloc(&tc->pool_transfers);
2694 ttrans->b.resource = resource;
2695 ttrans->b.usage = usage;
2696 ttrans->b.box = *box;
2697 ttrans->valid_buffer_range = &tres->valid_buffer_range;
2698 ttrans->cpu_storage_mapped = true;
2699 *transfer = &ttrans->b;
2700
2701 return (uint8_t*)tres->cpu_storage + box->x;
2702 } else {
2703 tres->allow_cpu_storage = false;
2704 }
2705 }
2706
2707 /* Do a staging transfer within the threaded context. The driver should
2708 * only get resource_copy_region.
2709 */
2710 if (usage & PIPE_MAP_DISCARD_RANGE) {
2711 struct threaded_transfer *ttrans = slab_zalloc(&tc->pool_transfers);
2712 uint8_t *map;
2713
2714 u_upload_alloc(tc->base.stream_uploader, 0,
2715 box->width + (box->x % tc->map_buffer_alignment),
2716 tc->map_buffer_alignment, &ttrans->b.offset,
2717 &ttrans->staging, (void**)&map);
2718 if (!map) {
2719 slab_free(&tc->pool_transfers, ttrans);
2720 return NULL;
2721 }
2722
2723 ttrans->b.resource = resource;
2724 ttrans->b.level = 0;
2725 ttrans->b.usage = usage;
2726 ttrans->b.box = *box;
2727 ttrans->b.stride = 0;
2728 ttrans->b.layer_stride = 0;
2729 ttrans->valid_buffer_range = &tres->valid_buffer_range;
2730 ttrans->cpu_storage_mapped = false;
2731 *transfer = &ttrans->b;
2732
2733 p_atomic_inc(&tres->pending_staging_uploads);
2734 util_range_add(resource, &tres->pending_staging_uploads_range,
2735 box->x, box->x + box->width);
2736
2737 return map + (box->x % tc->map_buffer_alignment);
2738 }
2739
2740 if (usage & PIPE_MAP_UNSYNCHRONIZED &&
2741 p_atomic_read(&tres->pending_staging_uploads) &&
2742 util_ranges_intersect(&tres->pending_staging_uploads_range, box->x, box->x + box->width)) {
2743 /* Write conflict detected between a staging transfer and the direct mapping we're
2744 * going to do. Resolve the conflict by ignoring UNSYNCHRONIZED so the direct mapping
2745 * will have to wait for the staging transfer completion.
2746 * Note: The conflict detection is only based on the mapped range, not on the actual
2747 * written range(s).
2748 */
2749 usage &= ~PIPE_MAP_UNSYNCHRONIZED & ~TC_TRANSFER_MAP_THREADED_UNSYNC;
2750 tc->use_forced_staging_uploads = false;
2751 }
2752
2753 /* Unsynchronized buffer mappings don't have to synchronize the thread. */
2754 if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)) {
2755 tc_sync_msg(tc, usage & PIPE_MAP_DISCARD_RANGE ? " discard_range" :
2756 usage & PIPE_MAP_READ ? " read" : " staging conflict");
2757 tc_set_driver_thread(tc);
2758 }
2759
2760 tc->bytes_mapped_estimate += box->width;
2761
2762 void *ret = pipe->buffer_map(pipe, tres->latest ? tres->latest : resource,
2763 level, usage, box, transfer);
2764 threaded_transfer(*transfer)->valid_buffer_range = &tres->valid_buffer_range;
2765 threaded_transfer(*transfer)->cpu_storage_mapped = false;
2766
2767 if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
2768 tc_clear_driver_thread(tc);
2769
2770 return ret;
2771 }
2772
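/* Unlike buffer maps, texture maps always synchronize with the driver thread
 * and map the latest resource directly; there is no staging or CPU-storage
 * fast path here.
 */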
2773 static void *
2774 tc_texture_map(struct pipe_context *_pipe,
2775 struct pipe_resource *resource, unsigned level,
2776 unsigned usage, const struct pipe_box *box,
2777 struct pipe_transfer **transfer)
2778 {
2779 struct threaded_context *tc = threaded_context(_pipe);
2780 struct threaded_resource *tres = threaded_resource(resource);
2781 struct pipe_context *pipe = tc->pipe;
2782
2783 tc_sync_msg(tc, "texture");
2784 tc_set_driver_thread(tc);
2785 /* block all unsync texture subdata during map */
2786 tc_set_resource_batch_usage_persistent(tc, resource, true);
2787
2788 tc->bytes_mapped_estimate += box->width;
2789
2790 void *ret = pipe->texture_map(pipe, tres->latest ? tres->latest : resource,
2791 level, usage, box, transfer);
2792
2793 if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
2794 tc_clear_driver_thread(tc);
2795
2796 return ret;
2797 }
2798
2799 struct tc_transfer_flush_region {
2800 struct tc_call_base base;
2801 struct pipe_box box;
2802 struct pipe_transfer *transfer;
2803 };
2804
2805 static uint16_t
2806 tc_call_transfer_flush_region(struct pipe_context *pipe, void *call)
2807 {
2808 struct tc_transfer_flush_region *p = to_call(call, tc_transfer_flush_region);
2809
2810 pipe->transfer_flush_region(pipe, p->transfer, &p->box);
2811 return call_size(tc_transfer_flush_region);
2812 }
2813
2814 struct tc_resource_copy_region {
2815 struct tc_call_base base;
2816 unsigned dst_level;
2817 unsigned dstx, dsty, dstz;
2818 unsigned src_level;
2819 struct pipe_box src_box;
2820 struct pipe_resource *dst;
2821 struct pipe_resource *src;
2822 };
2823
2824 static void
2825 tc_resource_copy_region(struct pipe_context *_pipe,
2826 struct pipe_resource *dst, unsigned dst_level,
2827 unsigned dstx, unsigned dsty, unsigned dstz,
2828 struct pipe_resource *src, unsigned src_level,
2829 const struct pipe_box *src_box);
2830
2831 static void
2832 tc_buffer_do_flush_region(struct threaded_context *tc,
2833 struct threaded_transfer *ttrans,
2834 const struct pipe_box *box)
2835 {
2836 struct threaded_resource *tres = threaded_resource(ttrans->b.resource);
2837
2838 if (ttrans->staging) {
2839 struct pipe_box src_box;
2840
2841 u_box_1d(ttrans->b.offset + ttrans->b.box.x % tc->map_buffer_alignment +
2842 (box->x - ttrans->b.box.x),
2843 box->width, &src_box);
2844
2845 /* Copy the staging buffer into the original one. */
2846 tc_resource_copy_region(&tc->base, ttrans->b.resource, 0, box->x, 0, 0,
2847 ttrans->staging, 0, &src_box);
2848 }
2849
2850 /* Don't update the valid range when we're uploading the CPU storage
2851 * because it includes the uninitialized range too.
2852 */
2853 if (!(ttrans->b.usage & TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE)) {
2854 util_range_add(&tres->b, ttrans->valid_buffer_range,
2855 box->x, box->x + box->width);
2856 }
2857 }
2858
2859 static void
2860 tc_transfer_flush_region(struct pipe_context *_pipe,
2861 struct pipe_transfer *transfer,
2862 const struct pipe_box *rel_box)
2863 {
2864 struct threaded_context *tc = threaded_context(_pipe);
2865 struct threaded_transfer *ttrans = threaded_transfer(transfer);
2866 struct threaded_resource *tres = threaded_resource(transfer->resource);
2867 unsigned required_usage = PIPE_MAP_WRITE |
2868 PIPE_MAP_FLUSH_EXPLICIT;
2869
2870 if (tres->b.target == PIPE_BUFFER) {
2871 if ((transfer->usage & required_usage) == required_usage) {
2872 struct pipe_box box;
2873
2874 u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &box);
2875 tc_buffer_do_flush_region(tc, ttrans, &box);
2876 }
2877
2878 /* Staging transfers don't send the call to the driver.
2879 *
2880 * Transfers using the CPU storage shouldn't call transfer_flush_region
2881 * in the driver because the buffer is not really mapped on the driver
2882 * side and the CPU storage always re-uploads everything (flush_region
2883 * makes no difference).
2884 */
2885 if (ttrans->staging || ttrans->cpu_storage_mapped)
2886 return;
2887 }
2888
2889 struct tc_transfer_flush_region *p =
2890 tc_add_call(tc, TC_CALL_transfer_flush_region, tc_transfer_flush_region);
2891 p->transfer = transfer;
2892 p->box = *rel_box;
2893 }
2894
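/* The union is selected by was_staging_transfer: a staging unmap only needs
 * the resource so the pending_staging_uploads counter can be decremented on
 * the driver thread, while a real transfer carries the pipe_transfer to be
 * unmapped by the driver.
 */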
2895 struct tc_buffer_unmap {
2896 struct tc_call_base base;
2897 bool was_staging_transfer;
2898 union {
2899 struct pipe_transfer *transfer;
2900 struct pipe_resource *resource;
2901 };
2902 };
2903
2904 static uint16_t
2905 tc_call_buffer_unmap(struct pipe_context *pipe, void *call)
2906 {
2907 struct tc_buffer_unmap *p = to_call(call, tc_buffer_unmap);
2908
2909 if (p->was_staging_transfer) {
2910 struct threaded_resource *tres = threaded_resource(p->resource);
2911 /* Nothing to do except keeping track of staging uploads */
2912 assert(tres->pending_staging_uploads > 0);
2913 p_atomic_dec(&tres->pending_staging_uploads);
2914 tc_drop_resource_reference(p->resource);
2915 } else {
2916 pipe->buffer_unmap(pipe, p->transfer);
2917 }
2918
2919 return call_size(tc_buffer_unmap);
2920 }
2921
2922 static void
2923 tc_buffer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
2924 {
2925 struct threaded_context *tc = threaded_context(_pipe);
2926 struct threaded_transfer *ttrans = threaded_transfer(transfer);
2927 struct threaded_resource *tres = threaded_resource(transfer->resource);
2928
2929 /* PIPE_MAP_THREAD_SAFE is only valid with UNSYNCHRONIZED. It can be
2930 * called from any thread and bypasses all multithreaded queues.
2931 */
2932 if (transfer->usage & PIPE_MAP_THREAD_SAFE) {
2933 assert(transfer->usage & PIPE_MAP_UNSYNCHRONIZED);
2934 assert(!(transfer->usage & (PIPE_MAP_FLUSH_EXPLICIT |
2935 PIPE_MAP_DISCARD_RANGE)));
2936
2937 struct pipe_context *pipe = tc->pipe;
2938 util_range_add(&tres->b, ttrans->valid_buffer_range,
2939 transfer->box.x, transfer->box.x + transfer->box.width);
2940
2941 pipe->buffer_unmap(pipe, transfer);
2942 return;
2943 }
2944
2945 if (transfer->usage & PIPE_MAP_WRITE &&
2946 !(transfer->usage & PIPE_MAP_FLUSH_EXPLICIT))
2947 tc_buffer_do_flush_region(tc, ttrans, &transfer->box);
2948
2949 if (ttrans->cpu_storage_mapped) {
2950 /* GL allows simultaneous GPU stores with mapped buffers as long as GPU stores don't
2951 * touch the mapped range. That's a problem because GPU stores free the CPU storage.
2952 * If that happens, we just ignore the unmap call and don't upload anything to prevent
2953 * a crash.
2954 *
2955 * Disallow the CPU storage in the driver to work around this.
2956 */
2957 assert(tres->cpu_storage);
2958
2959 if (tres->cpu_storage) {
2960 tc_invalidate_buffer(tc, tres);
2961 tc_buffer_subdata(&tc->base, &tres->b,
2962 PIPE_MAP_UNSYNCHRONIZED |
2963 TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE,
2964 0, tres->b.width0, tres->cpu_storage);
2965 /* This shouldn't have been freed by buffer_subdata. */
2966 assert(tres->cpu_storage);
2967 } else {
2968 static bool warned_once = false;
2969 if (!warned_once) {
2970 fprintf(stderr, "This application is incompatible with cpu_storage.\n");
2971 fprintf(stderr, "Use tc_max_cpu_storage_size=0 to disable it and report this issue to Mesa.\n");
2972 warned_once = true;
2973 }
2974 }
2975
2976 tc_drop_resource_reference(ttrans->staging);
2977 slab_free(&tc->pool_transfers, ttrans);
2978 return;
2979 }
2980
2981 bool was_staging_transfer = false;
2982
2983 if (ttrans->staging) {
2984 was_staging_transfer = true;
2985
2986 tc_drop_resource_reference(ttrans->staging);
2987 slab_free(&tc->pool_transfers, ttrans);
2988 }
2989
2990 struct tc_buffer_unmap *p = tc_add_call(tc, TC_CALL_buffer_unmap,
2991 tc_buffer_unmap);
2992 if (was_staging_transfer) {
2993 tc_set_resource_reference(&p->resource, &tres->b);
2994 p->was_staging_transfer = true;
2995 } else {
2996 p->transfer = transfer;
2997 p->was_staging_transfer = false;
2998 }
2999
3000 /* tc_buffer_map directly maps the buffers, but tc_buffer_unmap
3001 * defers the unmap operation to the batch execution.
3002 * bytes_mapped_estimate tracks the map/unmap byte delta; if it exceeds
3003 * an optional limit, the current batch is flushed to reclaim some RAM.
3004 */
3005 if (!ttrans->staging && tc->bytes_mapped_limit &&
3006 tc->bytes_mapped_estimate > tc->bytes_mapped_limit) {
3007 tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC);
3008 }
3009 }
3010
3011 struct tc_texture_unmap {
3012 struct tc_call_base base;
3013 struct pipe_transfer *transfer;
3014 };
3015
3016 static uint16_t
3017 tc_call_texture_unmap(struct pipe_context *pipe, void *call)
3018 {
3019 struct tc_texture_unmap *p = (struct tc_texture_unmap *) call;
3020
3021 pipe->texture_unmap(pipe, p->transfer);
3022 return call_size(tc_texture_unmap);
3023 }
3024
3025 static void
3026 tc_texture_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
3027 {
3028 struct threaded_context *tc = threaded_context(_pipe);
3029 struct threaded_transfer *ttrans = threaded_transfer(transfer);
3030
3031 /* enable subdata again once resource is no longer mapped */
3032 tc_set_resource_batch_usage_persistent(tc, transfer->resource, false);
3033
3034 tc_add_call(tc, TC_CALL_texture_unmap, tc_texture_unmap)->transfer = transfer;
3035
3036 /* tc_texture_map directly maps the textures, but tc_texture_unmap
3037 * defers the unmap operation to the batch execution.
3038 * bytes_mapped_estimate estimates the delta of mapped minus unmapped
3039 * bytes; if it exceeds the optional limit, the current batch is flushed
3040 * to reclaim some RAM. */
3041 if (!ttrans->staging && tc->bytes_mapped_limit &&
3042 tc->bytes_mapped_estimate > tc->bytes_mapped_limit) {
3043 tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC);
3044 }
3045 }
3046
3047 struct tc_buffer_subdata {
3048 struct tc_call_base base;
3049 unsigned usage, offset, size;
3050 struct pipe_resource *resource;
3051 char slot[0]; /* more will be allocated if needed */
3052 };
3053
3054 static uint16_t
3055 tc_call_buffer_subdata(struct pipe_context *pipe, void *call)
3056 {
3057 struct tc_buffer_subdata *p = (struct tc_buffer_subdata *)call;
3058
3059 pipe->buffer_subdata(pipe, p->resource, p->usage, p->offset, p->size,
3060 p->slot);
3061 tc_drop_resource_reference(p->resource);
3062 return p->base.num_slots;
3063 }
3064
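/* Two subdata calls are considered mergeable when the previously enqueued
 * call is also a buffer_subdata with the same usage and resource, and its
 * write range ends exactly where the new one begins
 * (previous offset + size == new offset). This lets piecewise whole-buffer
 * uploads collapse into a single driver call.
 */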
3065 static bool
3066 is_mergeable_buffer_subdata(const struct tc_call_base *previous_call,
3067 unsigned usage, unsigned offset,
3068 struct pipe_resource *resource)
3069 {
3070 if (!previous_call || previous_call->call_id != TC_CALL_buffer_subdata)
3071 return false;
3072
3073 struct tc_buffer_subdata *subdata = (struct tc_buffer_subdata *)previous_call;
3074
3075 return subdata->usage == usage && subdata->resource == resource
3076 && (subdata->offset + subdata->size) == offset;
3077 }
3078
3079 static void
3080 tc_buffer_subdata(struct pipe_context *_pipe,
3081 struct pipe_resource *resource,
3082 unsigned usage, unsigned offset,
3083 unsigned size, const void *data)
3084 {
3085 struct threaded_context *tc = threaded_context(_pipe);
3086 struct threaded_resource *tres = threaded_resource(resource);
3087
3088 if (!size)
3089 return;
3090
3091 usage |= PIPE_MAP_WRITE;
3092
3093 /* PIPE_MAP_DIRECTLY suppresses the implicit DISCARD_RANGE. */
3094 if (!(usage & PIPE_MAP_DIRECTLY))
3095 usage |= PIPE_MAP_DISCARD_RANGE;
3096
3097 usage = tc_improve_map_buffer_flags(tc, tres, usage, offset, size);
3098
3099 /* Unsynchronized and big transfers should use transfer_map. Also handle
3100 * full invalidations, because drivers aren't allowed to do them.
3101 */
3102 if (usage & (PIPE_MAP_UNSYNCHRONIZED |
3103 PIPE_MAP_DISCARD_WHOLE_RESOURCE) ||
3104 size > TC_MAX_SUBDATA_BYTES ||
3105 tres->cpu_storage) {
3106 struct pipe_transfer *transfer;
3107 struct pipe_box box;
3108 uint8_t *map = NULL;
3109
3110 u_box_1d(offset, size, &box);
3111
3112 /* CPU storage is only useful for partial updates. It can add overhead
3113 * on glBufferData calls so avoid using it.
3114 */
3115 if (!tres->cpu_storage && offset == 0 && size == resource->width0)
3116 usage |= TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE;
3117
3118 map = tc_buffer_map(_pipe, resource, 0, usage, &box, &transfer);
3119 if (map) {
3120 memcpy(map, data, size);
3121 tc_buffer_unmap(_pipe, transfer);
3122 }
3123 return;
3124 }
3125
3126 util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
3127
3128 /* We can potentially merge this subdata call with the previous one (if any),
3129 * if the application does a whole-buffer upload piecewise. */
3130 {
3131 struct tc_call_base *last_call = tc_get_last_mergeable_call(tc);
3132 struct tc_buffer_subdata *merge_dest = (struct tc_buffer_subdata *)last_call;
3133
3134 if (is_mergeable_buffer_subdata(last_call, usage, offset, resource) &&
3135 tc_enlarge_last_mergeable_call(tc, call_size_with_slots(tc_buffer_subdata, merge_dest->size + size))) {
3136 memcpy(merge_dest->slot + merge_dest->size, data, size);
3137 merge_dest->size += size;
3138
3139 /* TODO: We *could* do an invalidate + upload here if we detect that
3140 * the merged subdata call overwrites the entire buffer. However, that's
3141 * a little complicated since we can't add further calls to our batch
3142 * until we have removed the merged subdata call, which means that
3143 * calling tc_invalidate_buffer before we have removed the call will
3144 * blow things up.
3145 *
3146 * Just leave a large, merged subdata call in the batch for now, which is
3147 * at least better than tons of tiny subdata calls.
3148 */
3149
3150 return;
3151 }
3152 }
3153
3154 /* The upload is small. Enqueue it. */
3155 struct tc_buffer_subdata *p =
3156 tc_add_slot_based_call(tc, TC_CALL_buffer_subdata, tc_buffer_subdata, size);
3157
3158 tc_set_resource_reference(&p->resource, resource);
3159 /* The buffer will always be busy here: if it weren't,
3160 * tc_improve_map_buffer_flags would have set UNSYNCHRONIZED and we
3161 * wouldn't get here. */
3162 tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], resource);
3163 p->usage = usage;
3164 p->offset = offset;
3165 p->size = size;
3166 memcpy(p->slot, data, size);
3167
3168 tc_mark_call_mergeable(tc, &p->base);
3169 }
3170
3171 struct tc_texture_subdata {
3172 struct tc_call_base base;
3173 unsigned level, usage, stride;
3174 struct pipe_box box;
3175 struct pipe_resource *resource;
3176 uintptr_t layer_stride;
3177 char slot[0]; /* more will be allocated if needed */
3178 };
3179
3180 static uint16_t
3181 tc_call_texture_subdata(struct pipe_context *pipe, void *call)
3182 {
3183 struct tc_texture_subdata *p = (struct tc_texture_subdata *)call;
3184
3185 pipe->texture_subdata(pipe, p->resource, p->level, p->usage, &p->box,
3186 p->slot, p->stride, p->layer_stride);
3187 tc_drop_resource_reference(p->resource);
3188 return p->base.num_slots;
3189 }
3190
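/* Strategy sketch for texture_subdata: small uploads are copied into the
 * batch and replayed on the driver thread; large uploads either go straight
 * to the driver with PIPE_MAP_UNSYNCHRONIZED when the resource is provably
 * idle, are staged through a temporary buffer plus resource_copy_region
 * while a renderpass is being recorded, or fall back to a full sync followed
 * by a direct texture_subdata call.
 */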
3191 static void
3192 tc_texture_subdata(struct pipe_context *_pipe,
3193 struct pipe_resource *resource,
3194 unsigned level, unsigned usage,
3195 const struct pipe_box *box,
3196 const void *data, unsigned stride,
3197 uintptr_t layer_stride)
3198 {
3199 struct threaded_context *tc = threaded_context(_pipe);
3200 uint64_t size;
3201
3202 assert(box->height >= 1);
3203 assert(box->depth >= 1);
3204
3205 size = (box->depth - 1) * layer_stride +
3206 (box->height - 1) * (uint64_t)stride +
3207 box->width * util_format_get_blocksize(resource->format);
3208 if (!size)
3209 return;
3210
3211 /* Small uploads can be enqueued; big uploads are handled below and only sync when unavoidable. */
3212 if (size <= TC_MAX_SUBDATA_BYTES) {
3213 struct tc_texture_subdata *p =
3214 tc_add_slot_based_call(tc, TC_CALL_texture_subdata, tc_texture_subdata, size);
3215
3216 tc_set_resource_batch_usage(tc, resource);
3217 tc_set_resource_reference(&p->resource, resource);
3218 p->level = level;
3219 p->usage = usage;
3220 p->box = *box;
3221 p->stride = stride;
3222 p->layer_stride = layer_stride;
3223 memcpy(p->slot, data, size);
3224 } else {
3225 struct pipe_context *pipe = tc->pipe;
3226 struct threaded_resource *tres = threaded_resource(resource);
3227 unsigned unsync_usage = TC_TRANSFER_MAP_THREADED_UNSYNC | PIPE_MAP_UNSYNCHRONIZED | PIPE_MAP_WRITE;
3228 bool can_unsync = !tc_resource_batch_usage_test_busy(tc, resource) &&
3229 tc->options.is_resource_busy &&
3230 !tc->options.is_resource_busy(tc->pipe->screen, tres->latest, usage | unsync_usage);
3231
3232 if (!can_unsync && resource->usage != PIPE_USAGE_STAGING &&
3233 tc->options.parse_renderpass_info && tc->in_renderpass) {
3234 enum pipe_format format = resource->format;
3235 if (usage & PIPE_MAP_DEPTH_ONLY)
3236 format = util_format_get_depth_only(format);
3237 else if (usage & PIPE_MAP_STENCIL_ONLY)
3238 format = PIPE_FORMAT_S8_UINT;
3239
3240 unsigned fmt_stride = util_format_get_stride(format, box->width);
3241 uint64_t fmt_layer_stride = util_format_get_2d_size(format, stride, box->height);
3242 assert(fmt_layer_stride * box->depth <= UINT32_MAX);
3243
3244 struct pipe_resource *pres = pipe_buffer_create(pipe->screen, 0, PIPE_USAGE_STREAM, layer_stride * box->depth);
3245 pipe->buffer_subdata(pipe, pres, unsync_usage, 0, layer_stride * box->depth, data);
3246 struct pipe_box src_box = *box;
3247 src_box.x = src_box.y = src_box.z = 0;
3248
3249 if (fmt_stride == stride && fmt_layer_stride == layer_stride) {
3250 /* if the stride matches, a single copy is fine */
3251 tc->base.resource_copy_region(&tc->base, resource, level, box->x, box->y, box->z, pres, 0, &src_box);
3252 } else {
3253 /* if stride doesn't match, inline util_copy_box on the GPU and assume the driver will optimize */
3254 src_box.depth = 1;
3255 for (unsigned z = 0; z < box->depth; ++z, src_box.x = z * layer_stride) {
3256 unsigned dst_x = box->x, dst_y = box->y, width = box->width, height = box->height, dst_z = box->z + z;
3257 int blocksize = util_format_get_blocksize(format);
3258 int blockwidth = util_format_get_blockwidth(format);
3259 int blockheight = util_format_get_blockheight(format);
3260
3261 assert(blocksize > 0);
3262 assert(blockwidth > 0);
3263 assert(blockheight > 0);
3264
3265 dst_x /= blockwidth;
3266 dst_y /= blockheight;
3267 width = DIV_ROUND_UP(width, blockwidth);
3268 height = DIV_ROUND_UP(height, blockheight);
3269
3270 width *= blocksize;
3271
3272 if (width == fmt_stride && width == (unsigned)stride) {
3273 ASSERTED uint64_t size = (uint64_t)height * width;
3274
3275 assert(size <= SIZE_MAX);
3276 assert(dst_x + src_box.width < u_minify(pres->width0, level));
3277 assert(dst_y + src_box.height < u_minify(pres->height0, level));
3278 assert(pres->target != PIPE_TEXTURE_3D || z + src_box.depth < u_minify(pres->depth0, level));
3279 tc->base.resource_copy_region(&tc->base, resource, level, dst_x, dst_y, dst_z, pres, 0, &src_box);
3280 } else {
3281 src_box.height = 1;
3282 for (unsigned i = 0; i < height; i++, dst_y++, src_box.x += stride)
3283 tc->base.resource_copy_region(&tc->base, resource, level, dst_x, dst_y, dst_z, pres, 0, &src_box);
3284 }
3285 }
3286 }
3287
3288 pipe_resource_reference(&pres, NULL);
3289 } else {
3290 if (can_unsync) {
3291 usage |= unsync_usage;
3292 } else {
3293 tc_sync(tc);
3294 tc_set_driver_thread(tc);
3295 }
3296 pipe->texture_subdata(pipe, resource, level, usage, box, data,
3297 stride, layer_stride);
3298 if (!can_unsync)
3299 tc_clear_driver_thread(tc);
3300 }
3301 }
3302 }
3303
3304
3305 /********************************************************************
3306 * miscellaneous
3307 */
3308
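/* Helper for trivial pass-through queries that take no parameters: sync the
 * worker thread, then call the driver directly and return its result.
 */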
3309 #define TC_FUNC_SYNC_RET0(ret_type, func) \
3310 static ret_type \
3311 tc_##func(struct pipe_context *_pipe) \
3312 { \
3313 struct threaded_context *tc = threaded_context(_pipe); \
3314 struct pipe_context *pipe = tc->pipe; \
3315 tc_sync(tc); \
3316 return pipe->func(pipe); \
3317 }
3318
3319 TC_FUNC_SYNC_RET0(uint64_t, get_timestamp)
3320
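/* Note: this does not sync; sample positions presumably depend only on the
 * sample count, so querying them from the application thread is assumed to
 * be safe without draining the queue.
 */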
3321 static void
3322 tc_get_sample_position(struct pipe_context *_pipe,
3323 unsigned sample_count, unsigned sample_index,
3324 float *out_value)
3325 {
3326 struct threaded_context *tc = threaded_context(_pipe);
3327 struct pipe_context *pipe = tc->pipe;
3328
3329 pipe->get_sample_position(pipe, sample_count, sample_index,
3330 out_value);
3331 }
3332
3333 static enum pipe_reset_status
3334 tc_get_device_reset_status(struct pipe_context *_pipe)
3335 {
3336 struct threaded_context *tc = threaded_context(_pipe);
3337 struct pipe_context *pipe = tc->pipe;
3338
3339 if (!tc->options.unsynchronized_get_device_reset_status)
3340 tc_sync(tc);
3341
3342 return pipe->get_device_reset_status(pipe);
3343 }
3344
3345 static void
3346 tc_set_device_reset_callback(struct pipe_context *_pipe,
3347 const struct pipe_device_reset_callback *cb)
3348 {
3349 struct threaded_context *tc = threaded_context(_pipe);
3350 struct pipe_context *pipe = tc->pipe;
3351
3352 tc_sync(tc);
3353 pipe->set_device_reset_callback(pipe, cb);
3354 }
3355
3356 struct tc_string_marker {
3357 struct tc_call_base base;
3358 int len;
3359 char slot[0]; /* more will be allocated if needed */
3360 };
3361
3362 static uint16_t
3363 tc_call_emit_string_marker(struct pipe_context *pipe, void *call)
3364 {
3365 struct tc_string_marker *p = (struct tc_string_marker *)call;
3366 pipe->emit_string_marker(pipe, p->slot, p->len);
3367 return p->base.num_slots;
3368 }
3369
3370 static void
3371 tc_emit_string_marker(struct pipe_context *_pipe,
3372 const char *string, int len)
3373 {
3374 struct threaded_context *tc = threaded_context(_pipe);
3375
3376 if (len <= TC_MAX_STRING_MARKER_BYTES) {
3377 struct tc_string_marker *p =
3378 tc_add_slot_based_call(tc, TC_CALL_emit_string_marker, tc_string_marker, len);
3379
3380 memcpy(p->slot, string, len);
3381 p->len = len;
3382 } else {
3383 struct pipe_context *pipe = tc->pipe;
3384
3385 tc_sync(tc);
3386 tc_set_driver_thread(tc);
3387 pipe->emit_string_marker(pipe, string, len);
3388 tc_clear_driver_thread(tc);
3389 }
3390 }
3391
3392 static void
3393 tc_dump_debug_state(struct pipe_context *_pipe, FILE *stream,
3394 unsigned flags)
3395 {
3396 struct threaded_context *tc = threaded_context(_pipe);
3397 struct pipe_context *pipe = tc->pipe;
3398
3399 tc_sync(tc);
3400 pipe->dump_debug_state(pipe, stream, flags);
3401 }
3402
3403 static void
3404 tc_set_debug_callback(struct pipe_context *_pipe,
3405 const struct util_debug_callback *cb)
3406 {
3407 struct threaded_context *tc = threaded_context(_pipe);
3408 struct pipe_context *pipe = tc->pipe;
3409
3410 tc_sync(tc);
3411
3412 /* Drop all synchronous debug callbacks. Drivers are expected to be OK
3413 * with this. shader-db will use an environment variable to disable
3414 * the threaded context.
3415 */
3416 if (cb && !cb->async)
3417 pipe->set_debug_callback(pipe, NULL);
3418 else
3419 pipe->set_debug_callback(pipe, cb);
3420 }
3421
3422 static void
3423 tc_set_log_context(struct pipe_context *_pipe, struct u_log_context *log)
3424 {
3425 struct threaded_context *tc = threaded_context(_pipe);
3426 struct pipe_context *pipe = tc->pipe;
3427
3428 tc_sync(tc);
3429 pipe->set_log_context(pipe, log);
3430 }
3431
3432 static void
3433 tc_create_fence_fd(struct pipe_context *_pipe,
3434 struct pipe_fence_handle **fence, int fd,
3435 enum pipe_fd_type type)
3436 {
3437 struct threaded_context *tc = threaded_context(_pipe);
3438 struct pipe_context *pipe = tc->pipe;
3439
3440 if (!tc->options.unsynchronized_create_fence_fd)
3441 tc_sync(tc);
3442
3443 pipe->create_fence_fd(pipe, fence, fd, type);
3444 }
3445
3446 struct tc_fence_call {
3447 struct tc_call_base base;
3448 struct pipe_fence_handle *fence;
3449 };
3450
3451 static uint16_t
3452 tc_call_fence_server_sync(struct pipe_context *pipe, void *call)
3453 {
3454 struct pipe_fence_handle *fence = to_call(call, tc_fence_call)->fence;
3455
3456 pipe->fence_server_sync(pipe, fence);
3457 pipe->screen->fence_reference(pipe->screen, &fence, NULL);
3458 return call_size(tc_fence_call);
3459 }
3460
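/* fence_server_sync is recorded in the batch, holding an extra fence
 * reference that the call releases after use, while fence_server_signal
 * below syncs and calls the driver directly on the application thread.
 */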
3461 static void
3462 tc_fence_server_sync(struct pipe_context *_pipe,
3463 struct pipe_fence_handle *fence)
3464 {
3465 struct threaded_context *tc = threaded_context(_pipe);
3466 struct pipe_screen *screen = tc->pipe->screen;
3467 struct tc_fence_call *call = tc_add_call(tc, TC_CALL_fence_server_sync,
3468 tc_fence_call);
3469
3470 call->fence = NULL;
3471 screen->fence_reference(screen, &call->fence, fence);
3472 }
3473
3474 static void
3475 tc_fence_server_signal(struct pipe_context *_pipe,
3476 struct pipe_fence_handle *fence)
3477 {
3478 struct threaded_context *tc = threaded_context(_pipe);
3479 struct pipe_context *pipe = tc->pipe;
3480 tc_sync(tc);
3481 pipe->fence_server_signal(pipe, fence);
3482 }
3483
3484 static struct pipe_video_codec *
3485 tc_create_video_codec(UNUSED struct pipe_context *_pipe,
3486 UNUSED const struct pipe_video_codec *templ)
3487 {
3488 unreachable("Threaded context should not be enabled for video APIs");
3489 return NULL;
3490 }
3491
3492 static struct pipe_video_buffer *
3493 tc_create_video_buffer(UNUSED struct pipe_context *_pipe,
3494 UNUSED const struct pipe_video_buffer *templ)
3495 {
3496 unreachable("Threaded context should not be enabled for video APIs");
3497 return NULL;
3498 }
3499
3500 struct tc_context_param {
3501 struct tc_call_base base;
3502 enum pipe_context_param param;
3503 unsigned value;
3504 };
3505
3506 static uint16_t
3507 tc_call_set_context_param(struct pipe_context *pipe, void *call)
3508 {
3509 struct tc_context_param *p = to_call(call, tc_context_param);
3510
3511 if (pipe->set_context_param)
3512 pipe->set_context_param(pipe, p->param, p->value);
3513
3514 return call_size(tc_context_param);
3515 }
3516
3517 static void
3518 tc_set_context_param(struct pipe_context *_pipe,
3519 enum pipe_context_param param,
3520 unsigned value)
3521 {
3522 struct threaded_context *tc = threaded_context(_pipe);
3523
3524 if (param == PIPE_CONTEXT_PARAM_UPDATE_THREAD_SCHEDULING) {
3525 util_thread_sched_apply_policy(tc->queue.threads[0],
3526 UTIL_THREAD_THREADED_CONTEXT, value,
3527 NULL);
3528
3529 /* This is required to be thread-safe, so execute it immediately
3530 * without enqueuing it in the batch.
3531 */
3532 struct pipe_context *pipe = tc->pipe;
3533 if (pipe->set_context_param)
3534 pipe->set_context_param(pipe, param, value);
3535 return;
3536 }
3537
3538 if (tc->pipe->set_context_param) {
3539 struct tc_context_param *call =
3540 tc_add_call(tc, TC_CALL_set_context_param, tc_context_param);
3541
3542 call->param = param;
3543 call->value = value;
3544 }
3545 }
3546
3547
3548 /********************************************************************
3549 * draw, launch, clear, blit, copy, flush
3550 */
3551
3552 struct tc_flush_deferred_call {
3553 struct tc_call_base base;
3554 unsigned flags;
3555 struct pipe_fence_handle *fence;
3556 };
3557
3558 struct tc_flush_call {
3559 struct tc_call_base base;
3560 unsigned flags;
3561 struct pipe_fence_handle *fence;
3562 struct threaded_context *tc;
3563 };
3564
3565 static void
3566 tc_flush_queries(struct threaded_context *tc)
3567 {
3568 struct threaded_query *tq, *tmp;
3569 LIST_FOR_EACH_ENTRY_SAFE(tq, tmp, &tc->unflushed_queries, head_unflushed) {
3570 list_del(&tq->head_unflushed);
3571
3572 /* Memory release semantics: due to a possible race with
3573 * tc_get_query_result, we must ensure that the linked list changes
3574 * are visible before setting tq->flushed.
3575 */
3576 p_atomic_set(&tq->flushed, true);
3577 }
3578 }
3579
3580 static uint16_t
3581 tc_call_flush_deferred(struct pipe_context *pipe, void *call)
3582 {
3583 struct tc_flush_deferred_call *p = to_call(call, tc_flush_deferred_call);
3584 struct pipe_screen *screen = pipe->screen;
3585
3586 pipe->flush(pipe, p->fence ? &p->fence : NULL, p->flags);
3587 screen->fence_reference(screen, &p->fence, NULL);
3588
3589 return call_size(tc_flush_deferred_call);
3590 }
3591
3592 static uint16_t
3593 tc_call_flush(struct pipe_context *pipe, void *call)
3594 {
3595 struct tc_flush_call *p = to_call(call, tc_flush_call);
3596 struct pipe_screen *screen = pipe->screen;
3597
3598 pipe->flush(pipe, p->fence ? &p->fence : NULL, p->flags);
3599 screen->fence_reference(screen, &p->fence, NULL);
3600
3601 tc_flush_queries(p->tc);
3602
3603 return call_size(tc_flush_call);
3604 }
3605
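/* Rough flow of tc_flush: if the flush is asynchronous (ASYNC or DEFERRED)
 * and the driver provides a create_fence callback, the flush is recorded in
 * the batch, with any requested fence created up front from the batch's
 * token; non-deferred flushes also flush the current batch and signal the
 * renderpass info. Otherwise, including the out-of-memory fallback, the
 * context syncs and flushes directly on the application thread.
 */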
3606 static void
3607 tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
3608 unsigned flags)
3609 {
3610 struct threaded_context *tc = threaded_context(_pipe);
3611 struct pipe_context *pipe = tc->pipe;
3612 struct pipe_screen *screen = pipe->screen;
3613 bool async = flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC);
3614 bool deferred = (flags & PIPE_FLUSH_DEFERRED) > 0;
3615
3616 if (!deferred || !fence)
3617 tc->in_renderpass = false;
3618
3619 if (async && tc->options.create_fence) {
3620 if (fence) {
3621 struct tc_batch *next = &tc->batch_slots[tc->next];
3622
3623 if (!next->token) {
3624 next->token = malloc(sizeof(*next->token));
3625 if (!next->token)
3626 goto out_of_memory;
3627
3628 pipe_reference_init(&next->token->ref, 1);
3629 next->token->tc = tc;
3630 }
3631
3632 screen->fence_reference(screen, fence,
3633 tc->options.create_fence(pipe, next->token));
3634 if (!*fence)
3635 goto out_of_memory;
3636 }
3637
3638 struct tc_flush_call *p;
3639 if (deferred) {
3640 /* these have identical fields */
3641 p = (struct tc_flush_call *)tc_add_call(tc, TC_CALL_flush_deferred, tc_flush_deferred_call);
3642 } else {
3643 p = tc_add_call(tc, TC_CALL_flush, tc_flush_call);
3644 p->tc = tc;
3645 }
3646 p->fence = fence ? *fence : NULL;
3647 p->flags = flags | TC_FLUSH_ASYNC;
3648
3649 if (!deferred) {
3650 /* non-deferred async flushes indicate completion of existing renderpass info */
3651 tc_signal_renderpass_info_ready(tc);
3652 tc_batch_flush(tc, false);
3653 tc->seen_fb_state = false;
3654 }
3655
3656 return;
3657 }
3658
3659 out_of_memory:
3660 tc->flushing = true;
3661 /* renderpass info is signaled during sync */
3662 tc_sync_msg(tc, flags & PIPE_FLUSH_END_OF_FRAME ? "end of frame" :
3663 flags & PIPE_FLUSH_DEFERRED ? "deferred fence" : "normal");
3664
3665 if (!deferred) {
3666 tc_flush_queries(tc);
3667 tc->seen_fb_state = false;
3668 tc->query_ended = false;
3669 }
3670 tc_set_driver_thread(tc);
3671 pipe->flush(pipe, fence, flags);
3672 tc_clear_driver_thread(tc);
3673 tc->flushing = false;
3674 }
3675
3676 struct tc_draw_single_drawid {
3677 struct tc_draw_single base;
3678 unsigned drawid_offset;
3679 };
3680
3681 static uint16_t
3682 tc_call_draw_single_drawid(struct pipe_context *pipe, void *call)
3683 {
3684 struct tc_draw_single_drawid *info_drawid = to_call(call, tc_draw_single_drawid);
3685 struct tc_draw_single *info = &info_drawid->base;
3686
3687 /* u_threaded_context stores start/count in min/max_index for single draws. */
3688 /* Drivers using u_threaded_context shouldn't use min/max_index. */
3689 struct pipe_draw_start_count_bias draw;
3690
3691 draw.start = info->info.min_index;
3692 draw.count = info->info.max_index;
3693 draw.index_bias = info->index_bias;
3694
3695 info->info.index_bounds_valid = false;
3696 info->info.has_user_indices = false;
3697 info->info.take_index_buffer_ownership = false;
3698
3699 pipe->draw_vbo(pipe, &info->info, info_drawid->drawid_offset, NULL, &draw, 1);
3700 if (info->info.index_size)
3701 tc_drop_resource_reference(info->info.index.resource);
3702
3703 return call_size(tc_draw_single_drawid);
3704 }
3705
3706 static void
3707 simplify_draw_info(struct pipe_draw_info *info)
3708 {
3709 /* Clear these fields to facilitate draw merging.
3710 * Drivers shouldn't use them.
3711 */
3712 info->has_user_indices = false;
3713 info->index_bounds_valid = false;
3714 info->take_index_buffer_ownership = false;
3715 info->index_bias_varies = false;
3716 info->_pad = 0;
3717
3718 /* This shouldn't be set when merging single draws. */
3719 info->increment_draw_id = false;
3720
3721 if (info->index_size) {
3722 if (!info->primitive_restart)
3723 info->restart_index = 0;
3724 } else {
3725 assert(!info->primitive_restart);
3726 info->primitive_restart = false;
3727 info->restart_index = 0;
3728 info->index.resource = NULL;
3729 }
3730 }
3731
3732 static bool
3733 is_next_call_a_mergeable_draw(struct tc_draw_single *first,
3734 struct tc_draw_single *next)
3735 {
3736 if (next->base.call_id != TC_CALL_draw_single)
3737 return false;
3738
3739 STATIC_ASSERT(offsetof(struct pipe_draw_info, min_index) ==
3740 sizeof(struct pipe_draw_info) - 8);
3741 STATIC_ASSERT(offsetof(struct pipe_draw_info, max_index) ==
3742 sizeof(struct pipe_draw_info) - 4);
3743 /* All fields must be the same except start and count. */
3744 /* u_threaded_context stores start/count in min/max_index for single draws. */
3745 return memcmp((uint32_t*)&first->info, (uint32_t*)&next->info,
3746 DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX) == 0;
3747 }
3748
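/* Executed on the driver thread: consecutive tc_draw_single calls whose
 * pipe_draw_info is identical except for start/count (stored in
 * min_index/max_index) are merged into one multi-draw, and the index buffer
 * references taken per draw are then dropped num_draws at a time.
 */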
3749 static uint16_t
3750 tc_call_draw_single(struct pipe_context *pipe, void *call)
3751 {
3752 /* Draw call merging. */
3753 struct tc_draw_single *first = to_call(call, tc_draw_single);
3754 struct tc_draw_single *next = get_next_call(first, tc_draw_single);
3755
3756 /* If at least 2 consecutive draw calls can be merged... */
3757 if (next->base.call_id == TC_CALL_draw_single) {
3758 if (is_next_call_a_mergeable_draw(first, next)) {
3759 /* The maximum number of merged draws is given by the batch size. */
3760 struct pipe_draw_start_count_bias multi[TC_SLOTS_PER_BATCH / call_size(tc_draw_single)];
3761 unsigned num_draws = 2;
3762 bool index_bias_varies = first->index_bias != next->index_bias;
3763
3764 /* u_threaded_context stores start/count in min/max_index for single draws. */
3765 multi[0].start = first->info.min_index;
3766 multi[0].count = first->info.max_index;
3767 multi[0].index_bias = first->index_bias;
3768 multi[1].start = next->info.min_index;
3769 multi[1].count = next->info.max_index;
3770 multi[1].index_bias = next->index_bias;
3771
3772 /* Find how many other draws can be merged. */
3773 next = get_next_call(next, tc_draw_single);
3774 for (; is_next_call_a_mergeable_draw(first, next);
3775 next = get_next_call(next, tc_draw_single), num_draws++) {
3776 /* u_threaded_context stores start/count in min/max_index for single draws. */
3777 multi[num_draws].start = next->info.min_index;
3778 multi[num_draws].count = next->info.max_index;
3779 multi[num_draws].index_bias = next->index_bias;
3780 index_bias_varies |= first->index_bias != next->index_bias;
3781 }
3782
3783 first->info.index_bias_varies = index_bias_varies;
3784 pipe->draw_vbo(pipe, &first->info, 0, NULL, multi, num_draws);
3785
3786 /* Since all draws use the same index buffer, drop all references at once. */
3787 if (first->info.index_size)
3788 pipe_drop_resource_references(first->info.index.resource, num_draws);
3789
3790 return call_size(tc_draw_single) * num_draws;
3791 }
3792 }
3793
3794 /* u_threaded_context stores start/count in min/max_index for single draws. */
3795 /* Drivers using u_threaded_context shouldn't use min/max_index. */
3796 struct pipe_draw_start_count_bias draw;
3797
3798 draw.start = first->info.min_index;
3799 draw.count = first->info.max_index;
3800 draw.index_bias = first->index_bias;
3801
3802 first->info.index_bounds_valid = false;
3803 first->info.has_user_indices = false;
3804 first->info.take_index_buffer_ownership = false;
3805
3806 pipe->draw_vbo(pipe, &first->info, 0, NULL, &draw, 1);
3807 if (first->info.index_size)
3808 tc_drop_resource_reference(first->info.index.resource);
3809
3810 return call_size(tc_draw_single);
3811 }
3812
3813 struct tc_draw_indirect {
3814 struct tc_call_base base;
3815 struct pipe_draw_start_count_bias draw;
3816 struct pipe_draw_info info;
3817 struct pipe_draw_indirect_info indirect;
3818 };
3819
3820 static uint16_t
3821 tc_call_draw_indirect(struct pipe_context *pipe, void *call)
3822 {
3823 struct tc_draw_indirect *info = to_call(call, tc_draw_indirect);
3824
3825 info->info.index_bounds_valid = false;
3826 info->info.take_index_buffer_ownership = false;
3827
3828 pipe->draw_vbo(pipe, &info->info, 0, &info->indirect, &info->draw, 1);
3829 if (info->info.index_size)
3830 tc_drop_resource_reference(info->info.index.resource);
3831
3832 tc_drop_resource_reference(info->indirect.buffer);
3833 tc_drop_resource_reference(info->indirect.indirect_draw_count);
3834 tc_drop_so_target_reference(info->indirect.count_from_stream_output);
3835 return call_size(tc_draw_indirect);
3836 }
3837
3838 struct tc_draw_multi {
3839 struct tc_call_base base;
3840 unsigned num_draws;
3841 struct pipe_draw_info info;
3842 struct pipe_draw_start_count_bias slot[]; /* variable-sized array */
3843 };
3844
3845 static uint16_t
3846 tc_call_draw_multi(struct pipe_context *pipe, void *call)
3847 {
3848 struct tc_draw_multi *info = (struct tc_draw_multi*)call;
3849
3850 info->info.has_user_indices = false;
3851 info->info.index_bounds_valid = false;
3852 info->info.take_index_buffer_ownership = false;
3853
3854 pipe->draw_vbo(pipe, &info->info, 0, NULL, info->slot, info->num_draws);
3855 if (info->info.index_size)
3856 tc_drop_resource_reference(info->info.index.resource);
3857
3858 return info->base.num_slots;
3859 }
3860
3861 #define DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX \
3862 offsetof(struct pipe_draw_info, index)
3863
3864 /* Single draw with drawid_offset == 0. */
3865 static void
3866 tc_draw_single(struct pipe_context *_pipe, const struct pipe_draw_info *info,
3867 unsigned drawid_offset,
3868 const struct pipe_draw_indirect_info *indirect,
3869 const struct pipe_draw_start_count_bias *draws,
3870 unsigned num_draws)
3871 {
3872 struct threaded_context *tc = threaded_context(_pipe);
3873 struct tc_draw_single *p =
3874 tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
3875
3876 if (info->index_size) {
3877 if (!info->take_index_buffer_ownership) {
3878 tc_set_resource_reference(&p->info.index.resource,
3879 info->index.resource);
3880 }
3881 tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource);
3882 }
3883 memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3884 /* u_threaded_context stores start/count in min/max_index for single draws. */
3885 p->info.min_index = draws[0].start;
3886 p->info.max_index = draws[0].count;
3887 p->index_bias = draws[0].index_bias;
3888 simplify_draw_info(&p->info);
3889 }
3890
3891 /* Single draw with drawid_offset > 0. */
3892 static void
3893 tc_draw_single_draw_id(struct pipe_context *_pipe,
3894 const struct pipe_draw_info *info,
3895 unsigned drawid_offset,
3896 const struct pipe_draw_indirect_info *indirect,
3897 const struct pipe_draw_start_count_bias *draws,
3898 unsigned num_draws)
3899 {
3900 struct threaded_context *tc = threaded_context(_pipe);
3901 struct tc_draw_single *p =
3902 &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base;
3903
3904 if (info->index_size) {
3905 if (!info->take_index_buffer_ownership) {
3906 tc_set_resource_reference(&p->info.index.resource,
3907 info->index.resource);
3908 }
3909 tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource);
3910 }
3911 ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset;
3912 memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3913 /* u_threaded_context stores start/count in min/max_index for single draws. */
3914 p->info.min_index = draws[0].start;
3915 p->info.max_index = draws[0].count;
3916 p->index_bias = draws[0].index_bias;
3917 simplify_draw_info(&p->info);
3918 }
3919
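/* For user-index draws, the index data is first copied into a GPU buffer via
 * u_upload_data, and the draw start is rebased to the upload offset expressed
 * in index units (offset >> log2(index_size)); the call then behaves like a
 * regular indexed draw with a real index buffer.
 */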
3920 /* Single draw with user indices and drawid_offset == 0. */
3921 static void
3922 tc_draw_user_indices_single(struct pipe_context *_pipe,
3923 const struct pipe_draw_info *info,
3924 unsigned drawid_offset,
3925 const struct pipe_draw_indirect_info *indirect,
3926 const struct pipe_draw_start_count_bias *draws,
3927 unsigned num_draws)
3928 {
3929 struct threaded_context *tc = threaded_context(_pipe);
3930 unsigned index_size = info->index_size;
3931 unsigned size = draws[0].count * index_size;
3932 struct pipe_resource *buffer = NULL;
3933 unsigned offset;
3934
3935 if (!size)
3936 return;
3937
3938 /* This must be done before adding draw_vbo, because the upload could
3939 * trigger e.g. transfer_unmap or a batch flush, which would submit a
3940 * partially-initialized draw_vbo call to the driver if done afterwards.
3941 */
3942 u_upload_data(tc->base.stream_uploader, 0, size, 4,
3943 (uint8_t*)info->index.user + draws[0].start * index_size,
3944 &offset, &buffer);
3945 if (unlikely(!buffer))
3946 return;
3947
3948 struct tc_draw_single *p =
3949 tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
3950 memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
3951 p->info.index.resource = buffer;
3952 /* u_threaded_context stores start/count in min/max_index for single draws. */
3953 p->info.min_index = offset >> util_logbase2(index_size);
3954 p->info.max_index = draws[0].count;
3955 p->index_bias = draws[0].index_bias;
3956 simplify_draw_info(&p->info);
3957 }
3958
3959 /* Single draw with user indices and drawid_offset > 0. */
3960 static void
3961 tc_draw_user_indices_single_draw_id(struct pipe_context *_pipe,
3962 const struct pipe_draw_info *info,
3963 unsigned drawid_offset,
3964 const struct pipe_draw_indirect_info *indirect,
3965 const struct pipe_draw_start_count_bias *draws,
3966 unsigned num_draws)
3967 {
3968 struct threaded_context *tc = threaded_context(_pipe);
3969 unsigned index_size = info->index_size;
3970 unsigned size = draws[0].count * index_size;
3971 struct pipe_resource *buffer = NULL;
3972 unsigned offset;
3973
3974 if (!size)
3975 return;
3976
3977 /* This must be done before adding draw_vbo, because the upload could
3978 * trigger e.g. transfer_unmap or a batch flush, which would submit a
3979 * partially-initialized draw_vbo call to the driver if done afterwards.
3980 */
3981 u_upload_data(tc->base.stream_uploader, 0, size, 4,
3982 (uint8_t*)info->index.user + draws[0].start * index_size,
3983 &offset, &buffer);
3984 if (unlikely(!buffer))
3985 return;
3986
3987 struct tc_draw_single *p =
3988 &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base;
3989 memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
3990 p->info.index.resource = buffer;
3991 ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset;
3992 /* u_threaded_context stores start/count in min/max_index for single draws. */
3993 p->info.min_index = offset >> util_logbase2(index_size);
3994 p->info.max_index = draws[0].count;
3995 p->index_bias = draws[0].index_bias;
3996 simplify_draw_info(&p->info);
3997 }
3998
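/* SLOTS_FOR_ONE_DRAW is the number of tc_call_base-sized slots that a
 * tc_draw_multi call carrying a single pipe_draw_start_count_bias occupies.
 * The multi-draw paths below use it to decide whether the current batch has
 * room left or whether the remaining draws should be sized against a fresh
 * batch instead.
 */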
3999 #define DRAW_OVERHEAD_BYTES sizeof(struct tc_draw_multi)
4000 #define ONE_DRAW_SLOT_BYTES sizeof(((struct tc_draw_multi*)NULL)->slot[0])
4001
4002 #define SLOTS_FOR_ONE_DRAW \
4003 DIV_ROUND_UP(DRAW_OVERHEAD_BYTES + ONE_DRAW_SLOT_BYTES, \
4004 sizeof(struct tc_call_base))
4005
4006 static void
4007 tc_draw_multi(struct pipe_context *_pipe, const struct pipe_draw_info *info,
4008 unsigned drawid_offset,
4009 const struct pipe_draw_indirect_info *indirect,
4010 const struct pipe_draw_start_count_bias *draws,
4011 unsigned num_draws)
4012 {
4013 struct threaded_context *tc = threaded_context(_pipe);
4014 int total_offset = 0;
4015 bool take_index_buffer_ownership = info->take_index_buffer_ownership;
4016
4017 while (num_draws) {
4018 struct tc_batch *next = &tc->batch_slots[tc->next];
4019
4020 int nb_slots_left = TC_SLOTS_PER_BATCH - 1 - next->num_total_slots;
4021 /* If there isn't enough room for one draw, fill the next batch instead */
4022 if (nb_slots_left < SLOTS_FOR_ONE_DRAW)
4023 nb_slots_left = TC_SLOTS_PER_BATCH - 1;
4024 const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
4025
4026 /* How many draws can we fit in the current batch */
4027 const int dr = MIN2(num_draws, (size_left_bytes - DRAW_OVERHEAD_BYTES) /
4028 ONE_DRAW_SLOT_BYTES);
4029
4030 /* Non-indexed call or indexed with a real index buffer. */
4031 struct tc_draw_multi *p =
4032 tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi,
4033 dr);
4034 if (info->index_size) {
4035 if (!take_index_buffer_ownership) {
4036 tc_set_resource_reference(&p->info.index.resource,
4037 info->index.resource);
4038 }
4039 tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource);
4040 }
4041 take_index_buffer_ownership = false;
4042 memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
4043 p->num_draws = dr;
4044 memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr);
4045 num_draws -= dr;
4046
4047 total_offset += dr;
4048 }
4049 }
4050
4051 static void
4052 tc_draw_user_indices_multi(struct pipe_context *_pipe,
4053 const struct pipe_draw_info *info,
4054 unsigned drawid_offset,
4055 const struct pipe_draw_indirect_info *indirect,
4056 const struct pipe_draw_start_count_bias *draws,
4057 unsigned num_draws)
4058 {
4059 struct threaded_context *tc = threaded_context(_pipe);
4060 struct pipe_resource *buffer = NULL;
4061 unsigned buffer_offset, total_count = 0;
4062 unsigned index_size_shift = util_logbase2(info->index_size);
4063 uint8_t *ptr = NULL;
4064
4065 /* Get the total count. */
4066 for (unsigned i = 0; i < num_draws; i++)
4067 total_count += draws[i].count;
4068
4069 if (!total_count)
4070 return;
4071
4072 /* Allocate space for all index buffers.
4073 *
4074 * This must be done before adding draw_vbo, because the upload could
4075 * trigger e.g. transfer_unmap or a batch flush, which would submit a
4076 * partially-initialized draw_vbo call to the driver if done afterwards.
4077 */
4078 u_upload_alloc(tc->base.stream_uploader, 0,
4079 total_count << index_size_shift, 4,
4080 &buffer_offset, &buffer, (void**)&ptr);
4081 if (unlikely(!buffer))
4082 return;
4083
4084 int total_offset = 0;
4085 unsigned offset = 0;
4086 while (num_draws) {
4087 struct tc_batch *next = &tc->batch_slots[tc->next];
4088
4089 int nb_slots_left = TC_SLOTS_PER_BATCH - 1 - next->num_total_slots;
4090 /* If there isn't enough room for one draw, fill the next batch instead */
4091 if (nb_slots_left < SLOTS_FOR_ONE_DRAW)
4092 nb_slots_left = TC_SLOTS_PER_BATCH - 1;
4093 const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
4094
4095 /* How many draws can we fit in the current batch */
4096 const int dr = MIN2(num_draws, (size_left_bytes - DRAW_OVERHEAD_BYTES) /
4097 ONE_DRAW_SLOT_BYTES);
4098
4099 struct tc_draw_multi *p =
4100 tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi,
4101 dr);
4102 memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
4103
4104 if (total_offset == 0)
4105 /* the first slot inherits the reference from u_upload_alloc() */
4106 p->info.index.resource = buffer;
4107 else
4108 /* all following slots need a new reference */
4109 tc_set_resource_reference(&p->info.index.resource, buffer);
4110
4111 p->num_draws = dr;
4112
4113 /* Upload index buffers. */
4114 for (unsigned i = 0; i < dr; i++) {
4115 unsigned count = draws[i + total_offset].count;
4116
4117 if (!count) {
4118 p->slot[i].start = 0;
4119 p->slot[i].count = 0;
4120 p->slot[i].index_bias = 0;
4121 continue;
4122 }
4123
4124 unsigned size = count << index_size_shift;
4125 memcpy(ptr + offset,
4126 (uint8_t*)info->index.user +
4127 (draws[i + total_offset].start << index_size_shift), size);
4128 p->slot[i].start = (buffer_offset + offset) >> index_size_shift;
4129 p->slot[i].count = count;
4130 p->slot[i].index_bias = draws[i + total_offset].index_bias;
4131 offset += size;
4132 }
4133
4134 total_offset += dr;
4135 num_draws -= dr;
4136 }
4137 }
4138
4139 static void
4140 tc_draw_indirect(struct pipe_context *_pipe, const struct pipe_draw_info *info,
4141 unsigned drawid_offset,
4142 const struct pipe_draw_indirect_info *indirect,
4143 const struct pipe_draw_start_count_bias *draws,
4144 unsigned num_draws)
4145 {
4146 struct threaded_context *tc = threaded_context(_pipe);
4147 assert(!info->has_user_indices);
4148 assert(num_draws == 1);
4149
4150 struct tc_draw_indirect *p =
4151 tc_add_call(tc, TC_CALL_draw_indirect, tc_draw_indirect);
4152 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
4153
4154 if (info->index_size) {
4155 if (!info->take_index_buffer_ownership) {
4156 tc_set_resource_reference(&p->info.index.resource,
4157 info->index.resource);
4158 }
4159 tc_add_to_buffer_list(next, info->index.resource);
4160 }
4161 memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
4162
4163 tc_set_resource_reference(&p->indirect.buffer, indirect->buffer);
4164 tc_set_resource_reference(&p->indirect.indirect_draw_count,
4165 indirect->indirect_draw_count);
4166 p->indirect.count_from_stream_output = NULL;
4167 pipe_so_target_reference(&p->indirect.count_from_stream_output,
4168 indirect->count_from_stream_output);
4169
4170 if (indirect->buffer)
4171 tc_add_to_buffer_list(next, indirect->buffer);
4172 if (indirect->indirect_draw_count)
4173 tc_add_to_buffer_list(next, indirect->indirect_draw_count);
4174 if (indirect->count_from_stream_output)
4175 tc_add_to_buffer_list(next, indirect->count_from_stream_output->buffer);
4176
4177 memcpy(&p->indirect, indirect, sizeof(*indirect));
4178 p->draw.start = draws[0].start;
4179 }
4180
4181 /* Dispatch table for tc_draw_vbo:
4182 *
4183 * Indexed by:
4184 * [is_indirect * 8 + index_size_and_has_user_indices * 4 +
4185 * is_multi_draw * 2 + non_zero_draw_id]
4186 */
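/* For example, a direct (non-indirect) multi-draw with user indices and
 * drawid_offset == 0 computes 0*8 + 1*4 + 1*2 + 0 = 6 and dispatches to
 * tc_draw_user_indices_multi, while every indirect draw maps to
 * tc_draw_indirect (indices 8..15).
 */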
4187 static pipe_draw_func draw_funcs[16] = {
4188 tc_draw_single,
4189 tc_draw_single_draw_id,
4190 tc_draw_multi,
4191 tc_draw_multi,
4192 tc_draw_user_indices_single,
4193 tc_draw_user_indices_single_draw_id,
4194 tc_draw_user_indices_multi,
4195 tc_draw_user_indices_multi,
4196 tc_draw_indirect,
4197 tc_draw_indirect,
4198 tc_draw_indirect,
4199 tc_draw_indirect,
4200 tc_draw_indirect,
4201 tc_draw_indirect,
4202 tc_draw_indirect,
4203 tc_draw_indirect,
4204 };
4205
4206 void
4207 tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info,
4208 unsigned drawid_offset,
4209 const struct pipe_draw_indirect_info *indirect,
4210 const struct pipe_draw_start_count_bias *draws,
4211 unsigned num_draws)
4212 {
4213 STATIC_ASSERT(DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX +
4214 sizeof(intptr_t) == offsetof(struct pipe_draw_info, min_index));
4215
4216 struct threaded_context *tc = threaded_context(_pipe);
4217 if (tc->options.parse_renderpass_info)
4218 tc_parse_draw(tc);
4219
4220 /* Use a function table to call the desired variant of draw_vbo. */
4221 unsigned index = (indirect != NULL) * 8 +
4222 (info->index_size && info->has_user_indices) * 4 +
4223 (num_draws > 1) * 2 + (drawid_offset != 0);
4224 draw_funcs[index](_pipe, info, drawid_offset, indirect, draws, num_draws);
4225
4226 /* This must be after tc_add_*call, which can flush the batch. */
4227 if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
4228 tc_add_all_gfx_bindings_to_buffer_list(tc);
4229 }
4230
4231 struct tc_draw_single *
4232 tc_add_draw_single_call(struct pipe_context *_pipe,
4233 struct pipe_resource *index_bo)
4234 {
4235 struct threaded_context *tc = threaded_context(_pipe);
4236
4237 if (tc->options.parse_renderpass_info)
4238 tc_parse_draw(tc);
4239
4240 struct tc_draw_single *p =
4241 tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
4242
4243 if (index_bo)
4244 tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], index_bo);
4245
4246 /* This must be after tc_add_*call, which can flush the batch. */
4247 if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
4248 tc_add_all_gfx_bindings_to_buffer_list(tc);
4249
4250 return p;
4251 }
4252
4253 struct tc_draw_vstate_single {
4254 struct tc_call_base base;
4255 struct pipe_draw_start_count_bias draw;
4256
4257 /* The following states must be together without holes because they are
4258 * compared by draw merging.
4259 */
4260 struct pipe_vertex_state *state;
4261 uint32_t partial_velem_mask;
4262 struct pipe_draw_vertex_state_info info;
4263 };
4264
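/* Draws with vertex state are mergeable when the next enqueued call is also
 * a single draw_vertex_state and its state/partial_velem_mask/info block,
 * kept contiguous in tc_draw_vstate_single for exactly this memcmp, matches
 * the first draw's.
 */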
4265 static bool
4266 is_next_call_a_mergeable_draw_vstate(struct tc_draw_vstate_single *first,
4267 struct tc_draw_vstate_single *next)
4268 {
4269 if (next->base.call_id != TC_CALL_draw_vstate_single)
4270 return false;
4271
4272 return !memcmp(&first->state, &next->state,
4273 offsetof(struct tc_draw_vstate_single, info) +
4274 sizeof(struct pipe_draw_vertex_state_info) -
4275 offsetof(struct tc_draw_vstate_single, state));
4276 }
4277
4278 static uint16_t
4279 tc_call_draw_vstate_single(struct pipe_context *pipe, void *call)
4280 {
4281 /* Draw call merging. */
4282 struct tc_draw_vstate_single *first = to_call(call, tc_draw_vstate_single);
4283 struct tc_draw_vstate_single *next = get_next_call(first, tc_draw_vstate_single);
4284
4285 /* If at least 2 consecutive draw calls can be merged... */
4286 if (is_next_call_a_mergeable_draw_vstate(first, next)) {
4287 /* The maximum number of merged draws is given by the batch size. */
4288 struct pipe_draw_start_count_bias draws[TC_SLOTS_PER_BATCH /
4289 call_size(tc_draw_vstate_single)];
4290 unsigned num_draws = 2;
4291
4292 draws[0] = first->draw;
4293 draws[1] = next->draw;
4294
4295 /* Find how many other draws can be merged. */
4296 next = get_next_call(next, tc_draw_vstate_single);
4297 for (; is_next_call_a_mergeable_draw_vstate(first, next);
4298 next = get_next_call(next, tc_draw_vstate_single),
4299 num_draws++)
4300 draws[num_draws] = next->draw;
4301
4302 pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask,
4303 first->info, draws, num_draws);
4304 /* Since all draws use the same state, drop all references at once. */
4305 tc_drop_vertex_state_references(first->state, num_draws);
4306
4307 return call_size(tc_draw_vstate_single) * num_draws;
4308 }
4309
4310 pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask,
4311 first->info, &first->draw, 1);
4312 tc_drop_vertex_state_references(first->state, 1);
4313 return call_size(tc_draw_vstate_single);
4314 }
4315
4316 struct tc_draw_vstate_multi {
4317 struct tc_call_base base;
4318 uint32_t partial_velem_mask;
4319 struct pipe_draw_vertex_state_info info;
4320 unsigned num_draws;
4321 struct pipe_vertex_state *state;
4322 struct pipe_draw_start_count_bias slot[0];
4323 };
4324
4325 static uint16_t
4326 tc_call_draw_vstate_multi(struct pipe_context *pipe, void *call)
4327 {
4328 struct tc_draw_vstate_multi *info = (struct tc_draw_vstate_multi*)call;
4329
4330 pipe->draw_vertex_state(pipe, info->state, info->partial_velem_mask,
4331 info->info, info->slot, info->num_draws);
4332 tc_drop_vertex_state_references(info->state, 1);
4333 return info->base.num_slots;
4334 }
4335
4336 static void
4337 tc_draw_vertex_state(struct pipe_context *_pipe,
4338 struct pipe_vertex_state *state,
4339 uint32_t partial_velem_mask,
4340 struct pipe_draw_vertex_state_info info,
4341 const struct pipe_draw_start_count_bias *draws,
4342 unsigned num_draws)
4343 {
4344 struct threaded_context *tc = threaded_context(_pipe);
4345 if (tc->options.parse_renderpass_info)
4346 tc_parse_draw(tc);
4347
4348 if (num_draws == 1) {
4349 /* Single draw. */
4350 struct tc_draw_vstate_single *p =
4351 tc_add_call(tc, TC_CALL_draw_vstate_single, tc_draw_vstate_single);
4352 p->partial_velem_mask = partial_velem_mask;
4353 p->draw = draws[0];
4354 p->info.mode = info.mode;
4355 p->info.take_vertex_state_ownership = false;
4356
4357 /* This should always be 0 for simplicity because we assume that
4358 * index_bias doesn't vary.
4359 */
4360 assert(draws[0].index_bias == 0);
4361
4362 if (!info.take_vertex_state_ownership)
4363 tc_set_vertex_state_reference(&p->state, state);
4364 else
4365 p->state = state;
4366
4367
4368 /* This must be after tc_add_*call, which can flush the batch. */
4369 if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
4370 tc_add_all_gfx_bindings_to_buffer_list(tc);
4371 return;
4372 }
4373
4374 const int draw_overhead_bytes = sizeof(struct tc_draw_vstate_multi);
4375 const int one_draw_slot_bytes = sizeof(((struct tc_draw_vstate_multi*)NULL)->slot[0]);
4376 const int slots_for_one_draw = DIV_ROUND_UP(draw_overhead_bytes + one_draw_slot_bytes,
4377 sizeof(struct tc_call_base));
4378 /* Multi draw. */
4379 int total_offset = 0;
4380 bool take_vertex_state_ownership = info.take_vertex_state_ownership;
4381 while (num_draws) {
4382 struct tc_batch *next = &tc->batch_slots[tc->next];
4383
4384 int nb_slots_left = TC_SLOTS_PER_BATCH - 1 - next->num_total_slots;
4385 /* If there isn't enough room for one draw, fill the next batch instead */
4386 if (nb_slots_left < slots_for_one_draw)
4387 nb_slots_left = TC_SLOTS_PER_BATCH - 1;
4388 const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
4389
4390 /* How many draws can we fit in the current batch */
4391 const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes);
4392
4393 /* Non-indexed call or indexed with a real index buffer. */
4394 struct tc_draw_vstate_multi *p =
4395 tc_add_slot_based_call(tc, TC_CALL_draw_vstate_multi, tc_draw_vstate_multi, dr);
4396
4397 if (!take_vertex_state_ownership)
4398 tc_set_vertex_state_reference(&p->state, state);
4399 else
4400 p->state = state;
4401
4402 take_vertex_state_ownership = false;
4403 p->partial_velem_mask = partial_velem_mask;
4404 p->info.mode = info.mode;
4405 p->info.take_vertex_state_ownership = false;
4406 p->num_draws = dr;
4407 memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr);
4408 num_draws -= dr;
4409
4410 total_offset += dr;
4411 }
4412
4413
4414 /* This must be after tc_add_*call, which can flush the batch. */
4415 if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
4416 tc_add_all_gfx_bindings_to_buffer_list(tc);
4417 }
4418
4419 struct tc_launch_grid_call {
4420 struct tc_call_base base;
4421 struct pipe_grid_info info;
4422 };
4423
4424 static uint16_t
4425 tc_call_launch_grid(struct pipe_context *pipe, void *call)
4426 {
4427 struct pipe_grid_info *p = &to_call(call, tc_launch_grid_call)->info;
4428
4429 pipe->launch_grid(pipe, p);
4430 tc_drop_resource_reference(p->indirect);
4431 return call_size(tc_launch_grid_call);
4432 }
4433
4434 static void
4435 tc_launch_grid(struct pipe_context *_pipe,
4436 const struct pipe_grid_info *info)
4437 {
4438 struct threaded_context *tc = threaded_context(_pipe);
4439 struct tc_launch_grid_call *p = tc_add_call(tc, TC_CALL_launch_grid,
4440 tc_launch_grid_call);
4441 assert(info->input == NULL);
4442
4443 tc_set_resource_reference(&p->info.indirect, info->indirect);
4444 memcpy(&p->info, info, sizeof(*info));
4445
4446 if (info->indirect)
4447 tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->indirect);
4448
4449 /* This must be after tc_add_*call, which can flush the batch. */
4450 if (unlikely(tc->add_all_compute_bindings_to_buffer_list))
4451 tc_add_all_compute_bindings_to_buffer_list(tc);
4452 }
4453
4454 static uint16_t
4455 tc_call_resource_copy_region(struct pipe_context *pipe, void *call)
4456 {
4457 struct tc_resource_copy_region *p = to_call(call, tc_resource_copy_region);
4458
4459 pipe->resource_copy_region(pipe, p->dst, p->dst_level, p->dstx, p->dsty,
4460 p->dstz, p->src, p->src_level, &p->src_box);
4461 tc_drop_resource_reference(p->dst);
4462 tc_drop_resource_reference(p->src);
4463 return call_size(tc_resource_copy_region);
4464 }
4465
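/* For buffer targets, the copy also disables CPU storage on the destination,
 * adds both resources to the current buffer list for busy tracking, and
 * extends the destination's valid range to cover the copied region.
 */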
4466 static void
4467 tc_resource_copy_region(struct pipe_context *_pipe,
4468 struct pipe_resource *dst, unsigned dst_level,
4469 unsigned dstx, unsigned dsty, unsigned dstz,
4470 struct pipe_resource *src, unsigned src_level,
4471 const struct pipe_box *src_box)
4472 {
4473 struct threaded_context *tc = threaded_context(_pipe);
4474 struct threaded_resource *tdst = threaded_resource(dst);
4475 struct tc_resource_copy_region *p =
4476 tc_add_call(tc, TC_CALL_resource_copy_region,
4477 tc_resource_copy_region);
4478
4479 if (dst->target == PIPE_BUFFER)
4480 tc_buffer_disable_cpu_storage(dst);
4481
4482 tc_set_resource_batch_usage(tc, dst);
4483 tc_set_resource_reference(&p->dst, dst);
4484 p->dst_level = dst_level;
4485 p->dstx = dstx;
4486 p->dsty = dsty;
4487 p->dstz = dstz;
4488 tc_set_resource_batch_usage(tc, src);
4489 tc_set_resource_reference(&p->src, src);
4490 p->src_level = src_level;
4491 p->src_box = *src_box;
4492
4493 if (dst->target == PIPE_BUFFER) {
4494 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
4495
4496 tc_add_to_buffer_list(next, src);
4497 tc_add_to_buffer_list(next, dst);
4498
4499 util_range_add(&tdst->b, &tdst->valid_buffer_range,
4500 dstx, dstx + src_box->width);
4501 }
4502 }
4503
4504 struct tc_blit_call {
4505 struct tc_call_base base;
4506 struct pipe_blit_info info;
4507 };
4508
4509 static uint16_t
4510 tc_call_blit(struct pipe_context *pipe, void *call)
4511 {
4512 struct pipe_blit_info *blit = &to_call(call, tc_blit_call)->info;
4513
4514 pipe->blit(pipe, blit);
4515 tc_drop_resource_reference(blit->dst.resource);
4516 tc_drop_resource_reference(blit->src.resource);
4517 return call_size(tc_blit_call);
4518 }
4519
4520 static void
4521 tc_blit_enqueue(struct threaded_context *tc, const struct pipe_blit_info *info)
4522 {
4523 struct tc_blit_call *blit = tc_add_call(tc, TC_CALL_blit, tc_blit_call);
4524
4525 tc_set_resource_batch_usage(tc, info->dst.resource);
4526 tc_set_resource_reference(&blit->info.dst.resource, info->dst.resource);
4527 tc_set_resource_batch_usage(tc, info->src.resource);
4528 tc_set_resource_reference(&blit->info.src.resource, info->src.resource);
4529 memcpy(&blit->info, info, sizeof(*info));
4530 }
4531
4532 static void
4533 tc_blit(struct pipe_context *_pipe, const struct pipe_blit_info *info)
4534 {
4535 struct threaded_context *tc = threaded_context(_pipe);
4536
4537 /* immediately enqueue any blit that isn't a resolve tracked via renderpass info */
4538 if (!tc->options.parse_renderpass_info ||
4539 info->src.resource->nr_samples <= 1 ||
4540 info->dst.resource->nr_samples > 1) {
4541 tc_blit_enqueue(tc, info);
4542 return;
4543 }
4544
4545 if (tc->fb_resolve == info->dst.resource) {
4546 /* optimize out this blit entirely */
4547 tc->renderpass_info_recording->has_resolve = true;
4548 return;
4549 }
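/* Resolving from one of the currently bound color attachments: record
 * that this renderpass performs a resolve. */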
4550 for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
4551 if (tc->fb_resources[i] == info->src.resource) {
4552 tc->renderpass_info_recording->has_resolve = true;
4553 break;
4554 }
4555 }
4556 tc_blit_enqueue(tc, info);
4557 }
4558
4559 struct tc_generate_mipmap {
4560 struct tc_call_base base;
4561 enum pipe_format format;
4562 unsigned base_level;
4563 unsigned last_level;
4564 unsigned first_layer;
4565 unsigned last_layer;
4566 struct pipe_resource *res;
4567 };
4568
4569 static uint16_t
4570 tc_call_generate_mipmap(struct pipe_context *pipe, void *call)
4571 {
4572 struct tc_generate_mipmap *p = to_call(call, tc_generate_mipmap);
4573 ASSERTED bool result = pipe->generate_mipmap(pipe, p->res, p->format,
4574 p->base_level,
4575 p->last_level,
4576 p->first_layer,
4577 p->last_layer);
4578 assert(result);
4579 tc_drop_resource_reference(p->res);
4580 return call_size(tc_generate_mipmap);
4581 }
4582
4583 static bool
4584 tc_generate_mipmap(struct pipe_context *_pipe,
4585 struct pipe_resource *res,
4586 enum pipe_format format,
4587 unsigned base_level,
4588 unsigned last_level,
4589 unsigned first_layer,
4590 unsigned last_layer)
4591 {
4592 struct threaded_context *tc = threaded_context(_pipe);
4593 struct pipe_context *pipe = tc->pipe;
4594 struct pipe_screen *screen = pipe->screen;
4595 unsigned bind = PIPE_BIND_SAMPLER_VIEW;
4596
4597 if (util_format_is_depth_or_stencil(format))
4598 bind = PIPE_BIND_DEPTH_STENCIL;
4599 else
4600 bind = PIPE_BIND_RENDER_TARGET;
4601
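/* Unlike most entrypoints here, this one must report failure immediately,
 * so the format check runs synchronously on the application thread
 * (pipe_screen queries are safe to call from any thread).
 */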
4602 if (!screen->is_format_supported(screen, format, res->target,
4603 res->nr_samples, res->nr_storage_samples,
4604 bind))
4605 return false;
4606
4607 struct tc_generate_mipmap *p =
4608 tc_add_call(tc, TC_CALL_generate_mipmap, tc_generate_mipmap);
4609
4610 tc_set_resource_batch_usage(tc, res);
4611 tc_set_resource_reference(&p->res, res);
4612 p->format = format;
4613 p->base_level = base_level;
4614 p->last_level = last_level;
4615 p->first_layer = first_layer;
4616 p->last_layer = last_layer;
4617 return true;
4618 }
4619
4620 struct tc_resource_call {
4621 struct tc_call_base base;
4622 struct pipe_resource *resource;
4623 };
4624
4625 static uint16_t
4626 tc_call_flush_resource(struct pipe_context *pipe, void *call)
4627 {
4628 struct pipe_resource *resource = to_call(call, tc_resource_call)->resource;
4629
4630 pipe->flush_resource(pipe, resource);
4631 tc_drop_resource_reference(resource);
4632 return call_size(tc_resource_call);
4633 }
4634
4635 static void
4636 tc_flush_resource(struct pipe_context *_pipe, struct pipe_resource *resource)
4637 {
4638 struct threaded_context *tc = threaded_context(_pipe);
4639 struct tc_resource_call *call = tc_add_call(tc, TC_CALL_flush_resource,
4640 tc_resource_call);
4641
4642 tc_set_resource_batch_usage(tc, resource);
4643 tc_set_resource_reference(&call->resource, resource);
4644 }
4645
4646 static uint16_t
4647 tc_call_invalidate_resource(struct pipe_context *pipe, void *call)
4648 {
4649 struct pipe_resource *resource = to_call(call, tc_resource_call)->resource;
4650
4651 pipe->invalidate_resource(pipe, resource);
4652 tc_drop_resource_reference(resource);
4653 return call_size(tc_resource_call);
4654 }
4655
4656 static void
4657 tc_invalidate_resource(struct pipe_context *_pipe,
4658 struct pipe_resource *resource)
4659 {
4660 struct threaded_context *tc = threaded_context(_pipe);
4661
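/* Buffer invalidation is handled directly on the application thread by
 * replacing the buffer's storage; all other resources are forwarded to
 * the driver thread. */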
4662 if (resource->target == PIPE_BUFFER) {
4663 tc_invalidate_buffer(tc, threaded_resource(resource));
4664 return;
4665 }
4666
4667 struct tc_resource_call *call = tc_add_call(tc, TC_CALL_invalidate_resource,
4668 tc_resource_call);
4669 tc_set_resource_batch_usage(tc, resource);
4670 tc_set_resource_reference(&call->resource, resource);
4671
4672 struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
4673 if (info) {
4674 if (tc->fb_resources[PIPE_MAX_COLOR_BUFS] == resource) {
4675 info->zsbuf_invalidate = true;
4676 } else {
4677 for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
4678 if (tc->fb_resources[i] == resource)
4679 info->cbuf_invalidate |= BITFIELD_BIT(i);
4680 }
4681 }
4682 }
4683 }
4684
4685 struct tc_clear {
4686 struct tc_call_base base;
4687 bool scissor_state_set;
4688 uint8_t stencil;
4689 uint16_t buffers;
4690 float depth;
4691 struct pipe_scissor_state scissor_state;
4692 union pipe_color_union color;
4693 };
4694
4695 static uint16_t
4696 tc_call_clear(struct pipe_context *pipe, void *call)
4697 {
4698 struct tc_clear *p = to_call(call, tc_clear);
4699
4700 pipe->clear(pipe, p->buffers, p->scissor_state_set ? &p->scissor_state : NULL, &p->color, p->depth, p->stencil);
4701 return call_size(tc_clear);
4702 }
4703
4704 static void
4705 tc_clear(struct pipe_context *_pipe, unsigned buffers, const struct pipe_scissor_state *scissor_state,
4706 const union pipe_color_union *color, double depth,
4707 unsigned stencil)
4708 {
4709 struct threaded_context *tc = threaded_context(_pipe);
4710 struct tc_clear *p = tc_add_call(tc, TC_CALL_clear, tc_clear);
4711
4712 p->buffers = buffers;
4713 if (scissor_state) {
4714 p->scissor_state = *scissor_state;
4715 struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
4716 /* partial clear info is useful for drivers to know whether any zs writes occur;
4717 * drivers are responsible for optimizing partial clear -> full clear
4718 */
4719 if (info && buffers & PIPE_CLEAR_DEPTHSTENCIL)
4720 info->zsbuf_clear_partial |= !info->zsbuf_clear;
4721 } else {
4722 struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
4723 if (info) {
4724 /* full clears use a different load operation, but are only valid if draws haven't occurred yet */
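/* PIPE_CLEAR_COLOR0 is bit 2, so shifting the clear mask right by 2
 * yields a per-color-buffer bitmask. */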
4725 info->cbuf_clear |= (buffers >> 2) & ~info->cbuf_load;
4726 if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
4727 if (!info->zsbuf_load && !info->zsbuf_clear_partial)
4728 info->zsbuf_clear = true;
4729 else if (!info->zsbuf_clear)
4730 /* this is a clear that occurred after a draw: flag as partial to ensure it isn't ignored */
4731 info->zsbuf_clear_partial = true;
4732 }
4733 }
4734 }
4735 p->scissor_state_set = !!scissor_state;
4736 p->color = *color;
4737 p->depth = depth;
4738 p->stencil = stencil;
4739 }
4740
4741 struct tc_clear_render_target {
4742 struct tc_call_base base;
4743 bool render_condition_enabled;
4744 unsigned dstx;
4745 unsigned dsty;
4746 unsigned width;
4747 unsigned height;
4748 union pipe_color_union color;
4749 struct pipe_surface *dst;
4750 };
4751
4752 static uint16_t
4753 tc_call_clear_render_target(struct pipe_context *pipe, void *call)
4754 {
4755 struct tc_clear_render_target *p = to_call(call, tc_clear_render_target);
4756
4757 pipe->clear_render_target(pipe, p->dst, &p->color, p->dstx, p->dsty, p->width, p->height,
4758 p->render_condition_enabled);
4759 tc_drop_surface_reference(p->dst);
4760 return call_size(tc_clear_render_target);
4761 }
4762
4763 static void
4764 tc_clear_render_target(struct pipe_context *_pipe,
4765 struct pipe_surface *dst,
4766 const union pipe_color_union *color,
4767 unsigned dstx, unsigned dsty,
4768 unsigned width, unsigned height,
4769 bool render_condition_enabled)
4770 {
4771 struct threaded_context *tc = threaded_context(_pipe);
4772 struct tc_clear_render_target *p = tc_add_call(tc, TC_CALL_clear_render_target, tc_clear_render_target);
4773 p->dst = NULL;
4774 pipe_surface_reference(&p->dst, dst);
4775 p->color = *color;
4776 p->dstx = dstx;
4777 p->dsty = dsty;
4778 p->width = width;
4779 p->height = height;
4780 p->render_condition_enabled = render_condition_enabled;
4781 }
4782
4783
4784 struct tc_clear_depth_stencil {
4785 struct tc_call_base base;
4786 bool render_condition_enabled;
4787 float depth;
4788 unsigned clear_flags;
4789 unsigned stencil;
4790 unsigned dstx;
4791 unsigned dsty;
4792 unsigned width;
4793 unsigned height;
4794 struct pipe_surface *dst;
4795 };
4796
4797
4798 static uint16_t
4799 tc_call_clear_depth_stencil(struct pipe_context *pipe, void *call)
4800 {
4801 struct tc_clear_depth_stencil *p = to_call(call, tc_clear_depth_stencil);
4802
4803 pipe->clear_depth_stencil(pipe, p->dst, p->clear_flags, p->depth, p->stencil,
4804 p->dstx, p->dsty, p->width, p->height,
4805 p->render_condition_enabled);
4806 tc_drop_surface_reference(p->dst);
4807 return call_size(tc_clear_depth_stencil);
4808 }
4809
4810 static void
4811 tc_clear_depth_stencil(struct pipe_context *_pipe,
4812 struct pipe_surface *dst, unsigned clear_flags,
4813 double depth, unsigned stencil, unsigned dstx,
4814 unsigned dsty, unsigned width, unsigned height,
4815 bool render_condition_enabled)
4816 {
4817 struct threaded_context *tc = threaded_context(_pipe);
4818 struct tc_clear_depth_stencil *p = tc_add_call(tc, TC_CALL_clear_depth_stencil, tc_clear_depth_stencil);
4819 p->dst = NULL;
4820 pipe_surface_reference(&p->dst, dst);
4821 p->clear_flags = clear_flags;
4822 p->depth = depth;
4823 p->stencil = stencil;
4824 p->dstx = dstx;
4825 p->dsty = dsty;
4826 p->width = width;
4827 p->height = height;
4828 p->render_condition_enabled = render_condition_enabled;
4829 }
4830
4831 struct tc_clear_buffer {
4832 struct tc_call_base base;
4833 uint8_t clear_value_size;
4834 unsigned offset;
4835 unsigned size;
4836 char clear_value[16];
4837 struct pipe_resource *res;
4838 };
4839
4840 static uint16_t
4841 tc_call_clear_buffer(struct pipe_context *pipe, void *call)
4842 {
4843 struct tc_clear_buffer *p = to_call(call, tc_clear_buffer);
4844
4845 pipe->clear_buffer(pipe, p->res, p->offset, p->size, p->clear_value,
4846 p->clear_value_size);
4847 tc_drop_resource_reference(p->res);
4848 return call_size(tc_clear_buffer);
4849 }
4850
4851 static void
4852 tc_clear_buffer(struct pipe_context *_pipe, struct pipe_resource *res,
4853 unsigned offset, unsigned size,
4854 const void *clear_value, int clear_value_size)
4855 {
4856 struct threaded_context *tc = threaded_context(_pipe);
4857 struct threaded_resource *tres = threaded_resource(res);
4858 struct tc_clear_buffer *p =
4859 tc_add_call(tc, TC_CALL_clear_buffer, tc_clear_buffer);
4860
4861 tc_buffer_disable_cpu_storage(res);
4862
4863 tc_set_resource_reference(&p->res, res);
4864 tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], res);
4865 p->offset = offset;
4866 p->size = size;
4867 memcpy(p->clear_value, clear_value, clear_value_size);
4868 p->clear_value_size = clear_value_size;
4869
4870 util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
4871 }
4872
4873 struct tc_clear_texture {
4874 struct tc_call_base base;
4875 unsigned level;
4876 struct pipe_box box;
4877 char data[16];
4878 struct pipe_resource *res;
4879 };
4880
4881 static uint16_t
4882 tc_call_clear_texture(struct pipe_context *pipe, void *call)
4883 {
4884 struct tc_clear_texture *p = to_call(call, tc_clear_texture);
4885
4886 pipe->clear_texture(pipe, p->res, p->level, &p->box, p->data);
4887 tc_drop_resource_reference(p->res);
4888 return call_size(tc_clear_texture);
4889 }
4890
4891 static void
4892 tc_clear_texture(struct pipe_context *_pipe, struct pipe_resource *res,
4893 unsigned level, const struct pipe_box *box, const void *data)
4894 {
4895 struct threaded_context *tc = threaded_context(_pipe);
4896 struct tc_clear_texture *p =
4897 tc_add_call(tc, TC_CALL_clear_texture, tc_clear_texture);
4898
4899 tc_set_resource_batch_usage(tc, res);
4900 tc_set_resource_reference(&p->res, res);
4901 p->level = level;
4902 p->box = *box;
4903 memcpy(p->data, data,
4904 util_format_get_blocksize(res->format));
4905 }
4906
4907 struct tc_resource_commit {
4908 struct tc_call_base base;
4909 bool commit;
4910 unsigned level;
4911 struct pipe_box box;
4912 struct pipe_resource *res;
4913 };
4914
4915 static uint16_t
4916 tc_call_resource_commit(struct pipe_context *pipe, void *call)
4917 {
4918 struct tc_resource_commit *p = to_call(call, tc_resource_commit);
4919
4920 pipe->resource_commit(pipe, p->res, p->level, &p->box, p->commit);
4921 tc_drop_resource_reference(p->res);
4922 return call_size(tc_resource_commit);
4923 }
4924
4925 static bool
4926 tc_resource_commit(struct pipe_context *_pipe, struct pipe_resource *res,
4927 unsigned level, struct pipe_box *box, bool commit)
4928 {
4929 struct threaded_context *tc = threaded_context(_pipe);
4930 struct tc_resource_commit *p =
4931 tc_add_call(tc, TC_CALL_resource_commit, tc_resource_commit);
4932
4933 tc_set_resource_reference(&p->res, res);
4934 tc_set_resource_batch_usage(tc, res);
4935 p->level = level;
4936 p->box = *box;
4937 p->commit = commit;
4938 return true; /* we don't care about the return value for this call */
4939 }
4940
4941 static unsigned
4942 tc_init_intel_perf_query_info(struct pipe_context *_pipe)
4943 {
4944 struct threaded_context *tc = threaded_context(_pipe);
4945 struct pipe_context *pipe = tc->pipe;
4946
4947 return pipe->init_intel_perf_query_info(pipe);
4948 }
4949
4950 static void
4951 tc_get_intel_perf_query_info(struct pipe_context *_pipe,
4952 unsigned query_index,
4953 const char **name,
4954 uint32_t *data_size,
4955 uint32_t *n_counters,
4956 uint32_t *n_active)
4957 {
4958 struct threaded_context *tc = threaded_context(_pipe);
4959 struct pipe_context *pipe = tc->pipe;
4960
4961 tc_sync(tc); /* n_active vs begin/end_intel_perf_query */
4962 pipe->get_intel_perf_query_info(pipe, query_index, name, data_size,
4963 n_counters, n_active);
4964 }
4965
4966 static void
4967 tc_get_intel_perf_query_counter_info(struct pipe_context *_pipe,
4968 unsigned query_index,
4969 unsigned counter_index,
4970 const char **name,
4971 const char **desc,
4972 uint32_t *offset,
4973 uint32_t *data_size,
4974 uint32_t *type_enum,
4975 uint32_t *data_type_enum,
4976 uint64_t *raw_max)
4977 {
4978 struct threaded_context *tc = threaded_context(_pipe);
4979 struct pipe_context *pipe = tc->pipe;
4980
4981 pipe->get_intel_perf_query_counter_info(pipe, query_index, counter_index,
4982 name, desc, offset, data_size, type_enum, data_type_enum, raw_max);
4983 }
4984
4985 static struct pipe_query *
4986 tc_new_intel_perf_query_obj(struct pipe_context *_pipe, unsigned query_index)
4987 {
4988 struct threaded_context *tc = threaded_context(_pipe);
4989 struct pipe_context *pipe = tc->pipe;
4990
4991 return pipe->new_intel_perf_query_obj(pipe, query_index);
4992 }
4993
4994 static uint16_t
4995 tc_call_begin_intel_perf_query(struct pipe_context *pipe, void *call)
4996 {
4997 (void)pipe->begin_intel_perf_query(pipe, to_call(call, tc_query_call)->query);
4998 return call_size(tc_query_call);
4999 }
5000
5001 static bool
5002 tc_begin_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
5003 {
5004 struct threaded_context *tc = threaded_context(_pipe);
5005
5006 tc_add_call(tc, TC_CALL_begin_intel_perf_query, tc_query_call)->query = q;
5007
5008 /* assume success; a failed begin can still be reported later from get_intel_perf_query_data */
5009 return true;
5010 }
5011
5012 static uint16_t
5013 tc_call_end_intel_perf_query(struct pipe_context *pipe, void *call)
5014 {
5015 pipe->end_intel_perf_query(pipe, to_call(call, tc_query_call)->query);
5016 return call_size(tc_query_call);
5017 }
5018
5019 static void
5020 tc_end_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
5021 {
5022 struct threaded_context *tc = threaded_context(_pipe);
5023
5024 tc_add_call(tc, TC_CALL_end_intel_perf_query, tc_query_call)->query = q;
5025 }
5026
5027 static void
5028 tc_delete_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
5029 {
5030 struct threaded_context *tc = threaded_context(_pipe);
5031 struct pipe_context *pipe = tc->pipe;
5032
5033 tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
5034 pipe->delete_intel_perf_query(pipe, q);
5035 }
5036
5037 static void
5038 tc_wait_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
5039 {
5040 struct threaded_context *tc = threaded_context(_pipe);
5041 struct pipe_context *pipe = tc->pipe;
5042
5043 tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
5044 pipe->wait_intel_perf_query(pipe, q);
5045 }
5046
5047 static bool
5048 tc_is_intel_perf_query_ready(struct pipe_context *_pipe, struct pipe_query *q)
5049 {
5050 struct threaded_context *tc = threaded_context(_pipe);
5051 struct pipe_context *pipe = tc->pipe;
5052
5053 tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
5054 return pipe->is_intel_perf_query_ready(pipe, q);
5055 }
5056
5057 static bool
5058 tc_get_intel_perf_query_data(struct pipe_context *_pipe,
5059 struct pipe_query *q,
5060 size_t data_size,
5061 uint32_t *data,
5062 uint32_t *bytes_written)
5063 {
5064 struct threaded_context *tc = threaded_context(_pipe);
5065 struct pipe_context *pipe = tc->pipe;
5066
5067 tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
5068 return pipe->get_intel_perf_query_data(pipe, q, data_size, data, bytes_written);
5069 }
5070
5071 /********************************************************************
5072 * callback
5073 */
5074
5075 struct tc_callback_call {
5076 struct tc_call_base base;
5077 void (*fn)(void *data);
5078 void *data;
5079 };
5080
5081 static uint16_t
5082 tc_call_callback(UNUSED struct pipe_context *pipe, void *call)
5083 {
5084 struct tc_callback_call *p = to_call(call, tc_callback_call);
5085
5086 p->fn(p->data);
5087 return call_size(tc_callback_call);
5088 }
5089
5090 static void
5091 tc_callback(struct pipe_context *_pipe, void (*fn)(void *), void *data,
5092 bool asap)
5093 {
5094 struct threaded_context *tc = threaded_context(_pipe);
5095
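/* If the caller allows immediate execution and the context is currently
 * synced (nothing queued ahead of us), run the callback right away
 * instead of recording it. */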
5096 if (asap && tc_is_sync(tc)) {
5097 fn(data);
5098 return;
5099 }
5100
5101 struct tc_callback_call *p =
5102 tc_add_call(tc, TC_CALL_callback, tc_callback_call);
5103 p->fn = fn;
5104 p->data = data;
5105 }
5106
5107
5108 /********************************************************************
5109 * create & destroy
5110 */
5111
5112 static void
5113 tc_destroy(struct pipe_context *_pipe)
5114 {
5115 struct threaded_context *tc = threaded_context(_pipe);
5116 struct pipe_context *pipe = tc->pipe;
5117
5118 if (tc->base.const_uploader &&
5119 tc->base.stream_uploader != tc->base.const_uploader)
5120 u_upload_destroy(tc->base.const_uploader);
5121
5122 if (tc->base.stream_uploader)
5123 u_upload_destroy(tc->base.stream_uploader);
5124
5125 tc_sync(tc);
5126
5127 if (util_queue_is_initialized(&tc->queue)) {
5128 util_queue_destroy(&tc->queue);
5129
5130 for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
5131 util_queue_fence_destroy(&tc->batch_slots[i].fence);
5132 util_dynarray_fini(&tc->batch_slots[i].renderpass_infos);
5133 assert(!tc->batch_slots[i].token);
5134 }
5135 }
5136
5137 slab_destroy_child(&tc->pool_transfers);
5138 assert(tc->batch_slots[tc->next].num_total_slots == 0);
5139 pipe->destroy(pipe);
5140
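/* Buffer-list fences must be signalled before they can be destroyed, so
 * signal any that the driver never flushed. */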
5141 for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) {
5142 if (!util_queue_fence_is_signalled(&tc->buffer_lists[i].driver_flushed_fence))
5143 util_queue_fence_signal(&tc->buffer_lists[i].driver_flushed_fence);
5144 util_queue_fence_destroy(&tc->buffer_lists[i].driver_flushed_fence);
5145 }
5146
5147 for (unsigned i = 0; i < ARRAY_SIZE(tc->fb_resources); i++)
5148 pipe_resource_reference(&tc->fb_resources[i], NULL);
5149 pipe_resource_reference(&tc->fb_resolve, NULL);
5150
5151 FREE(tc);
5152 }
5153
5154 void tc_driver_internal_flush_notify(struct threaded_context *tc)
5155 {
5156 /* Allow drivers to call this function even for internal contexts that
5157 * don't have tc. It simplifies drivers.
5158 */
5159 if (!tc)
5160 return;
5161
5162 /* Signal fences set by tc_batch_execute. */
5163 for (unsigned i = 0; i < tc->num_signal_fences_next_flush; i++)
5164 util_queue_fence_signal(tc->signal_fences_next_flush[i]);
5165
5166 tc->num_signal_fences_next_flush = 0;
5167 }
5168
5169 /**
5170 * Wrap an existing pipe_context into a threaded_context.
5171 *
5172 * \param pipe pipe_context to wrap
5173 * \param parent_transfer_pool parent slab pool set up for creating pipe_-
5174 * transfer objects; the driver should have one
5175 * in pipe_screen.
5176 * \param replace_buffer callback for replacing a pipe_resource's storage
5177 * with another pipe_resource's storage.
5178 * \param options optional TC options/callbacks
5179 * \param out if "out" != NULL and creation succeeds, the threaded_context
5180 * is also stored here in addition to the returned pipe_context
5181 */
5182 struct pipe_context *
5183 threaded_context_create(struct pipe_context *pipe,
5184 struct slab_parent_pool *parent_transfer_pool,
5185 tc_replace_buffer_storage_func replace_buffer,
5186 const struct threaded_context_options *options,
5187 struct threaded_context **out)
5188 {
5189 struct threaded_context *tc;
5190
5191 if (!pipe)
5192 return NULL;
5193
5194 if (!debug_get_bool_option("GALLIUM_THREAD", true))
5195 return pipe;
5196
5197 tc = CALLOC_STRUCT(threaded_context);
5198 if (!tc) {
5199 pipe->destroy(pipe);
5200 return NULL;
5201 }
5202
5203 if (options) {
5204 /* this is unimplementable */
5205 assert(!(options->parse_renderpass_info && options->driver_calls_flush_notify));
5206 tc->options = *options;
5207 }
5208
5209 pipe = trace_context_create_threaded(pipe->screen, pipe, &replace_buffer, &tc->options);
5210
5211 /* The driver context isn't wrapped, so set its "priv" to NULL. */
5212 pipe->priv = NULL;
5213
5214 tc->pipe = pipe;
5215 tc->replace_buffer_storage = replace_buffer;
5216 tc->map_buffer_alignment =
5217 pipe->screen->get_param(pipe->screen, PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT);
5218 tc->ubo_alignment =
5219 MAX2(pipe->screen->get_param(pipe->screen, PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT), 64);
5220 tc->base.priv = pipe; /* priv points to the wrapped driver context */
5221 tc->base.screen = pipe->screen;
5222 tc->base.destroy = tc_destroy;
5223 tc->base.callback = tc_callback;
5224
5225 tc->base.stream_uploader = u_upload_clone(&tc->base, pipe->stream_uploader);
5226 if (pipe->stream_uploader == pipe->const_uploader)
5227 tc->base.const_uploader = tc->base.stream_uploader;
5228 else
5229 tc->base.const_uploader = u_upload_clone(&tc->base, pipe->const_uploader);
5230
5231 if (!tc->base.stream_uploader || !tc->base.const_uploader)
5232 goto fail;
5233
5234 tc->use_forced_staging_uploads = true;
5235
5236 /* The queue size is the number of batches "waiting". Batches are removed
5237 * from the queue before being executed, so keep one tc_batch slot for that
5238 * execution. Also, keep one unused slot for an unflushed batch.
5239 */
5240 if (!util_queue_init(&tc->queue, "gdrv", TC_MAX_BATCHES - 2, 1, 0, NULL))
5241 goto fail;
5242
5243 tc->last_completed = -1;
5244 for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
5245 #if !defined(NDEBUG) && TC_DEBUG >= 1
5246 tc->batch_slots[i].sentinel = TC_SENTINEL;
5247 #endif
5248 tc->batch_slots[i].tc = tc;
5249 tc->batch_slots[i].batch_idx = i;
5250 util_queue_fence_init(&tc->batch_slots[i].fence);
5251 tc->batch_slots[i].renderpass_info_idx = -1;
5252 if (tc->options.parse_renderpass_info) {
5253 util_dynarray_init(&tc->batch_slots[i].renderpass_infos, NULL);
5254 tc_batch_renderpass_infos_resize(tc, &tc->batch_slots[i]);
5255 }
5256 }
5257 for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++)
5258 util_queue_fence_init(&tc->buffer_lists[i].driver_flushed_fence);
5259
5260 list_inithead(&tc->unflushed_queries);
5261
5262 slab_create_child(&tc->pool_transfers, parent_transfer_pool);
5263
5264 /* If you have different limits in each shader stage, set the maximum. */
5265 struct pipe_screen *screen = pipe->screen;
5266 tc->max_const_buffers =
5267 screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
5268 PIPE_SHADER_CAP_MAX_CONST_BUFFERS);
5269 tc->max_shader_buffers =
5270 screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
5271 PIPE_SHADER_CAP_MAX_SHADER_BUFFERS);
5272 tc->max_images =
5273 screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
5274 PIPE_SHADER_CAP_MAX_SHADER_IMAGES);
5275 tc->max_samplers =
5276 screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
5277 PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS);
5278
5279 tc->base.set_context_param = tc_set_context_param; /* always set this */
5280
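/* Install a threaded wrapper only for the hooks the wrapped driver
 * implements; the rest remain NULL. */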
5281 #define CTX_INIT(_member) \
5282 tc->base._member = tc->pipe->_member ? tc_##_member : NULL
5283
5284 CTX_INIT(flush);
5285 CTX_INIT(draw_vbo);
5286 CTX_INIT(draw_vertex_state);
5287 CTX_INIT(launch_grid);
5288 CTX_INIT(resource_copy_region);
5289 CTX_INIT(blit);
5290 CTX_INIT(clear);
5291 CTX_INIT(clear_render_target);
5292 CTX_INIT(clear_depth_stencil);
5293 CTX_INIT(clear_buffer);
5294 CTX_INIT(clear_texture);
5295 CTX_INIT(flush_resource);
5296 CTX_INIT(generate_mipmap);
5297 CTX_INIT(render_condition);
5298 CTX_INIT(create_query);
5299 CTX_INIT(create_batch_query);
5300 CTX_INIT(destroy_query);
5301 CTX_INIT(begin_query);
5302 CTX_INIT(end_query);
5303 CTX_INIT(get_query_result);
5304 CTX_INIT(get_query_result_resource);
5305 CTX_INIT(set_active_query_state);
5306 CTX_INIT(create_blend_state);
5307 CTX_INIT(bind_blend_state);
5308 CTX_INIT(delete_blend_state);
5309 CTX_INIT(create_sampler_state);
5310 CTX_INIT(bind_sampler_states);
5311 CTX_INIT(delete_sampler_state);
5312 CTX_INIT(create_rasterizer_state);
5313 CTX_INIT(bind_rasterizer_state);
5314 CTX_INIT(delete_rasterizer_state);
5315 CTX_INIT(create_depth_stencil_alpha_state);
5316 CTX_INIT(bind_depth_stencil_alpha_state);
5317 CTX_INIT(delete_depth_stencil_alpha_state);
5318 CTX_INIT(link_shader);
5319 CTX_INIT(create_fs_state);
5320 CTX_INIT(bind_fs_state);
5321 CTX_INIT(delete_fs_state);
5322 CTX_INIT(create_vs_state);
5323 CTX_INIT(bind_vs_state);
5324 CTX_INIT(delete_vs_state);
5325 CTX_INIT(create_gs_state);
5326 CTX_INIT(bind_gs_state);
5327 CTX_INIT(delete_gs_state);
5328 CTX_INIT(create_tcs_state);
5329 CTX_INIT(bind_tcs_state);
5330 CTX_INIT(delete_tcs_state);
5331 CTX_INIT(create_tes_state);
5332 CTX_INIT(bind_tes_state);
5333 CTX_INIT(delete_tes_state);
5334 CTX_INIT(create_compute_state);
5335 CTX_INIT(bind_compute_state);
5336 CTX_INIT(delete_compute_state);
5337 CTX_INIT(create_vertex_elements_state);
5338 CTX_INIT(bind_vertex_elements_state);
5339 CTX_INIT(delete_vertex_elements_state);
5340 CTX_INIT(set_blend_color);
5341 CTX_INIT(set_stencil_ref);
5342 CTX_INIT(set_sample_mask);
5343 CTX_INIT(set_min_samples);
5344 CTX_INIT(set_clip_state);
5345 CTX_INIT(set_constant_buffer);
5346 CTX_INIT(set_inlinable_constants);
5347 CTX_INIT(set_framebuffer_state);
5348 CTX_INIT(set_polygon_stipple);
5349 CTX_INIT(set_sample_locations);
5350 CTX_INIT(set_scissor_states);
5351 CTX_INIT(set_viewport_states);
5352 CTX_INIT(set_window_rectangles);
5353 CTX_INIT(set_sampler_views);
5354 CTX_INIT(set_tess_state);
5355 CTX_INIT(set_patch_vertices);
5356 CTX_INIT(set_shader_buffers);
5357 CTX_INIT(set_shader_images);
5358 CTX_INIT(set_vertex_buffers);
5359 CTX_INIT(create_stream_output_target);
5360 CTX_INIT(stream_output_target_destroy);
5361 CTX_INIT(set_stream_output_targets);
5362 CTX_INIT(create_sampler_view);
5363 CTX_INIT(sampler_view_destroy);
5364 CTX_INIT(create_surface);
5365 CTX_INIT(surface_destroy);
5366 CTX_INIT(buffer_map);
5367 CTX_INIT(texture_map);
5368 CTX_INIT(transfer_flush_region);
5369 CTX_INIT(buffer_unmap);
5370 CTX_INIT(texture_unmap);
5371 CTX_INIT(buffer_subdata);
5372 CTX_INIT(texture_subdata);
5373 CTX_INIT(texture_barrier);
5374 CTX_INIT(memory_barrier);
5375 CTX_INIT(resource_commit);
5376 CTX_INIT(create_video_codec);
5377 CTX_INIT(create_video_buffer);
5378 CTX_INIT(set_compute_resources);
5379 CTX_INIT(set_global_binding);
5380 CTX_INIT(get_sample_position);
5381 CTX_INIT(invalidate_resource);
5382 CTX_INIT(get_device_reset_status);
5383 CTX_INIT(set_device_reset_callback);
5384 CTX_INIT(dump_debug_state);
5385 CTX_INIT(set_log_context);
5386 CTX_INIT(emit_string_marker);
5387 CTX_INIT(set_debug_callback);
5388 CTX_INIT(create_fence_fd);
5389 CTX_INIT(fence_server_sync);
5390 CTX_INIT(fence_server_signal);
5391 CTX_INIT(get_timestamp);
5392 CTX_INIT(create_texture_handle);
5393 CTX_INIT(delete_texture_handle);
5394 CTX_INIT(make_texture_handle_resident);
5395 CTX_INIT(create_image_handle);
5396 CTX_INIT(delete_image_handle);
5397 CTX_INIT(make_image_handle_resident);
5398 CTX_INIT(set_frontend_noop);
5399 CTX_INIT(init_intel_perf_query_info);
5400 CTX_INIT(get_intel_perf_query_info);
5401 CTX_INIT(get_intel_perf_query_counter_info);
5402 CTX_INIT(new_intel_perf_query_obj);
5403 CTX_INIT(begin_intel_perf_query);
5404 CTX_INIT(end_intel_perf_query);
5405 CTX_INIT(delete_intel_perf_query);
5406 CTX_INIT(wait_intel_perf_query);
5407 CTX_INIT(is_intel_perf_query_ready);
5408 CTX_INIT(get_intel_perf_query_data);
5409 #undef CTX_INIT
5410
5411 #define CALL(name) tc->execute_func[TC_CALL_##name] = tc_call_##name;
5412 #include "u_threaded_context_calls.h"
5413 #undef CALL
5414
5415 if (out)
5416 *out = tc;
5417
5418 tc_begin_next_buffer_list(tc);
5419 if (tc->options.parse_renderpass_info)
5420 tc_batch_increment_renderpass_info(tc, tc->next, false);
5421 return &tc->base;
5422
5423 fail:
5424 tc_destroy(&tc->base);
5425 return NULL;
5426 }
5427
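/* A minimal sketch of how a driver might wrap its context with
 * threaded_context_create(), assuming a hypothetical "foo" driver; the
 * foo_* names and the transfer_pool/tc fields are placeholders, not part
 * of this API:
 *
 *    static struct pipe_context *
 *    foo_context_create(struct pipe_screen *screen, void *priv, unsigned flags)
 *    {
 *       struct foo_screen *fscreen = foo_screen(screen);
 *       struct pipe_context *ctx = foo_context_create_unwrapped(screen, priv, flags);
 *
 *       if (!ctx)
 *          return NULL;
 *
 *       // Returns "ctx" unchanged if GALLIUM_THREAD is disabled.
 *       return threaded_context_create(ctx, &fscreen->transfer_pool,
 *                                      foo_replace_buffer_storage,
 *                                      NULL,   // no special options
 *                                      &foo_context(ctx)->tc);
 *    }
 */
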
5428 void
5429 threaded_context_init_bytes_mapped_limit(struct threaded_context *tc, unsigned divisor)
5430 {
5431 uint64_t total_ram;
5432 if (os_get_total_physical_memory(&total_ram)) {
5433 tc->bytes_mapped_limit = total_ram / divisor;
5434 if (sizeof(void*) == 4)
5435 tc->bytes_mapped_limit = MIN2(tc->bytes_mapped_limit, 512*1024*1024UL);
5436 }
5437 }
5438
5439 const struct tc_renderpass_info *
5440 threaded_context_get_renderpass_info(struct threaded_context *tc)
5441 {
5442 assert(tc->renderpass_info && tc->options.parse_renderpass_info);
5443 struct tc_batch_rp_info *info = tc_batch_rp_info(tc->renderpass_info);
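/* Wait for each info node to become ready and follow the ->next links
 * until the most recent one is reached. */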
5444 while (1) {
5445 util_queue_fence_wait(&info->ready);
5446 if (!info->next)
5447 return &info->info;
5448 info = info->next;
5449 }
5450 }
5451