1 /**************************************************************************
2  *
3  * Copyright 2017 Advanced Micro Devices, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * on the rights to use, copy, modify, merge, publish, distribute, sub
10  * license, and/or sell copies of the Software, and to permit persons to whom
11  * the Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23  * USE OR OTHER DEALINGS IN THE SOFTWARE.
24  *
25  **************************************************************************/
26 
27 #include "util/u_threaded_context.h"
28 #include "util/u_cpu_detect.h"
29 #include "util/format/u_format.h"
30 #include "util/u_inlines.h"
31 #include "util/u_memory.h"
32 #include "util/u_upload_mgr.h"
33 #include "driver_trace/tr_context.h"
34 #include "util/log.h"
35 #include "util/perf/cpu_trace.h"
36 #include "util/thread_sched.h"
37 #include "compiler/shader_info.h"
38 
39 #if TC_DEBUG >= 1
40 #define tc_assert assert
41 #else
42 #define tc_assert(x)
43 #endif
44 
45 #if TC_DEBUG >= 2
46 #define tc_printf mesa_logi
47 #define tc_asprintf asprintf
48 #define tc_strcmp strcmp
49 #else
50 #define tc_printf(...)
51 #define tc_asprintf(...) 0
52 #define tc_strcmp(...) 0
53 #endif
54 
55 #define TC_SENTINEL 0x5ca1ab1e
56 
57 #if TC_DEBUG >= 3 || defined(TC_TRACE)
58 static const char *tc_call_names[] = {
59 #define CALL(name) #name,
60 #include "u_threaded_context_calls.h"
61 #undef CALL
62 };
63 #endif
64 
65 #ifdef TC_TRACE
66 #  define TC_TRACE_SCOPE(call_id) MESA_TRACE_SCOPE(tc_call_names[call_id])
67 #else
68 #  define TC_TRACE_SCOPE(call_id)
69 #endif
70 
71 static void
72 tc_buffer_subdata(struct pipe_context *_pipe,
73                   struct pipe_resource *resource,
74                   unsigned usage, unsigned offset,
75                   unsigned size, const void *data);
76 
77 static void
78 tc_batch_check(UNUSED struct tc_batch *batch)
79 {
80    tc_assert(batch->sentinel == TC_SENTINEL);
81    tc_assert(batch->num_total_slots <= TC_SLOTS_PER_BATCH);
82 }
83 
84 static void
85 tc_debug_check(struct threaded_context *tc)
86 {
87    for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
88       tc_batch_check(&tc->batch_slots[i]);
89       tc_assert(tc->batch_slots[i].tc == tc);
90    }
91 }
92 
93 static void
94 tc_set_driver_thread(struct threaded_context *tc)
95 {
96 #ifndef NDEBUG
97    tc->driver_thread = thrd_current();
98 #endif
99 }
100 
101 static void
102 tc_clear_driver_thread(struct threaded_context *tc)
103 {
104 #ifndef NDEBUG
105    memset(&tc->driver_thread, 0, sizeof(tc->driver_thread));
106 #endif
107 }
108 
109 struct tc_batch_rp_info {
110    /* this is what drivers can see */
111    struct tc_renderpass_info info;
112    /* determines whether the info can be "safely" read by drivers or if it may still be in use */
113    struct util_queue_fence ready;
114    /* when a batch is full, the rp info rolls over onto 'next' */
115    struct tc_batch_rp_info *next;
116    /* when rp info has rolled over onto this struct, 'prev' is used to update pointers for realloc */
117    struct tc_batch_rp_info *prev;
118 };
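
/* Illustrative sketch (added commentary, not part of the upstream file): how
 * the rollover links are used when a renderpass outlives the batch that began
 * recording it.  Assuming 'old' is the info currently being recorded and
 * 'new' is the first info slot of the next batch, incrementing with
 * full_copy=true effectively does:
 *
 *    new->info.data = old->info.data;   // same renderpass, carry all data
 *    old->next = new;                   // readers can follow the chain
 *    new->prev = old;                   // a realloc of 'new' can fix old->next
 *
 * A driver must not read an info until its fence is signaled:
 *
 *    util_queue_fence_wait(&tc_batch_rp_info(info)->ready);
 */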
119 
120 static struct tc_batch_rp_info *
121 tc_batch_rp_info(struct tc_renderpass_info *info)
122 {
123    return (struct tc_batch_rp_info *)info;
124 }
125 
126 static void
127 tc_sanitize_renderpass_info(struct threaded_context *tc)
128 {
129    tc->renderpass_info_recording->cbuf_invalidate = 0;
130    tc->renderpass_info_recording->zsbuf_invalidate = false;
131    tc->renderpass_info_recording->cbuf_load |= (~tc->renderpass_info_recording->cbuf_clear) & BITFIELD_MASK(PIPE_MAX_COLOR_BUFS);
132    if (tc->fb_resources[PIPE_MAX_COLOR_BUFS] && !tc_renderpass_info_is_zsbuf_used(tc->renderpass_info_recording))
133       /* this should be a "safe" way to indicate to the driver that both loads and stores are required;
134       * driver can always detect invalidation
135       */
136       tc->renderpass_info_recording->zsbuf_clear_partial = true;
137    if (tc->num_queries_active)
138       tc->renderpass_info_recording->has_query_ends = true;
139 }
140 
141 /* ensure the batch's array of renderpass data is large enough for the current index */
142 static void
143 tc_batch_renderpass_infos_resize(struct threaded_context *tc, struct tc_batch *batch)
144 {
145    unsigned size = batch->renderpass_infos.capacity;
146    unsigned cur_num = MAX2(batch->renderpass_info_idx, 0);
147 
148    if (size / sizeof(struct tc_batch_rp_info) > cur_num)
149       return;
150 
151    struct tc_batch_rp_info *infos = batch->renderpass_infos.data;
152    unsigned old_idx = batch->renderpass_info_idx - 1;
153    bool redo = tc->renderpass_info_recording &&
154                tc->renderpass_info_recording == &infos[old_idx].info;
155    if (!util_dynarray_resize(&batch->renderpass_infos, struct tc_batch_rp_info, cur_num + 10))
156       mesa_loge("tc: memory alloc fail!");
157 
158    if (size != batch->renderpass_infos.capacity) {
159       /* zero new allocation region */
160       uint8_t *data = batch->renderpass_infos.data;
161       memset(data + size, 0, batch->renderpass_infos.capacity - size);
162       unsigned start = size / sizeof(struct tc_batch_rp_info);
163       unsigned count = (batch->renderpass_infos.capacity - size) /
164                        sizeof(struct tc_batch_rp_info);
165       infos = batch->renderpass_infos.data;
166       if (infos->prev)
167          infos->prev->next = infos;
168       for (unsigned i = 0; i < count; i++)
169          util_queue_fence_init(&infos[start + i].ready);
170       /* re-set current recording info on resize */
171       if (redo)
172          tc->renderpass_info_recording = &infos[old_idx].info;
173    }
174 }
175 
176 /* signal that the renderpass info is "ready" for use by drivers and will no longer be updated */
177 static void
178 tc_signal_renderpass_info_ready(struct threaded_context *tc)
179 {
180    if (tc->renderpass_info_recording &&
181        !util_queue_fence_is_signalled(&tc_batch_rp_info(tc->renderpass_info_recording)->ready))
182       util_queue_fence_signal(&tc_batch_rp_info(tc->renderpass_info_recording)->ready);
183 }
184 
185 /* increment the current renderpass info struct for recording
186  * 'full_copy' is used for preserving data across non-blocking tc batch flushes
187  */
188 static void
189 tc_batch_increment_renderpass_info(struct threaded_context *tc, unsigned batch_idx, bool full_copy)
190 {
191    struct tc_batch *batch = &tc->batch_slots[batch_idx];
192    struct tc_batch_rp_info *tc_info = batch->renderpass_infos.data;
193 
194    if (tc_info[0].next || batch->num_total_slots) {
195       /* deadlock condition detected: all batches are in flight, renderpass hasn't ended
196        * (probably a cts case)
197        */
198       struct tc_batch_rp_info *info = tc_batch_rp_info(tc->renderpass_info_recording);
199       if (!util_queue_fence_is_signalled(&info->ready)) {
200          /* this batch is actively executing and the driver is waiting on the recording fence to signal */
201          /* force all buffer usage to avoid data loss */
202          info->info.cbuf_load = ~(BITFIELD_MASK(8) & info->info.cbuf_clear);
203          info->info.zsbuf_clear_partial = true;
204          info->info.has_query_ends = tc->num_queries_active > 0;
205          /* ensure threaded_context_get_renderpass_info() won't deadlock */
206          info->next = NULL;
207          util_queue_fence_signal(&info->ready);
208       }
209       /* always wait on the batch to finish since this will otherwise overwrite thread data */
210       util_queue_fence_wait(&batch->fence);
211    }
212    /* increment rp info and initialize it */
213    batch->renderpass_info_idx++;
214    tc_batch_renderpass_infos_resize(tc, batch);
215    tc_info = batch->renderpass_infos.data;
216 
217    if (full_copy) {
218       /* this should only be called when changing batches */
219       assert(batch->renderpass_info_idx == 0);
220       /* copy the previous data in its entirety: this is still the same renderpass */
221       if (tc->renderpass_info_recording) {
222          tc_info[batch->renderpass_info_idx].info.data = tc->renderpass_info_recording->data;
223          tc_batch_rp_info(tc->renderpass_info_recording)->next = &tc_info[batch->renderpass_info_idx];
224          tc_info[batch->renderpass_info_idx].prev = tc_batch_rp_info(tc->renderpass_info_recording);
225          /* guard against deadlock scenario */
226          assert(&tc_batch_rp_info(tc->renderpass_info_recording)->next->info != tc->renderpass_info_recording);
227       } else {
228          tc_info[batch->renderpass_info_idx].info.data = 0;
229          tc_info[batch->renderpass_info_idx].prev = NULL;
230       }
231    } else {
232       /* selectively copy: only the CSO metadata is copied, and a new framebuffer state will be added later */
233       tc_info[batch->renderpass_info_idx].info.data = 0;
234       if (tc->renderpass_info_recording) {
235          tc_info[batch->renderpass_info_idx].info.data16[2] = tc->renderpass_info_recording->data16[2];
236          tc_batch_rp_info(tc->renderpass_info_recording)->next = NULL;
237          tc_info[batch->renderpass_info_idx].prev = NULL;
238       }
239    }
240 
241    assert(!full_copy || !tc->renderpass_info_recording || tc_batch_rp_info(tc->renderpass_info_recording)->next);
242    /* signal existing info since it will not be used anymore */
243    tc_signal_renderpass_info_ready(tc);
244    util_queue_fence_reset(&tc_info[batch->renderpass_info_idx].ready);
245    /* guard against deadlock scenario */
246    assert(tc->renderpass_info_recording != &tc_info[batch->renderpass_info_idx].info);
247    /* this is now the current recording renderpass info */
248    tc->renderpass_info_recording = &tc_info[batch->renderpass_info_idx].info;
249    batch->max_renderpass_info_idx = batch->renderpass_info_idx;
250 }
251 
252 static ALWAYS_INLINE struct tc_renderpass_info *
253 tc_get_renderpass_info(struct threaded_context *tc)
254 {
255    return tc->renderpass_info_recording;
256 }
257 
258 /* update metadata at draw time */
259 static void
260 tc_parse_draw(struct threaded_context *tc)
261 {
262    struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
263 
264    if (info) {
265       /* all buffers that aren't cleared are considered loaded */
266       info->cbuf_load |= ~info->cbuf_clear;
267       if (!info->zsbuf_clear)
268          info->zsbuf_load = true;
269       /* previous invalidates are no longer relevant */
270       info->cbuf_invalidate = 0;
271       info->zsbuf_invalidate = false;
272       info->has_draw = true;
273       info->has_query_ends |= tc->query_ended;
274    }
275 
276    tc->in_renderpass = true;
277    tc->seen_fb_state = true;
278    tc->query_ended = false;
279 }
280 
281 static void *
282 to_call_check(void *ptr, unsigned num_slots)
283 {
284 #if TC_DEBUG >= 1
285    struct tc_call_base *call = ptr;
286    tc_assert(call->num_slots == num_slots);
287 #endif
288    return ptr;
289 }
290 #define to_call(ptr, type) ((struct type *)to_call_check((void *)(ptr), call_size(type)))
291 
292 #define size_to_slots(size)      DIV_ROUND_UP(size, 8)
293 #define call_size(type)          size_to_slots(sizeof(struct type))
294 #define call_size_with_slots(type, num_slots) size_to_slots( \
295    sizeof(struct type) + sizeof(((struct type*)NULL)->slot[0]) * (num_slots))
296 #define get_next_call(ptr, type) ((struct type*)((uint64_t*)ptr + call_size(type)))
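
/* Hedged example (added for illustration, not in the original source): how a
 * call is measured in 64-bit slots.  For a hypothetical payload:
 *
 *    struct tc_call_set_foo {
 *       struct tc_call_base base;  // call_id + num_slots (+ sentinel when TC_DEBUG >= 1)
 *       uint32_t value;
 *    };
 *
 *    // slots occupied in the batch:
 *    // call_size(tc_call_set_foo) == size_to_slots(sizeof(struct tc_call_set_foo))
 *    //                            == DIV_ROUND_UP(sizeof(struct tc_call_set_foo), 8)
 *
 * Variable-length calls append 'num_slots' trailing elements and use
 * call_size_with_slots() to account for them.
 */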
297 
298 ALWAYS_INLINE static void
299 tc_set_resource_batch_usage(struct threaded_context *tc, struct pipe_resource *pres)
300 {
301    /* ignore batch usage when persistent */
302    if (threaded_resource(pres)->last_batch_usage != INT8_MAX)
303       threaded_resource(pres)->last_batch_usage = tc->next;
304    threaded_resource(pres)->batch_generation = tc->batch_generation;
305 }
306 
307 ALWAYS_INLINE static void
308 tc_set_resource_batch_usage_persistent(struct threaded_context *tc, struct pipe_resource *pres, bool enable)
309 {
310    if (!pres)
311       return;
312    /* mark with special value to block any unsynchronized access */
313    threaded_resource(pres)->last_batch_usage = enable ? INT8_MAX : tc->next;
314    threaded_resource(pres)->batch_generation = tc->batch_generation;
315 }
316 
317 /* this can ONLY be used to check against the currently recording batch */
318 ALWAYS_INLINE static bool
319 tc_resource_batch_usage_test_busy(const struct threaded_context *tc, const struct pipe_resource *pres)
320 {
321    const struct threaded_resource *tbuf = (const struct threaded_resource*)pres;
322 
323    if (!tc->options.unsynchronized_texture_subdata)
324       return true;
325 
326    /* resource has persistent access: assume always busy */
327    if (tbuf->last_batch_usage == INT8_MAX)
328       return true;
329 
330    /* resource has never been seen */
331    if (tbuf->last_batch_usage == -1)
332       return false;
333 
334    /* resource has been seen but no batches have executed */
335    if (tc->last_completed == -1)
336       return true;
337 
338    /* begin comparisons checking number of times batches have cycled */
339    unsigned diff = tc->batch_generation - tbuf->batch_generation;
340    /* resource has been seen, batches have fully cycled at least once */
341    if (diff > 1)
342       return false;
343 
344    /* resource has been seen in current batch cycle: return whether batch has definitely completed */
345    if (diff == 0)
346       return tc->last_completed >= tbuf->last_batch_usage;
347 
348    /* resource has been seen within one batch cycle: check for batch wrapping */
349    if (tc->last_completed >= tbuf->last_batch_usage)
350       /* this or a subsequent pre-wrap batch was the last to definitely complete: resource is idle */
351       return false;
352 
353    /* batch execution has not definitely wrapped: resource is definitely not idle */
354    if (tc->last_completed > tc->next)
355       return true;
356 
357    /* resource was seen pre-wrap, batch execution has definitely wrapped: idle */
358    if (tbuf->last_batch_usage > tc->last_completed)
359       return false;
360 
361    /* tc->last_completed is not an exact measurement, so anything else is considered busy */
362    return true;
363 }
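
/* Illustrative summary (added commentary, not in the original source) of the
 * clear-cut cases above, assuming options.unsynchronized_texture_subdata is
 * enabled:
 *
 *    last_batch_usage == INT8_MAX   -> persistently used  -> always "busy"
 *    last_batch_usage == -1         -> never seen by tc   -> idle
 *    generation diff  >  1          -> batches fully cycled -> idle
 *
 * Everything in between relies on tc->last_completed, which is only a
 * conservative signal, so ambiguous cases report "busy" and fall back to the
 * synchronized path.
 */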
364 
365 /* Assign src to dst while dst is uninitialized. */
366 static inline void
367 tc_set_resource_reference(struct pipe_resource **dst, struct pipe_resource *src)
368 {
369    *dst = src;
370    pipe_reference(NULL, &src->reference); /* only increment refcount */
371 }
372 
373 /* Assign src to dst while dst is uninitialized. */
374 static inline void
375 tc_set_vertex_state_reference(struct pipe_vertex_state **dst,
376                               struct pipe_vertex_state *src)
377 {
378    *dst = src;
379    pipe_reference(NULL, &src->reference); /* only increment refcount */
380 }
381 
382 /* Unreference dst but don't touch the dst pointer. */
383 static inline void
384 tc_drop_resource_reference(struct pipe_resource *dst)
385 {
386    if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
387       pipe_resource_destroy(dst);
388 }
389 
390 /* Unreference dst but don't touch the dst pointer. */
391 static inline void
392 tc_drop_surface_reference(struct pipe_surface *dst)
393 {
394    if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
395       dst->context->surface_destroy(dst->context, dst);
396 }
397 
398 /* Unreference dst but don't touch the dst pointer. */
399 static inline void
400 tc_drop_so_target_reference(struct pipe_stream_output_target *dst)
401 {
402    if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
403       dst->context->stream_output_target_destroy(dst->context, dst);
404 }
405 
406 /**
407  * Subtract the given number of references.
408  */
409 static inline void
410 tc_drop_vertex_state_references(struct pipe_vertex_state *dst, int num_refs)
411 {
412    int count = p_atomic_add_return(&dst->reference.count, -num_refs);
413 
414    assert(count >= 0);
415    /* Underflows shouldn't happen, but let's be safe. */
416    if (count <= 0)
417       dst->screen->vertex_state_destroy(dst->screen, dst);
418 }
419 
420 /* We don't want to read or write min_index and max_index, because
421  * they shouldn't be needed by drivers at this point.
422  */
423 #define DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX \
424    offsetof(struct pipe_draw_info, min_index)
425 
426 ALWAYS_INLINE static struct tc_renderpass_info *
427 incr_rp_info(struct tc_renderpass_info *tc_info)
428 {
429    struct tc_batch_rp_info *info = tc_batch_rp_info(tc_info);
430    return &info[1].info;
431 }
432 
433 ALWAYS_INLINE static void
434 batch_execute(struct tc_batch *batch, struct pipe_context *pipe, uint64_t *last, bool parsing)
435 {
436    /* if the framebuffer state is persisting from a previous batch,
437     * begin incrementing renderpass info on the first set_framebuffer_state call
438     */
439    bool first = !batch->first_set_fb;
440    const tc_execute *execute_func = batch->tc->execute_func;
441 
442    for (uint64_t *iter = batch->slots; iter != last;) {
443       struct tc_call_base *call = (struct tc_call_base *)iter;
444 
445       tc_assert(call->sentinel == TC_SENTINEL);
446 
447 #if TC_DEBUG >= 3
448       tc_printf("CALL: %s", tc_call_names[call->call_id]);
449 #endif
450 
451       TC_TRACE_SCOPE(call->call_id);
452 
453       iter += execute_func[call->call_id](pipe, call);
454 
455       if (parsing) {
456          if (call->call_id == TC_CALL_flush) {
457             /* always increment renderpass info for non-deferred flushes */
458             batch->tc->renderpass_info = incr_rp_info(batch->tc->renderpass_info);
459             /* if a flush happens, renderpass info is always incremented after */
460             first = false;
461          } else if (call->call_id == TC_CALL_set_framebuffer_state) {
462             /* the renderpass info pointer is already set at the start of the batch,
463              * so don't increment on the first set_framebuffer_state call
464              */
465             if (!first)
466                batch->tc->renderpass_info = incr_rp_info(batch->tc->renderpass_info);
467             first = false;
468          } else if (call->call_id >= TC_CALL_draw_single &&
469                     call->call_id <= TC_CALL_draw_vstate_multi) {
470             /* if a draw happens before a set_framebuffer_state on this batch,
471              * begin incrementing renderpass data
472              */
473             first = false;
474          }
475       }
476    }
477 }
478 
479 static void
480 tc_batch_execute(void *job, UNUSED void *gdata, int thread_index)
481 {
482    struct tc_batch *batch = job;
483    struct pipe_context *pipe = batch->tc->pipe;
484    uint64_t *last = &batch->slots[batch->num_total_slots];
485 
486    tc_batch_check(batch);
487    tc_set_driver_thread(batch->tc);
488 
489    assert(!batch->token);
490 
491    /* setup renderpass info */
492    batch->tc->renderpass_info = batch->renderpass_infos.data;
493 
494    if (batch->tc->options.parse_renderpass_info) {
495       batch_execute(batch, pipe, last, true);
496 
497       struct tc_batch_rp_info *info = batch->renderpass_infos.data;
498       for (unsigned i = 0; i < batch->max_renderpass_info_idx + 1; i++) {
499          if (info[i].next)
500             info[i].next->prev = NULL;
501          info[i].next = NULL;
502       }
503    } else {
504       batch_execute(batch, pipe, last, false);
505    }
506 
507    /* Add the fence to the list of fences for the driver to signal at the next
508     * flush, which we use for tracking which buffers are referenced by
509     * an unflushed command buffer.
510     */
511    struct threaded_context *tc = batch->tc;
512    struct util_queue_fence *fence =
513       &tc->buffer_lists[batch->buffer_list_index].driver_flushed_fence;
514 
515    if (tc->options.driver_calls_flush_notify) {
516       tc->signal_fences_next_flush[tc->num_signal_fences_next_flush++] = fence;
517 
518       /* Since our buffer lists are chained as a ring, we need to flush
519        * the context twice as we go around the ring to make the driver signal
520        * the buffer list fences, so that the producer thread can reuse the buffer
521        * list structures for the next batches without waiting.
522        */
523       unsigned half_ring = TC_MAX_BUFFER_LISTS / 2;
524       if (batch->buffer_list_index % half_ring == half_ring - 1)
525          pipe->flush(pipe, NULL, PIPE_FLUSH_ASYNC);
526    } else {
527       util_queue_fence_signal(fence);
528    }
529 
530    tc_clear_driver_thread(batch->tc);
531    tc_batch_check(batch);
532    batch->num_total_slots = 0;
533    batch->last_mergeable_call = NULL;
534    batch->first_set_fb = false;
535    batch->max_renderpass_info_idx = 0;
536    batch->tc->last_completed = batch->batch_idx;
537 }
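
/* Hedged sketch (added commentary, not upstream code): each queued call is
 * dispatched through the execute_func table, which is why every tc_call_*
 * handler returns its own size in 64-bit slots:
 *
 *    struct tc_call_base *call = (struct tc_call_base *)iter;
 *    iter += execute_func[call->call_id](pipe, call);  // advance by slot count
 *
 * Once the batch finishes, last_completed is updated above so that
 * tc_resource_batch_usage_test_busy() can prove some resources idle without
 * asking the driver.
 */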
538 
539 static void
540 tc_begin_next_buffer_list(struct threaded_context *tc)
541 {
542    tc->next_buf_list = (tc->next_buf_list + 1) % TC_MAX_BUFFER_LISTS;
543 
544    tc->batch_slots[tc->next].buffer_list_index = tc->next_buf_list;
545 
546    /* Clear the buffer list in the new empty batch. */
547    struct tc_buffer_list *buf_list = &tc->buffer_lists[tc->next_buf_list];
548    assert(util_queue_fence_is_signalled(&buf_list->driver_flushed_fence));
549    util_queue_fence_reset(&buf_list->driver_flushed_fence); /* set to unsignalled */
550    BITSET_ZERO(buf_list->buffer_list);
551 
552    tc->add_all_gfx_bindings_to_buffer_list = true;
553    tc->add_all_compute_bindings_to_buffer_list = true;
554 }
555 
556 static void
557 tc_add_call_end(struct tc_batch *next)
558 {
559    /* Add a dummy last call that won't be executed, but will indicate the end
560     * of the batch. It's for calls that always look at the next call, and this
561     * stops them from looking farther ahead.
562     */
563    assert(next->num_total_slots < TC_SLOTS_PER_BATCH);
564    struct tc_call_base *call =
565       (struct tc_call_base*)&next->slots[next->num_total_slots];
566    call->call_id = TC_NUM_CALLS;
567    call->num_slots = 1;
568 }
569 
570 static void
571 tc_batch_flush(struct threaded_context *tc, bool full_copy)
572 {
573    struct tc_batch *next = &tc->batch_slots[tc->next];
574    unsigned next_id = (tc->next + 1) % TC_MAX_BATCHES;
575 
576    tc_assert(next->num_total_slots != 0);
577    tc_add_call_end(next);
578 
579    tc_batch_check(next);
580    tc_debug_check(tc);
581    tc->bytes_mapped_estimate = 0;
582    tc->bytes_replaced_estimate = 0;
583    p_atomic_add(&tc->num_offloaded_slots, next->num_total_slots);
584 
585    if (next->token) {
586       next->token->tc = NULL;
587       tc_unflushed_batch_token_reference(&next->token, NULL);
588    }
589    /* reset renderpass info index for subsequent use */
590    next->renderpass_info_idx = -1;
591 
592    /* always increment renderpass info on batch flush;
593     * renderpass info can only be accessed by its owner batch during execution
594     */
595    if (tc->renderpass_info_recording) {
596       tc->batch_slots[next_id].first_set_fb = full_copy;
597       tc_batch_increment_renderpass_info(tc, next_id, full_copy);
598    }
599 
600    util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute,
601                       NULL, 0);
602    tc->last = tc->next;
603    tc->next = next_id;
604    if (next_id == 0)
605       tc->batch_generation++;
606    tc_begin_next_buffer_list(tc);
607 
608 }
609 
610 /* This is the function that adds variable-sized calls into the current
611  * batch. It also flushes the batch if there is not enough space there.
612  * All other higher-level "add" functions use it.
613  */
614 static void *
615 tc_add_sized_call(struct threaded_context *tc, enum tc_call_id id,
616                   unsigned num_slots)
617 {
618    TC_TRACE_SCOPE(id);
619    struct tc_batch *next = &tc->batch_slots[tc->next];
620    assert(num_slots <= TC_SLOTS_PER_BATCH - 1);
621    tc_debug_check(tc);
622 
623    if (unlikely(next->num_total_slots + num_slots > TC_SLOTS_PER_BATCH - 1)) {
624       /* copy existing renderpass info during flush */
625       tc_batch_flush(tc, true);
626       next = &tc->batch_slots[tc->next];
627       tc_assert(next->num_total_slots == 0);
628       tc_assert(next->last_mergeable_call == NULL);
629    }
630 
631    tc_assert(util_queue_fence_is_signalled(&next->fence));
632 
633    struct tc_call_base *call = (struct tc_call_base*)&next->slots[next->num_total_slots];
634    next->num_total_slots += num_slots;
635 
636 #if !defined(NDEBUG) && TC_DEBUG >= 1
637    call->sentinel = TC_SENTINEL;
638 #endif
639    call->call_id = id;
640    call->num_slots = num_slots;
641 
642 #if TC_DEBUG >= 3
643    tc_printf("ENQUEUE: %s", tc_call_names[id]);
644 #endif
645 
646    tc_debug_check(tc);
647    return call;
648 }
649 
650 #define tc_add_call(tc, execute, type) \
651    ((struct type*)tc_add_sized_call(tc, execute, call_size(type)))
652 
653 #define tc_add_slot_based_call(tc, execute, type, num_slots) \
654    ((struct type*)tc_add_sized_call(tc, execute, \
655                                     call_size_with_slots(type, num_slots)))
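
/* Hedged sketch (illustrative, using a made-up "set_foo" call) of the pattern
 * the rest of this file follows for every queued pipe_context entry point:
 *
 *    struct tc_call_set_foo {
 *       struct tc_call_base base;
 *       unsigned value;
 *    };
 *
 *    static uint16_t
 *    tc_call_set_foo(struct pipe_context *pipe, void *call)
 *    {
 *       pipe->set_foo(pipe, to_call(call, tc_call_set_foo)->value);
 *       return call_size(tc_call_set_foo);  // tells batch_execute how far to advance
 *    }
 *
 *    static void
 *    tc_set_foo(struct pipe_context *_pipe, unsigned value)
 *    {
 *       struct threaded_context *tc = threaded_context(_pipe);
 *       tc_add_call(tc, TC_CALL_set_foo, tc_call_set_foo)->value = value;
 *    }
 *
 * The execute callback runs on the driver thread; the enqueue side runs on
 * the application thread and must not dereference driver state.
 */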
656 
657 /* Returns the last mergeable call that was added to the unflushed
658  * batch, or NULL if the address of that call is not currently known
659  * or no such call exists in the unflushed batch.
660  */
661 static struct tc_call_base *
662 tc_get_last_mergeable_call(struct threaded_context *tc)
663 {
664    struct tc_batch *batch = &tc->batch_slots[tc->next];
665    struct tc_call_base *call = batch->last_mergeable_call;
666 
667    tc_assert(call == NULL || call->num_slots <= batch->num_total_slots);
668 
669    if (call && (uint64_t *)call == &batch->slots[batch->num_total_slots - call->num_slots])
670       return call;
671    else
672       return NULL;
673 }
674 
675 /* Increases the size of the last call in the unflushed batch to the
676  * given number of slots, if possible, without changing the call's data.
677  */
678 static bool
679 tc_enlarge_last_mergeable_call(struct threaded_context *tc, unsigned desired_num_slots)
680 {
681    struct tc_batch *batch = &tc->batch_slots[tc->next];
682    struct tc_call_base *call = tc_get_last_mergeable_call(tc);
683 
684    tc_assert(call);
685    tc_assert(desired_num_slots >= call->num_slots);
686 
687    unsigned added_slots = desired_num_slots - call->num_slots;
688 
689    if (unlikely(batch->num_total_slots + added_slots > TC_SLOTS_PER_BATCH - 1))
690       return false;
691 
692    batch->num_total_slots += added_slots;
693    call->num_slots += added_slots;
694 
695    return true;
696 }
697 
698 static void
699 tc_mark_call_mergeable(struct threaded_context *tc, struct tc_call_base *call)
700 {
701    struct tc_batch *batch = &tc->batch_slots[tc->next];
702    tc_assert(call->num_slots <= batch->num_total_slots);
703    tc_assert((uint64_t *)call == &batch->slots[batch->num_total_slots - call->num_slots]);
704    batch->last_mergeable_call = call;
705 }
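
/* Illustrative usage of the merge helpers above (hypothetical caller, not
 * actual driver code): a writer that batches small updates can grow the
 * previous call instead of emitting a new one:
 *
 *    struct tc_call_base *last = tc_get_last_mergeable_call(tc);
 *    if (last && last->call_id == TC_CALL_buffer_subdata &&
 *        tc_enlarge_last_mergeable_call(tc, last->num_slots + extra_slots)) {
 *       // append the new payload into the tail of 'last'
 *    } else {
 *       // fall back to tc_add_slot_based_call() + tc_mark_call_mergeable()
 *    }
 */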
706 
707 static bool
708 tc_is_sync(struct threaded_context *tc)
709 {
710    struct tc_batch *last = &tc->batch_slots[tc->last];
711    struct tc_batch *next = &tc->batch_slots[tc->next];
712 
713    return util_queue_fence_is_signalled(&last->fence) &&
714           !next->num_total_slots;
715 }
716 
717 static void
718 _tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char *func)
719 {
720    struct tc_batch *last = &tc->batch_slots[tc->last];
721    struct tc_batch *next = &tc->batch_slots[tc->next];
722    bool synced = false;
723 
724    MESA_TRACE_SCOPE(func);
725 
726    tc_debug_check(tc);
727 
728    if (tc->options.parse_renderpass_info && tc->in_renderpass && !tc->flushing) {
729       /* corner case: if tc syncs for any reason but a driver flush during a renderpass,
730        * then the current renderpass info MUST be signaled to avoid deadlocking the driver
731        *
732        * this is not a "complete" signal operation, however, as it's unknown what calls may
733        * come after this one, which means that framebuffer attachment data is unreliable
734        *
735        * to avoid erroneously passing bad state to the driver (e.g., allowing zsbuf elimination),
736        * force all attachments active and assume the app was going to get bad perf here anyway
737        */
738       tc_sanitize_renderpass_info(tc);
739    }
740    tc_signal_renderpass_info_ready(tc);
741 
742    /* Only wait for queued calls... */
743    if (!util_queue_fence_is_signalled(&last->fence)) {
744       util_queue_fence_wait(&last->fence);
745       synced = true;
746    }
747 
748    tc_debug_check(tc);
749 
750    if (next->token) {
751       next->token->tc = NULL;
752       tc_unflushed_batch_token_reference(&next->token, NULL);
753    }
754 
755    /* .. and execute unflushed calls directly. */
756    if (next->num_total_slots) {
757       p_atomic_add(&tc->num_direct_slots, next->num_total_slots);
758       tc->bytes_mapped_estimate = 0;
759       tc->bytes_replaced_estimate = 0;
760       tc_add_call_end(next);
761       tc_batch_execute(next, NULL, 0);
762       tc_begin_next_buffer_list(tc);
763       synced = true;
764    }
765 
766    if (synced) {
767       p_atomic_inc(&tc->num_syncs);
768 
769       if (tc_strcmp(func, "tc_destroy") != 0) {
770          tc_printf("sync %s %s", func, info);
771       }
772    }
773 
774    tc_debug_check(tc);
775 
776    if (tc->options.parse_renderpass_info) {
777       int renderpass_info_idx = next->renderpass_info_idx;
778       if (renderpass_info_idx > 0) {
779          /* don't reset if fb state is unflushed */
780          bool fb_no_draw = tc->seen_fb_state && !tc->renderpass_info_recording->has_draw;
781          uint32_t fb_info = tc->renderpass_info_recording->data32[0];
782          next->renderpass_info_idx = -1;
783          tc_batch_increment_renderpass_info(tc, tc->next, false);
784          if (fb_no_draw)
785             tc->renderpass_info_recording->data32[0] = fb_info;
786       } else if (tc->renderpass_info_recording->has_draw) {
787          tc->renderpass_info_recording->data32[0] = 0;
788       }
789       tc->seen_fb_state = false;
790       tc->query_ended = false;
791    }
792 }
793 
794 #define tc_sync(tc) _tc_sync(tc, "", __func__)
795 #define tc_sync_msg(tc, info) _tc_sync(tc, info, __func__)
796 
797 /**
798  * Call this from fence_finish for same-context fence waits of deferred fences
799  * that haven't been flushed yet.
800  *
801  * The passed pipe_context must be the one passed to pipe_screen::fence_finish,
802  * i.e., the wrapped one.
803  */
804 void
805 threaded_context_flush(struct pipe_context *_pipe,
806                        struct tc_unflushed_batch_token *token,
807                        bool prefer_async)
808 {
809    struct threaded_context *tc = threaded_context(_pipe);
810 
811    /* This is called from the gallium frontend / application thread. */
812    if (token->tc && token->tc == tc) {
813       struct tc_batch *last = &tc->batch_slots[tc->last];
814 
815       /* Prefer to do the flush in the driver thread if it is already
816        * running. That should be better for cache locality.
817        */
818       if (prefer_async || !util_queue_fence_is_signalled(&last->fence))
819          tc_batch_flush(tc, false);
820       else
821          tc_sync(token->tc);
822    }
823 }
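
/* Hedged usage sketch (driver-side, illustrative): a screen's fence_finish
 * implementation for a deferred, not-yet-flushed fence might do something
 * like:
 *
 *    if (fence->tc_token)
 *       threaded_context_flush(ctx, fence->tc_token,
 *                              timeout == 0);  // prefer_async when only polling
 *
 * where 'fence->tc_token' is whatever tc_unflushed_batch_token the driver
 * stashed when it created the deferred fence.
 */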
824 
825 static void
826 tc_add_to_buffer_list(struct tc_buffer_list *next, struct pipe_resource *buf)
827 {
828    uint32_t id = threaded_resource(buf)->buffer_id_unique;
829    BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK);
830 }
831 
832 /* Reset a range of buffer binding slots. */
833 static void
834 tc_unbind_buffers(uint32_t *binding, unsigned count)
835 {
836    if (count)
837       memset(binding, 0, sizeof(*binding) * count);
838 }
839 
840 static void
841 tc_add_bindings_to_buffer_list(BITSET_WORD *buffer_list, const uint32_t *bindings,
842                                unsigned count)
843 {
844    for (unsigned i = 0; i < count; i++) {
845       if (bindings[i])
846          BITSET_SET(buffer_list, bindings[i] & TC_BUFFER_ID_MASK);
847    }
848 }
849 
850 static bool
851 tc_rebind_bindings(uint32_t old_id, uint32_t new_id, uint32_t *bindings,
852                    unsigned count)
853 {
854    unsigned rebind_count = 0;
855 
856    for (unsigned i = 0; i < count; i++) {
857       if (bindings[i] == old_id) {
858          bindings[i] = new_id;
859          rebind_count++;
860       }
861    }
862    return rebind_count;
863 }
864 
865 static void
866 tc_add_shader_bindings_to_buffer_list(struct threaded_context *tc,
867                                       BITSET_WORD *buffer_list,
868                                       enum pipe_shader_type shader)
869 {
870    tc_add_bindings_to_buffer_list(buffer_list, tc->const_buffers[shader],
871                                   tc->max_const_buffers);
872    if (tc->seen_shader_buffers[shader]) {
873       tc_add_bindings_to_buffer_list(buffer_list, tc->shader_buffers[shader],
874                                      tc->max_shader_buffers);
875    }
876    if (tc->seen_image_buffers[shader]) {
877       tc_add_bindings_to_buffer_list(buffer_list, tc->image_buffers[shader],
878                                      tc->max_images);
879    }
880    if (tc->seen_sampler_buffers[shader]) {
881       tc_add_bindings_to_buffer_list(buffer_list, tc->sampler_buffers[shader],
882                                      tc->max_samplers);
883    }
884 }
885 
886 static unsigned
887 tc_rebind_shader_bindings(struct threaded_context *tc, uint32_t old_id,
888                           uint32_t new_id, enum pipe_shader_type shader, uint32_t *rebind_mask)
889 {
890    unsigned ubo = 0, ssbo = 0, img = 0, sampler = 0;
891 
892    ubo = tc_rebind_bindings(old_id, new_id, tc->const_buffers[shader],
893                             tc->max_const_buffers);
894    if (ubo)
895       *rebind_mask |= BITFIELD_BIT(TC_BINDING_UBO_VS) << shader;
896    if (tc->seen_shader_buffers[shader]) {
897       ssbo = tc_rebind_bindings(old_id, new_id, tc->shader_buffers[shader],
898                                 tc->max_shader_buffers);
899       if (ssbo)
900          *rebind_mask |= BITFIELD_BIT(TC_BINDING_SSBO_VS) << shader;
901    }
902    if (tc->seen_image_buffers[shader]) {
903       img = tc_rebind_bindings(old_id, new_id, tc->image_buffers[shader],
904                                tc->max_images);
905       if (img)
906          *rebind_mask |= BITFIELD_BIT(TC_BINDING_IMAGE_VS) << shader;
907    }
908    if (tc->seen_sampler_buffers[shader]) {
909       sampler = tc_rebind_bindings(old_id, new_id, tc->sampler_buffers[shader],
910                                    tc->max_samplers);
911       if (sampler)
912          *rebind_mask |= BITFIELD_BIT(TC_BINDING_SAMPLERVIEW_VS) << shader;
913    }
914    return ubo + ssbo + img + sampler;
915 }
916 
917 /* Add all bound buffers used by VS/TCS/TES/GS/FS to the buffer list.
918  * This is called by the first draw call in a batch when we want to inherit
919  * all bindings set by the previous batch.
920  */
921 static void
922 tc_add_all_gfx_bindings_to_buffer_list(struct threaded_context *tc)
923 {
924    BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;
925 
926    tc_add_bindings_to_buffer_list(buffer_list, tc->vertex_buffers, tc->num_vertex_buffers);
927    if (tc->seen_streamout_buffers)
928       tc_add_bindings_to_buffer_list(buffer_list, tc->streamout_buffers, PIPE_MAX_SO_BUFFERS);
929 
930    tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_VERTEX);
931    tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_FRAGMENT);
932 
933    if (tc->seen_tcs)
934       tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_CTRL);
935    if (tc->seen_tes)
936       tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_EVAL);
937    if (tc->seen_gs)
938       tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_GEOMETRY);
939 
940    tc->add_all_gfx_bindings_to_buffer_list = false;
941 }
942 
943 /* Add all bound buffers used by compute to the buffer list.
944  * This is called by the first compute call in a batch when we want to inherit
945  * all bindings set by the previous batch.
946  */
947 static void
948 tc_add_all_compute_bindings_to_buffer_list(struct threaded_context *tc)
949 {
950    BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;
951 
952    tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_COMPUTE);
953    tc->add_all_compute_bindings_to_buffer_list = false;
954 }
955 
956 static unsigned
957 tc_rebind_buffer(struct threaded_context *tc, uint32_t old_id, uint32_t new_id, uint32_t *rebind_mask)
958 {
959    unsigned vbo = 0, so = 0;
960 
961    vbo = tc_rebind_bindings(old_id, new_id, tc->vertex_buffers,
962                             tc->num_vertex_buffers);
963    if (vbo)
964       *rebind_mask |= BITFIELD_BIT(TC_BINDING_VERTEX_BUFFER);
965 
966    if (tc->seen_streamout_buffers) {
967       so = tc_rebind_bindings(old_id, new_id, tc->streamout_buffers,
968                               PIPE_MAX_SO_BUFFERS);
969       if (so)
970          *rebind_mask |= BITFIELD_BIT(TC_BINDING_STREAMOUT_BUFFER);
971    }
972    unsigned rebound = vbo + so;
973 
974    rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_VERTEX, rebind_mask);
975    rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_FRAGMENT, rebind_mask);
976 
977    if (tc->seen_tcs)
978       rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_CTRL, rebind_mask);
979    if (tc->seen_tes)
980       rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_EVAL, rebind_mask);
981    if (tc->seen_gs)
982       rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_GEOMETRY, rebind_mask);
983 
984    rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_COMPUTE, rebind_mask);
985 
986    if (rebound)
987       BITSET_SET(tc->buffer_lists[tc->next_buf_list].buffer_list, new_id & TC_BUFFER_ID_MASK);
988    return rebound;
989 }
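
/* Illustrative note (added commentary, not upstream): tc_rebind_buffer() is
 * the bookkeeping side of buffer invalidation.  When a buffer's storage is
 * replaced, every binding slot that still holds the old buffer id is
 * rewritten to the new id, and the mask tells the driver which binding
 * categories to re-emit, roughly:
 *
 *    uint32_t rebind_mask = 0;
 *    unsigned rebound = tc_rebind_buffer(tc, old_id, new_id, &rebind_mask);
 *    if (rebound)
 *       // queue a call so the driver updates its own binding tables
 */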
990 
991 static bool
992 tc_is_buffer_bound_with_mask(uint32_t id, uint32_t *bindings, unsigned binding_mask)
993 {
994    while (binding_mask) {
995       if (bindings[u_bit_scan(&binding_mask)] == id)
996          return true;
997    }
998    return false;
999 }
1000 
1001 static bool
1002 tc_is_buffer_shader_bound_for_write(struct threaded_context *tc, uint32_t id,
1003                                     enum pipe_shader_type shader)
1004 {
1005    if (tc->seen_shader_buffers[shader] &&
1006        tc_is_buffer_bound_with_mask(id, tc->shader_buffers[shader],
1007                                     tc->shader_buffers_writeable_mask[shader]))
1008       return true;
1009 
1010    if (tc->seen_image_buffers[shader] &&
1011        tc_is_buffer_bound_with_mask(id, tc->image_buffers[shader],
1012                                     tc->image_buffers_writeable_mask[shader]))
1013       return true;
1014 
1015    return false;
1016 }
1017 
1018 static bool
1019 tc_is_buffer_bound_for_write(struct threaded_context *tc, uint32_t id)
1020 {
1021    if (tc->seen_streamout_buffers &&
1022        tc_is_buffer_bound_with_mask(id, tc->streamout_buffers,
1023                                     BITFIELD_MASK(PIPE_MAX_SO_BUFFERS)))
1024       return true;
1025 
1026    if (tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_VERTEX) ||
1027        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_FRAGMENT) ||
1028        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_COMPUTE))
1029       return true;
1030 
1031    if (tc->seen_tcs &&
1032        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_CTRL))
1033       return true;
1034 
1035    if (tc->seen_tes &&
1036        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_EVAL))
1037       return true;
1038 
1039    if (tc->seen_gs &&
1040        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_GEOMETRY))
1041       return true;
1042 
1043    return false;
1044 }
1045 
1046 static bool
1047 tc_is_buffer_busy(struct threaded_context *tc, struct threaded_resource *tbuf,
1048                   unsigned map_usage)
1049 {
1050    if (!tc->options.is_resource_busy)
1051       return true;
1052 
1053    uint32_t id_hash = tbuf->buffer_id_unique & TC_BUFFER_ID_MASK;
1054 
1055    for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) {
1056       struct tc_buffer_list *buf_list = &tc->buffer_lists[i];
1057 
1058       /* If the buffer is referenced by a batch that hasn't been flushed (by tc or the driver),
1059        * then the buffer is considered busy. */
1060       if (!util_queue_fence_is_signalled(&buf_list->driver_flushed_fence) &&
1061           BITSET_TEST(buf_list->buffer_list, id_hash))
1062          return true;
1063    }
1064 
1065    /* The buffer isn't referenced by any unflushed batch: we can safely ask the driver whether
1066     * this buffer is busy or not. */
1067    return tc->options.is_resource_busy(tc->pipe->screen, tbuf->latest, map_usage);
1068 }
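
/* Hedged sketch (illustrative): the is_resource_busy hook consulted above is
 * supplied by the driver through threaded_context_options, e.g.:
 *
 *    static bool
 *    my_is_resource_busy(struct pipe_screen *screen, struct pipe_resource *res,
 *                        unsigned map_usage)
 *    {
 *       // return false only if the GPU is provably done with 'res'
 *       return my_buffer_is_referenced_by_gpu(screen, res);  // hypothetical helper
 *    }
 *
 * Returning true is always safe; it merely forces the slower synchronizing
 * map path.
 */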
1069 
1070 /**
1071  * allow_cpu_storage should be false for user memory and imported buffers.
1072  */
1073 void
1074 threaded_resource_init(struct pipe_resource *res, bool allow_cpu_storage)
1075 {
1076    struct threaded_resource *tres = threaded_resource(res);
1077 
1078    tres->latest = &tres->b;
1079    tres->cpu_storage = NULL;
1080    util_range_init(&tres->valid_buffer_range);
1081    tres->is_shared = false;
1082    tres->is_user_ptr = false;
1083    tres->buffer_id_unique = 0;
1084    tres->pending_staging_uploads = 0;
1085    tres->last_batch_usage = -1;
1086    util_range_init(&tres->pending_staging_uploads_range);
1087 
1088    if (allow_cpu_storage &&
1089        !(res->flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
1090                        PIPE_RESOURCE_FLAG_SPARSE |
1091                        PIPE_RESOURCE_FLAG_ENCRYPTED)) &&
1092        /* We need buffer invalidation and buffer busyness tracking for the CPU
1093         * storage, which aren't supported with pipe_vertex_state. */
1094        !(res->bind & PIPE_BIND_VERTEX_STATE))
1095       tres->allow_cpu_storage = true;
1096    else
1097       tres->allow_cpu_storage = false;
1098 }
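
/* Hedged usage sketch (illustrative driver code, hypothetical types): drivers
 * call this from their resource creation path so every pipe_resource starts
 * life as a threaded_resource:
 *
 *    struct pipe_resource *
 *    my_resource_create(struct pipe_screen *screen,
 *                       const struct pipe_resource *templ)
 *    {
 *       struct my_resource *res = my_alloc_resource(screen, templ);
 *       if (!res)
 *          return NULL;
 *       // user pointers and imported memory must pass allow_cpu_storage=false
 *       threaded_resource_init(&res->b.b, templ->target == PIPE_BUFFER);
 *       return &res->b.b;
 *    }
 */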
1099 
1100 void
1101 threaded_resource_deinit(struct pipe_resource *res)
1102 {
1103    struct threaded_resource *tres = threaded_resource(res);
1104 
1105    if (tres->latest != &tres->b)
1106            pipe_resource_reference(&tres->latest, NULL);
1107    util_range_destroy(&tres->valid_buffer_range);
1108    util_range_destroy(&tres->pending_staging_uploads_range);
1109    align_free(tres->cpu_storage);
1110 }
1111 
1112 struct pipe_context *
1113 threaded_context_unwrap_sync(struct pipe_context *pipe)
1114 {
1115    if (!pipe || !pipe->priv)
1116       return pipe;
1117 
1118    tc_sync(threaded_context(pipe));
1119    return (struct pipe_context*)pipe->priv;
1120 }
1121 
1122 
1123 /********************************************************************
1124  * simple functions
1125  */
1126 
1127 #define TC_FUNC1(func, qualifier, type, deref, addr, ...) \
1128    struct tc_call_##func { \
1129       struct tc_call_base base; \
1130       type state; \
1131    }; \
1132    \
1133    static uint16_t \
1134    tc_call_##func(struct pipe_context *pipe, void *call) \
1135    { \
1136       pipe->func(pipe, addr(to_call(call, tc_call_##func)->state)); \
1137       return call_size(tc_call_##func); \
1138    } \
1139    \
1140    static void \
1141    tc_##func(struct pipe_context *_pipe, qualifier type deref param) \
1142    { \
1143       struct threaded_context *tc = threaded_context(_pipe); \
1144       struct tc_call_##func *p = (struct tc_call_##func*) \
1145                      tc_add_call(tc, TC_CALL_##func, tc_call_##func); \
1146       p->state = deref(param); \
1147       __VA_ARGS__; \
1148    }
1149 
1150 TC_FUNC1(set_active_query_state, , bool, , )
1151 
1152 TC_FUNC1(set_blend_color, const, struct pipe_blend_color, *, &)
1153 TC_FUNC1(set_stencil_ref, const, struct pipe_stencil_ref, , )
1154 TC_FUNC1(set_clip_state, const, struct pipe_clip_state, *, &)
1155 TC_FUNC1(set_sample_mask, , unsigned, , )
1156 TC_FUNC1(set_min_samples, , unsigned, , )
1157 TC_FUNC1(set_polygon_stipple, const, struct pipe_poly_stipple, *, &)
1158 
1159 TC_FUNC1(texture_barrier, , unsigned, , )
1160 TC_FUNC1(memory_barrier, , unsigned, , )
1161 TC_FUNC1(delete_texture_handle, , uint64_t, , )
1162 TC_FUNC1(delete_image_handle, , uint64_t, , )
1163 TC_FUNC1(set_frontend_noop, , bool, , )
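
/* Illustrative expansion (added commentary, not generated code):
 * TC_FUNC1(set_sample_mask, , unsigned, , ) above produces, roughly:
 *
 *    struct tc_call_set_sample_mask {
 *       struct tc_call_base base;
 *       unsigned state;
 *    };
 *
 *    static uint16_t
 *    tc_call_set_sample_mask(struct pipe_context *pipe, void *call)
 *    {
 *       pipe->set_sample_mask(pipe, to_call(call, tc_call_set_sample_mask)->state);
 *       return call_size(tc_call_set_sample_mask);
 *    }
 *
 *    static void
 *    tc_set_sample_mask(struct pipe_context *_pipe, unsigned param)
 *    {
 *       struct threaded_context *tc = threaded_context(_pipe);
 *       struct tc_call_set_sample_mask *p =
 *          tc_add_call(tc, TC_CALL_set_sample_mask, tc_call_set_sample_mask);
 *       p->state = param;
 *    }
 */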
1164 
1165 
1166 /********************************************************************
1167  * queries
1168  */
1169 
1170 static struct pipe_query *
1171 tc_create_query(struct pipe_context *_pipe, unsigned query_type,
1172                 unsigned index)
1173 {
1174    struct threaded_context *tc = threaded_context(_pipe);
1175    struct pipe_context *pipe = tc->pipe;
1176 
1177    return pipe->create_query(pipe, query_type, index);
1178 }
1179 
1180 static struct pipe_query *
1181 tc_create_batch_query(struct pipe_context *_pipe, unsigned num_queries,
1182                       unsigned *query_types)
1183 {
1184    struct threaded_context *tc = threaded_context(_pipe);
1185    struct pipe_context *pipe = tc->pipe;
1186 
1187    return pipe->create_batch_query(pipe, num_queries, query_types);
1188 }
1189 
1190 struct tc_query_call {
1191    struct tc_call_base base;
1192    struct pipe_query *query;
1193 };
1194 
1195 static uint16_t
1196 tc_call_destroy_query(struct pipe_context *pipe, void *call)
1197 {
1198    struct pipe_query *query = to_call(call, tc_query_call)->query;
1199    struct threaded_query *tq = threaded_query(query);
1200 
1201    if (list_is_linked(&tq->head_unflushed))
1202       list_del(&tq->head_unflushed);
1203 
1204    pipe->destroy_query(pipe, query);
1205    return call_size(tc_query_call);
1206 }
1207 
1208 static void
1209 tc_destroy_query(struct pipe_context *_pipe, struct pipe_query *query)
1210 {
1211    struct threaded_context *tc = threaded_context(_pipe);
1212 
1213    tc_add_call(tc, TC_CALL_destroy_query, tc_query_call)->query = query;
1214 }
1215 
1216 static uint16_t
1217 tc_call_begin_query(struct pipe_context *pipe, void *call)
1218 {
1219    pipe->begin_query(pipe, to_call(call, tc_query_call)->query);
1220    return call_size(tc_query_call);
1221 }
1222 
1223 static bool
1224 tc_begin_query(struct pipe_context *_pipe, struct pipe_query *query)
1225 {
1226    struct threaded_context *tc = threaded_context(_pipe);
1227    tc->num_queries_active++;
1228 
1229    tc_add_call(tc, TC_CALL_begin_query, tc_query_call)->query = query;
1230    return true; /* we don't care about the return value for this call */
1231 }
1232 
1233 struct tc_end_query_call {
1234    struct tc_call_base base;
1235    struct threaded_context *tc;
1236    struct pipe_query *query;
1237 };
1238 
1239 static uint16_t
1240 tc_call_end_query(struct pipe_context *pipe, void *call)
1241 {
1242    struct tc_end_query_call *p = to_call(call, tc_end_query_call);
1243    struct threaded_query *tq = threaded_query(p->query);
1244 
1245    if (!list_is_linked(&tq->head_unflushed))
1246       list_add(&tq->head_unflushed, &p->tc->unflushed_queries);
1247 
1248    pipe->end_query(pipe, p->query);
1249    return call_size(tc_end_query_call);
1250 }
1251 
1252 static bool
1253 tc_end_query(struct pipe_context *_pipe, struct pipe_query *query)
1254 {
1255    struct threaded_context *tc = threaded_context(_pipe);
1256    struct threaded_query *tq = threaded_query(query);
1257    struct tc_end_query_call *call =
1258       tc_add_call(tc, TC_CALL_end_query, tc_end_query_call);
1259    tc->num_queries_active--;
1260 
1261    call->tc = tc;
1262    call->query = query;
1263 
1264    tq->flushed = false;
1265    tc->query_ended = true;
1266 
1267    return true; /* we don't care about the return value for this call */
1268 }
1269 
1270 static bool
1271 tc_get_query_result(struct pipe_context *_pipe,
1272                     struct pipe_query *query, bool wait,
1273                     union pipe_query_result *result)
1274 {
1275    struct threaded_context *tc = threaded_context(_pipe);
1276    struct threaded_query *tq = threaded_query(query);
1277    struct pipe_context *pipe = tc->pipe;
1278    bool flushed = tq->flushed;
1279 
1280    if (!flushed) {
1281       tc_sync_msg(tc, wait ? "wait" : "nowait");
1282       tc_set_driver_thread(tc);
1283    }
1284 
1285    bool success = pipe->get_query_result(pipe, query, wait, result);
1286 
1287    if (!flushed)
1288       tc_clear_driver_thread(tc);
1289 
1290    if (success) {
1291       tq->flushed = true;
1292       if (list_is_linked(&tq->head_unflushed)) {
1293          /* This is safe because it can only happen after we sync'd. */
1294          list_del(&tq->head_unflushed);
1295       }
1296    }
1297    return success;
1298 }
1299 
1300 struct tc_query_result_resource {
1301    struct tc_call_base base;
1302    enum pipe_query_flags flags:8;
1303    enum pipe_query_value_type result_type:8;
1304    int8_t index; /* it can be -1 */
1305    unsigned offset;
1306    struct pipe_query *query;
1307    struct pipe_resource *resource;
1308 };
1309 
1310 static uint16_t
1311 tc_call_get_query_result_resource(struct pipe_context *pipe, void *call)
1312 {
1313    struct tc_query_result_resource *p = to_call(call, tc_query_result_resource);
1314 
1315    pipe->get_query_result_resource(pipe, p->query, p->flags, p->result_type,
1316                                    p->index, p->resource, p->offset);
1317    tc_drop_resource_reference(p->resource);
1318    return call_size(tc_query_result_resource);
1319 }
1320 
1321 static void
1322 tc_get_query_result_resource(struct pipe_context *_pipe,
1323                              struct pipe_query *query,
1324                              enum pipe_query_flags flags,
1325                              enum pipe_query_value_type result_type, int index,
1326                              struct pipe_resource *resource, unsigned offset)
1327 {
1328    struct threaded_context *tc = threaded_context(_pipe);
1329 
1330    tc_buffer_disable_cpu_storage(resource);
1331 
1332    struct tc_query_result_resource *p =
1333       tc_add_call(tc, TC_CALL_get_query_result_resource,
1334                   tc_query_result_resource);
1335    p->query = query;
1336    p->flags = flags;
1337    p->result_type = result_type;
1338    p->index = index;
1339    tc_set_resource_reference(&p->resource, resource);
1340    tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], resource);
1341    p->offset = offset;
1342 }
1343 
1344 struct tc_render_condition {
1345    struct tc_call_base base;
1346    bool condition;
1347    unsigned mode;
1348    struct pipe_query *query;
1349 };
1350 
1351 static uint16_t
1352 tc_call_render_condition(struct pipe_context *pipe, void *call)
1353 {
1354    struct tc_render_condition *p = to_call(call, tc_render_condition);
1355    pipe->render_condition(pipe, p->query, p->condition, p->mode);
1356    return call_size(tc_render_condition);
1357 }
1358 
1359 static void
1360 tc_render_condition(struct pipe_context *_pipe,
1361                     struct pipe_query *query, bool condition,
1362                     enum pipe_render_cond_flag mode)
1363 {
1364    struct threaded_context *tc = threaded_context(_pipe);
1365    struct tc_render_condition *p =
1366       tc_add_call(tc, TC_CALL_render_condition, tc_render_condition);
1367 
1368    p->query = query;
1369    p->condition = condition;
1370    p->mode = mode;
1371 }
1372 
1373 
1374 /********************************************************************
1375  * constant (immutable) states
1376  */
1377 
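/* CSO handling: create_*_state is forwarded straight to the driver on the
 * application thread, while bind and delete are generated by TC_FUNC1
 * (defined earlier in this file) as queued single-pointer calls that execute
 * in order on the driver thread.
 */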
1378 #define TC_CSO_CREATE(name, sname) \
1379    static void * \
1380    tc_create_##name##_state(struct pipe_context *_pipe, \
1381                             const struct pipe_##sname##_state *state) \
1382    { \
1383       struct pipe_context *pipe = threaded_context(_pipe)->pipe; \
1384       return pipe->create_##name##_state(pipe, state); \
1385    }
1386 
1387 #define TC_CSO_BIND(name, ...) TC_FUNC1(bind_##name##_state, , void *, , , ##__VA_ARGS__)
1388 #define TC_CSO_DELETE(name) TC_FUNC1(delete_##name##_state, , void *, , )
1389 
1390 #define TC_CSO(name, sname, ...) \
1391    TC_CSO_CREATE(name, sname) \
1392    TC_CSO_BIND(name, ##__VA_ARGS__) \
1393    TC_CSO_DELETE(name)
1394 
1395 #define TC_CSO_WHOLE(name) TC_CSO(name, name)
1396 #define TC_CSO_SHADER(name) TC_CSO(name, shader)
1397 #define TC_CSO_SHADER_TRACK(name) TC_CSO(name, shader, tc->seen_##name = true;)
1398 
1399 TC_CSO_WHOLE(blend)
1400 TC_CSO_WHOLE(rasterizer)
1401 TC_CSO_CREATE(depth_stencil_alpha, depth_stencil_alpha)
1402 TC_CSO_BIND(depth_stencil_alpha,
1403    if (param && tc->options.parse_renderpass_info) {
1404       /* dsa info is only ever added during a renderpass;
1405        * changes outside of a renderpass reset the data
1406        */
1407       if (!tc->in_renderpass) {
1408          tc_get_renderpass_info(tc)->zsbuf_write_dsa = 0;
1409          tc_get_renderpass_info(tc)->zsbuf_read_dsa = 0;
1410       }
1411       /* let the driver parse its own state */
1412       tc->options.dsa_parse(param, tc_get_renderpass_info(tc));
1413    }
1414 )
1415 TC_CSO_DELETE(depth_stencil_alpha)
1416 TC_CSO_WHOLE(compute)
1417 TC_CSO_CREATE(fs, shader)
1418 TC_CSO_BIND(fs,
1419    if (param && tc->options.parse_renderpass_info) {
1420       /* fs info is only ever added during a renderpass;
1421        * changes outside of a renderpass reset the data
1422        */
1423       if (!tc->in_renderpass) {
1424          tc_get_renderpass_info(tc)->cbuf_fbfetch = 0;
1425          tc_get_renderpass_info(tc)->zsbuf_write_fs = 0;
1426       }
1427       /* let the driver parse its own state */
1428       tc->options.fs_parse(param, tc_get_renderpass_info(tc));
1429    }
1430 )
1431 TC_CSO_DELETE(fs)
1432 TC_CSO_SHADER(vs)
1433 TC_CSO_SHADER_TRACK(gs)
1434 TC_CSO_SHADER_TRACK(tcs)
1435 TC_CSO_SHADER_TRACK(tes)
1436 TC_CSO_CREATE(sampler, sampler)
1437 TC_CSO_DELETE(sampler)
1438 TC_CSO_BIND(vertex_elements)
1439 TC_CSO_DELETE(vertex_elements)
1440 
1441 static void *
1442 tc_create_vertex_elements_state(struct pipe_context *_pipe, unsigned count,
1443                                 const struct pipe_vertex_element *elems)
1444 {
1445    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1446 
1447    return pipe->create_vertex_elements_state(pipe, count, elems);
1448 }
1449 
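/* Calls with variable-length payloads use a trailing flexible array member
 * (slot[0]) and tc_add_slot_based_call, which reserves enough batch slots for
 * the array; their execute callbacks return base.num_slots instead of a
 * compile-time call_size().
 */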
1450 struct tc_sampler_states {
1451    struct tc_call_base base;
1452    uint8_t shader, start, count;
1453    void *slot[0]; /* more will be allocated if needed */
1454 };
1455 
1456 static uint16_t
1457 tc_call_bind_sampler_states(struct pipe_context *pipe, void *call)
1458 {
1459    struct tc_sampler_states *p = (struct tc_sampler_states *)call;
1460 
1461    pipe->bind_sampler_states(pipe, p->shader, p->start, p->count, p->slot);
1462    return p->base.num_slots;
1463 }
1464 
1465 static void
1466 tc_bind_sampler_states(struct pipe_context *_pipe,
1467                        enum pipe_shader_type shader,
1468                        unsigned start, unsigned count, void **states)
1469 {
1470    if (!count)
1471       return;
1472 
1473    struct threaded_context *tc = threaded_context(_pipe);
1474    struct tc_sampler_states *p =
1475       tc_add_slot_based_call(tc, TC_CALL_bind_sampler_states, tc_sampler_states, count);
1476 
1477    p->shader = shader;
1478    p->start = start;
1479    p->count = count;
1480    memcpy(p->slot, states, count * sizeof(states[0]));
1481 }
1482 
1483 static void
1484 tc_link_shader(struct pipe_context *_pipe, void **shaders)
1485 {
1486    struct threaded_context *tc = threaded_context(_pipe);
1487    tc->pipe->link_shader(tc->pipe, shaders);
1488 }
1489 /********************************************************************
1490  * immediate states
1491  */
1492 
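/* set_framebuffer_state copies the state into the queued call and takes
 * surface/resource references that tc_call_set_framebuffer_state drops again
 * once the driver has consumed the state.
 */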
1493 struct tc_framebuffer {
1494    struct tc_call_base base;
1495    struct pipe_framebuffer_state state;
1496 };
1497 
1498 static uint16_t
1499 tc_call_set_framebuffer_state(struct pipe_context *pipe, void *call)
1500 {
1501    struct pipe_framebuffer_state *p = &to_call(call, tc_framebuffer)->state;
1502 
1503    pipe->set_framebuffer_state(pipe, p);
1504 
1505    unsigned nr_cbufs = p->nr_cbufs;
1506    for (unsigned i = 0; i < nr_cbufs; i++)
1507       tc_drop_surface_reference(p->cbufs[i]);
1508    tc_drop_surface_reference(p->zsbuf);
1509    tc_drop_resource_reference(p->resolve);
1510    return call_size(tc_framebuffer);
1511 }
1512 
1513 static void
1514 tc_set_framebuffer_state(struct pipe_context *_pipe,
1515                          const struct pipe_framebuffer_state *fb)
1516 {
1517    struct threaded_context *tc = threaded_context(_pipe);
1518    struct tc_framebuffer *p =
1519       tc_add_call(tc, TC_CALL_set_framebuffer_state, tc_framebuffer);
1520    unsigned nr_cbufs = fb->nr_cbufs;
1521 
1522    p->state.width = fb->width;
1523    p->state.height = fb->height;
1524    p->state.samples = fb->samples;
1525    p->state.layers = fb->layers;
1526    p->state.nr_cbufs = nr_cbufs;
1527    p->state.viewmask = fb->viewmask;
1528 
1529    /* when unbinding, mark attachments as used for the current batch */
1530    for (unsigned i = 0; i < tc->nr_cbufs; i++) {
1531       tc_set_resource_batch_usage_persistent(tc, tc->fb_resources[i], false);
1532       pipe_resource_reference(&tc->fb_resources[i], NULL);
1533    }
1534    tc_set_resource_batch_usage_persistent(tc, tc->fb_resources[PIPE_MAX_COLOR_BUFS], false);
1535    tc_set_resource_batch_usage_persistent(tc, tc->fb_resolve, false);
1536 
1537    for (unsigned i = 0; i < nr_cbufs; i++) {
1538       p->state.cbufs[i] = NULL;
1539       pipe_surface_reference(&p->state.cbufs[i], fb->cbufs[i]);
1540       /* full tracking requires storing the fb attachment resources */
1541       if (fb->cbufs[i])
1542          pipe_resource_reference(&tc->fb_resources[i], fb->cbufs[i]->texture);
1543       tc_set_resource_batch_usage_persistent(tc, tc->fb_resources[i], true);
1544    }
1545    tc->nr_cbufs = nr_cbufs;
1546    if (tc->options.parse_renderpass_info) {
1547       /* ensure this is treated as the first fb set if no fb activity has occurred */
1548       if (!tc->renderpass_info_recording->has_draw &&
1549           !tc->renderpass_info_recording->cbuf_clear &&
1550           !tc->renderpass_info_recording->cbuf_load &&
1551           !tc->renderpass_info_recording->zsbuf_load &&
1552           !tc->renderpass_info_recording->zsbuf_clear_partial)
1553          tc->batch_slots[tc->next].first_set_fb = false;
1554       /* store existing zsbuf data for possible persistence */
1555       uint8_t zsbuf = tc->renderpass_info_recording->has_draw ?
1556                       0 :
1557                       tc->renderpass_info_recording->data8[3];
1558       bool zsbuf_changed = tc->fb_resources[PIPE_MAX_COLOR_BUFS] !=
1559                            (fb->zsbuf ? fb->zsbuf->texture : NULL);
1560 
1561       if (tc->seen_fb_state) {
1562          /* this is the end of a renderpass, so increment the renderpass info */
1563          tc_batch_increment_renderpass_info(tc, tc->next, false);
1564          /* if zsbuf hasn't changed (i.e., possibly just adding a color buffer):
1565           * keep zsbuf usage data
1566           */
1567          if (!zsbuf_changed)
1568             tc->renderpass_info_recording->data8[3] = zsbuf;
1569       } else {
1570          /* this is the first time a set_framebuffer_state call is triggered;
1571           * just increment the index and keep using the existing info for recording
1572           */
1573          tc->batch_slots[tc->next].renderpass_info_idx = 0;
1574       }
1575       /* future fb state changes will increment the index */
1576       tc->seen_fb_state = true;
1577    }
1578    pipe_resource_reference(&tc->fb_resources[PIPE_MAX_COLOR_BUFS],
1579                            fb->zsbuf ? fb->zsbuf->texture : NULL);
1580    pipe_resource_reference(&tc->fb_resolve, fb->resolve);
1581    tc_set_resource_batch_usage_persistent(tc, tc->fb_resources[PIPE_MAX_COLOR_BUFS], true);
1582    tc_set_resource_batch_usage_persistent(tc, tc->fb_resolve, true);
1583    tc->in_renderpass = false;
1584    p->state.zsbuf = NULL;
1585    pipe_surface_reference(&p->state.zsbuf, fb->zsbuf);
1586    p->state.resolve = NULL;
1587    pipe_resource_reference(&p->state.resolve, fb->resolve);
1588 }
1589 
1590 struct tc_tess_state {
1591    struct tc_call_base base;
1592    float state[6];
1593 };
1594 
1595 static uint16_t
1596 tc_call_set_tess_state(struct pipe_context *pipe, void *call)
1597 {
1598    float *p = to_call(call, tc_tess_state)->state;
1599 
1600    pipe->set_tess_state(pipe, p, p + 4);
1601    return call_size(tc_tess_state);
1602 }
1603 
1604 static void
1605 tc_set_tess_state(struct pipe_context *_pipe,
1606                   const float default_outer_level[4],
1607                   const float default_inner_level[2])
1608 {
1609    struct threaded_context *tc = threaded_context(_pipe);
1610    float *p = tc_add_call(tc, TC_CALL_set_tess_state, tc_tess_state)->state;
1611 
1612    memcpy(p, default_outer_level, 4 * sizeof(float));
1613    memcpy(p + 4, default_inner_level, 2 * sizeof(float));
1614 }
1615 
1616 struct tc_patch_vertices {
1617    struct tc_call_base base;
1618    uint8_t patch_vertices;
1619 };
1620 
1621 static uint16_t
1622 tc_call_set_patch_vertices(struct pipe_context *pipe, void *call)
1623 {
1624    uint8_t patch_vertices = to_call(call, tc_patch_vertices)->patch_vertices;
1625 
1626    pipe->set_patch_vertices(pipe, patch_vertices);
1627    return call_size(tc_patch_vertices);
1628 }
1629 
1630 static void
1631 tc_set_patch_vertices(struct pipe_context *_pipe, uint8_t patch_vertices)
1632 {
1633    struct threaded_context *tc = threaded_context(_pipe);
1634 
1635    tc_add_call(tc, TC_CALL_set_patch_vertices,
1636                tc_patch_vertices)->patch_vertices = patch_vertices;
1637 }
1638 
1639 struct tc_constant_buffer_base {
1640    struct tc_call_base base;
1641    uint8_t shader, index;
1642    bool is_null;
1643 };
1644 
1645 struct tc_constant_buffer {
1646    struct tc_constant_buffer_base base;
1647    struct pipe_constant_buffer cb;
1648 };
1649 
1650 static uint16_t
1651 tc_call_set_constant_buffer(struct pipe_context *pipe, void *call)
1652 {
1653    struct tc_constant_buffer *p = (struct tc_constant_buffer *)call;
1654 
1655    if (unlikely(p->base.is_null)) {
1656       pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, false, NULL);
1657       return call_size(tc_constant_buffer_base);
1658    }
1659 
1660    pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, true, &p->cb);
1661    return call_size(tc_constant_buffer);
1662 }
1663 
1664 static void
1665 tc_set_constant_buffer(struct pipe_context *_pipe,
1666                        enum pipe_shader_type shader, uint index,
1667                        bool take_ownership,
1668                        const struct pipe_constant_buffer *cb)
1669 {
1670    struct threaded_context *tc = threaded_context(_pipe);
1671 
1672    if (unlikely(!cb || (!cb->buffer && !cb->user_buffer))) {
1673       struct tc_constant_buffer_base *p =
1674          tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer_base);
1675       p->shader = shader;
1676       p->index = index;
1677       p->is_null = true;
1678       tc_unbind_buffer(&tc->const_buffers[shader][index]);
1679       return;
1680    }
1681 
1682    struct pipe_resource *buffer;
1683    unsigned offset;
1684 
1685    if (cb->user_buffer) {
1686       /* This must be done before adding set_constant_buffer, because the upload
1687        * could generate e.g. transfer_unmap and flush a partially-uninitialized
1688        * set_constant_buffer call to the driver if it was done afterwards.
1689        */
1690       buffer = NULL;
1691       u_upload_data(tc->base.const_uploader, 0, cb->buffer_size,
1692                     tc->ubo_alignment, cb->user_buffer, &offset, &buffer);
1693       u_upload_unmap(tc->base.const_uploader);
1694       take_ownership = true;
1695    } else {
1696       buffer = cb->buffer;
1697       offset = cb->buffer_offset;
1698    }
1699 
1700    struct tc_constant_buffer *p =
1701       tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer);
1702    p->base.shader = shader;
1703    p->base.index = index;
1704    p->base.is_null = false;
1705    p->cb.user_buffer = NULL;
1706    p->cb.buffer_offset = offset;
1707    p->cb.buffer_size = cb->buffer_size;
1708 
1709    if (take_ownership)
1710       p->cb.buffer = buffer;
1711    else
1712       tc_set_resource_reference(&p->cb.buffer, buffer);
1713 
1714    if (buffer) {
1715       tc_bind_buffer(&tc->const_buffers[shader][index],
1716                      &tc->buffer_lists[tc->next_buf_list], buffer);
1717    } else {
1718       tc_unbind_buffer(&tc->const_buffers[shader][index]);
1719    }
1720 }
1721 
1722 struct tc_inlinable_constants {
1723    struct tc_call_base base;
1724    uint8_t shader;
1725    uint8_t num_values;
1726    uint32_t values[MAX_INLINABLE_UNIFORMS];
1727 };
1728 
1729 static uint16_t
1730 tc_call_set_inlinable_constants(struct pipe_context *pipe, void *call)
1731 {
1732    struct tc_inlinable_constants *p = to_call(call, tc_inlinable_constants);
1733 
1734    pipe->set_inlinable_constants(pipe, p->shader, p->num_values, p->values);
1735    return call_size(tc_inlinable_constants);
1736 }
1737 
1738 static void
1739 tc_set_inlinable_constants(struct pipe_context *_pipe,
1740                            enum pipe_shader_type shader,
1741                            uint num_values, uint32_t *values)
1742 {
1743    struct threaded_context *tc = threaded_context(_pipe);
1744    struct tc_inlinable_constants *p =
1745       tc_add_call(tc, TC_CALL_set_inlinable_constants, tc_inlinable_constants);
1746    p->shader = shader;
1747    p->num_values = num_values;
1748    memcpy(p->values, values, num_values * 4);
1749 }
1750 
1751 struct tc_sample_locations {
1752    struct tc_call_base base;
1753    uint16_t size;
1754    uint8_t slot[0];
1755 };
1756 
1757 
1758 static uint16_t
1759 tc_call_set_sample_locations(struct pipe_context *pipe, void *call)
1760 {
1761    struct tc_sample_locations *p = (struct tc_sample_locations *)call;
1762 
1763    pipe->set_sample_locations(pipe, p->size, p->slot);
1764    return p->base.num_slots;
1765 }
1766 
1767 static void
1768 tc_set_sample_locations(struct pipe_context *_pipe, size_t size, const uint8_t *locations)
1769 {
1770    struct threaded_context *tc = threaded_context(_pipe);
1771    struct tc_sample_locations *p =
1772       tc_add_slot_based_call(tc, TC_CALL_set_sample_locations,
1773                              tc_sample_locations, size);
1774 
1775    p->size = size;
1776    memcpy(p->slot, locations, size);
1777 }
1778 
1779 struct tc_scissors {
1780    struct tc_call_base base;
1781    uint8_t start, count;
1782    struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
1783 };
1784 
1785 static uint16_t
1786 tc_call_set_scissor_states(struct pipe_context *pipe, void *call)
1787 {
1788    struct tc_scissors *p = (struct tc_scissors *)call;
1789 
1790    pipe->set_scissor_states(pipe, p->start, p->count, p->slot);
1791    return p->base.num_slots;
1792 }
1793 
1794 static void
1795 tc_set_scissor_states(struct pipe_context *_pipe,
1796                       unsigned start, unsigned count,
1797                       const struct pipe_scissor_state *states)
1798 {
1799    struct threaded_context *tc = threaded_context(_pipe);
1800    struct tc_scissors *p =
1801       tc_add_slot_based_call(tc, TC_CALL_set_scissor_states, tc_scissors, count);
1802 
1803    p->start = start;
1804    p->count = count;
1805    memcpy(&p->slot, states, count * sizeof(states[0]));
1806 }
1807 
1808 struct tc_viewports {
1809    struct tc_call_base base;
1810    uint8_t start, count;
1811    struct pipe_viewport_state slot[0]; /* more will be allocated if needed */
1812 };
1813 
1814 static uint16_t
1815 tc_call_set_viewport_states(struct pipe_context *pipe, void *call)
1816 {
1817    struct tc_viewports *p = (struct tc_viewports *)call;
1818 
1819    pipe->set_viewport_states(pipe, p->start, p->count, p->slot);
1820    return p->base.num_slots;
1821 }
1822 
1823 static void
1824 tc_set_viewport_states(struct pipe_context *_pipe,
1825                        unsigned start, unsigned count,
1826                        const struct pipe_viewport_state *states)
1827 {
1828    if (!count)
1829       return;
1830 
1831    struct threaded_context *tc = threaded_context(_pipe);
1832    struct tc_viewports *p =
1833       tc_add_slot_based_call(tc, TC_CALL_set_viewport_states, tc_viewports, count);
1834 
1835    p->start = start;
1836    p->count = count;
1837    memcpy(&p->slot, states, count * sizeof(states[0]));
1838 }
1839 
1840 struct tc_window_rects {
1841    struct tc_call_base base;
1842    bool include;
1843    uint8_t count;
1844    struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
1845 };
1846 
1847 static uint16_t
1848 tc_call_set_window_rectangles(struct pipe_context *pipe, void *call)
1849 {
1850    struct tc_window_rects *p = (struct tc_window_rects *)call;
1851 
1852    pipe->set_window_rectangles(pipe, p->include, p->count, p->slot);
1853    return p->base.num_slots;
1854 }
1855 
1856 static void
1857 tc_set_window_rectangles(struct pipe_context *_pipe, bool include,
1858                          unsigned count,
1859                          const struct pipe_scissor_state *rects)
1860 {
1861    struct threaded_context *tc = threaded_context(_pipe);
1862    struct tc_window_rects *p =
1863       tc_add_slot_based_call(tc, TC_CALL_set_window_rectangles, tc_window_rects, count);
1864 
1865    p->include = include;
1866    p->count = count;
1867    memcpy(p->slot, rects, count * sizeof(rects[0]));
1868 }
1869 
1870 struct tc_sampler_views {
1871    struct tc_call_base base;
1872    uint8_t shader, start, count, unbind_num_trailing_slots;
1873    struct pipe_sampler_view *slot[0]; /* more will be allocated if needed */
1874 };
1875 
1876 static uint16_t
1877 tc_call_set_sampler_views(struct pipe_context *pipe, void *call)
1878 {
1879    struct tc_sampler_views *p = (struct tc_sampler_views *)call;
1880 
1881    pipe->set_sampler_views(pipe, p->shader, p->start, p->count,
1882                            p->unbind_num_trailing_slots, true, p->slot);
1883    return p->base.num_slots;
1884 }
1885 
1886 static void
1887 tc_set_sampler_views(struct pipe_context *_pipe,
1888                      enum pipe_shader_type shader,
1889                      unsigned start, unsigned count,
1890                      unsigned unbind_num_trailing_slots, bool take_ownership,
1891                      struct pipe_sampler_view **views)
1892 {
1893    if (!count && !unbind_num_trailing_slots)
1894       return;
1895 
1896    struct threaded_context *tc = threaded_context(_pipe);
1897    struct tc_sampler_views *p =
1898       tc_add_slot_based_call(tc, TC_CALL_set_sampler_views, tc_sampler_views,
1899                              views ? count : 0);
1900 
1901    p->shader = shader;
1902    p->start = start;
1903 
1904    if (views) {
1905       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1906 
1907       p->count = count;
1908       p->unbind_num_trailing_slots = unbind_num_trailing_slots;
1909 
1910       if (take_ownership) {
1911          memcpy(p->slot, views, sizeof(*views) * count);
1912 
1913          for (unsigned i = 0; i < count; i++) {
1914             if (views[i]) {
1915                if (views[i]->target == PIPE_BUFFER)
1916                   tc_bind_buffer(&tc->sampler_buffers[shader][start + i], next,
1917                                  views[i]->texture);
1918                else
1919                   tc_set_resource_batch_usage(tc, views[i]->texture);
1920             } else {
1921                tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]);
1922             }
1923          }
1924       } else {
1925          for (unsigned i = 0; i < count; i++) {
1926             p->slot[i] = NULL;
1927             pipe_sampler_view_reference(&p->slot[i], views[i]);
1928 
1929             if (views[i]) {
1930                if (views[i]->target == PIPE_BUFFER)
1931                   tc_bind_buffer(&tc->sampler_buffers[shader][start + i], next,
1932                                  views[i]->texture);
1933                else
1934                   tc_set_resource_batch_usage(tc, views[i]->texture);
1935             } else {
1936                tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]);
1937             }
1938          }
1939       }
1940 
1941       tc_unbind_buffers(&tc->sampler_buffers[shader][start + count],
1942                         unbind_num_trailing_slots);
1943       tc->seen_sampler_buffers[shader] = true;
1944    } else {
1945       p->count = 0;
1946       p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
1947 
1948       tc_unbind_buffers(&tc->sampler_buffers[shader][start],
1949                         count + unbind_num_trailing_slots);
1950    }
1951 }
1952 
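/* Writable buffer images disable the CPU-storage fast path, extend the
 * buffer's valid range, and have their slots tracked in
 * image_buffers_writeable_mask.
 */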
1953 struct tc_shader_images {
1954    struct tc_call_base base;
1955    uint8_t shader, start, count;
1956    uint8_t unbind_num_trailing_slots;
1957    struct pipe_image_view slot[0]; /* more will be allocated if needed */
1958 };
1959 
1960 static uint16_t
1961 tc_call_set_shader_images(struct pipe_context *pipe, void *call)
1962 {
1963    struct tc_shader_images *p = (struct tc_shader_images *)call;
1964    unsigned count = p->count;
1965 
1966    if (!p->count) {
1967       pipe->set_shader_images(pipe, p->shader, p->start, 0,
1968                               p->unbind_num_trailing_slots, NULL);
1969       return call_size(tc_shader_images);
1970    }
1971 
1972    pipe->set_shader_images(pipe, p->shader, p->start, p->count,
1973                            p->unbind_num_trailing_slots, p->slot);
1974 
1975    for (unsigned i = 0; i < count; i++)
1976       tc_drop_resource_reference(p->slot[i].resource);
1977 
1978    return p->base.num_slots;
1979 }
1980 
1981 static void
1982 tc_set_shader_images(struct pipe_context *_pipe,
1983                      enum pipe_shader_type shader,
1984                      unsigned start, unsigned count,
1985                      unsigned unbind_num_trailing_slots,
1986                      const struct pipe_image_view *images)
1987 {
1988    if (!count && !unbind_num_trailing_slots)
1989       return;
1990 
1991    struct threaded_context *tc = threaded_context(_pipe);
1992    struct tc_shader_images *p =
1993       tc_add_slot_based_call(tc, TC_CALL_set_shader_images, tc_shader_images,
1994                              images ? count : 0);
1995    unsigned writable_buffers = 0;
1996 
1997    p->shader = shader;
1998    p->start = start;
1999 
2000    if (images) {
2001       p->count = count;
2002       p->unbind_num_trailing_slots = unbind_num_trailing_slots;
2003 
2004       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
2005 
2006       for (unsigned i = 0; i < count; i++) {
2007          struct pipe_resource *resource = images[i].resource;
2008 
2009          tc_set_resource_reference(&p->slot[i].resource, resource);
2010 
2011          if (resource) {
2012             if (resource->target == PIPE_BUFFER) {
2013                tc_bind_buffer(&tc->image_buffers[shader][start + i], next, resource);
2014 
2015                if (images[i].access & PIPE_IMAGE_ACCESS_WRITE) {
2016                   struct threaded_resource *tres = threaded_resource(resource);
2017 
2018                   tc_buffer_disable_cpu_storage(resource);
2019                   util_range_add(&tres->b, &tres->valid_buffer_range,
2020                                  images[i].u.buf.offset,
2021                                  images[i].u.buf.offset + images[i].u.buf.size);
2022                   writable_buffers |= BITFIELD_BIT(start + i);
2023                }
2024             } else {
2025                tc_set_resource_batch_usage(tc, resource);
2026             }
2027          } else {
2028             tc_unbind_buffer(&tc->image_buffers[shader][start + i]);
2029          }
2030       }
2031       memcpy(p->slot, images, count * sizeof(images[0]));
2032 
2033       tc_unbind_buffers(&tc->image_buffers[shader][start + count],
2034                         unbind_num_trailing_slots);
2035       tc->seen_image_buffers[shader] = true;
2036    } else {
2037       p->count = 0;
2038       p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
2039 
2040       tc_unbind_buffers(&tc->image_buffers[shader][start],
2041                         count + unbind_num_trailing_slots);
2042    }
2043 
2044    tc->image_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count);
2045    tc->image_buffers_writeable_mask[shader] |= writable_buffers;
2046 }
2047 
2048 struct tc_shader_buffers {
2049    struct tc_call_base base;
2050    uint8_t shader, start, count;
2051    bool unbind;
2052    unsigned writable_bitmask;
2053    struct pipe_shader_buffer slot[0]; /* more will be allocated if needed */
2054 };
2055 
2056 static uint16_t
2057 tc_call_set_shader_buffers(struct pipe_context *pipe, void *call)
2058 {
2059    struct tc_shader_buffers *p = (struct tc_shader_buffers *)call;
2060    unsigned count = p->count;
2061 
2062    if (p->unbind) {
2063       pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, NULL, 0);
2064       return call_size(tc_shader_buffers);
2065    }
2066 
2067    pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, p->slot,
2068                             p->writable_bitmask);
2069 
2070    for (unsigned i = 0; i < count; i++)
2071       tc_drop_resource_reference(p->slot[i].buffer);
2072 
2073    return p->base.num_slots;
2074 }
2075 
2076 static void
2077 tc_set_shader_buffers(struct pipe_context *_pipe,
2078                       enum pipe_shader_type shader,
2079                       unsigned start, unsigned count,
2080                       const struct pipe_shader_buffer *buffers,
2081                       unsigned writable_bitmask)
2082 {
2083    if (!count)
2084       return;
2085 
2086    struct threaded_context *tc = threaded_context(_pipe);
2087    struct tc_shader_buffers *p =
2088       tc_add_slot_based_call(tc, TC_CALL_set_shader_buffers, tc_shader_buffers,
2089                              buffers ? count : 0);
2090 
2091    p->shader = shader;
2092    p->start = start;
2093    p->count = count;
2094    p->unbind = buffers == NULL;
2095    p->writable_bitmask = writable_bitmask;
2096 
2097    if (buffers) {
2098       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
2099 
2100       for (unsigned i = 0; i < count; i++) {
2101          struct pipe_shader_buffer *dst = &p->slot[i];
2102          const struct pipe_shader_buffer *src = buffers + i;
2103 
2104          tc_set_resource_reference(&dst->buffer, src->buffer);
2105          dst->buffer_offset = src->buffer_offset;
2106          dst->buffer_size = src->buffer_size;
2107 
2108          if (src->buffer) {
2109             struct threaded_resource *tres = threaded_resource(src->buffer);
2110 
2111             tc_bind_buffer(&tc->shader_buffers[shader][start + i], next, &tres->b);
2112 
2113             if (writable_bitmask & BITFIELD_BIT(i)) {
2114                tc_buffer_disable_cpu_storage(src->buffer);
2115                util_range_add(&tres->b, &tres->valid_buffer_range,
2116                               src->buffer_offset,
2117                               src->buffer_offset + src->buffer_size);
2118             }
2119          } else {
2120             tc_unbind_buffer(&tc->shader_buffers[shader][start + i]);
2121          }
2122       }
2123       tc->seen_shader_buffers[shader] = true;
2124    } else {
2125       tc_unbind_buffers(&tc->shader_buffers[shader][start], count);
2126    }
2127 
2128    tc->shader_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count);
2129    tc->shader_buffers_writeable_mask[shader] |= writable_bitmask << start;
2130 }
2131 
2132 static uint16_t
2133 tc_call_set_vertex_buffers(struct pipe_context *pipe, void *call)
2134 {
2135    struct tc_vertex_buffers *p = (struct tc_vertex_buffers *)call;
2136    unsigned count = p->count;
2137 
2138    for (unsigned i = 0; i < count; i++)
2139       tc_assert(!p->slot[i].is_user_buffer);
2140 
2141    pipe->set_vertex_buffers(pipe, count, p->slot);
2142    return p->base.num_slots;
2143 }
2144 
2145 static void
2146 tc_set_vertex_buffers(struct pipe_context *_pipe, unsigned count,
2147                       const struct pipe_vertex_buffer *buffers)
2148 {
2149    struct threaded_context *tc = threaded_context(_pipe);
2150 
2151    assert(!count || buffers);
2152 
2153    if (count) {
2154       struct tc_vertex_buffers *p =
2155          tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, count);
2156       p->count = count;
2157 
2158       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
2159 
2160       memcpy(p->slot, buffers, count * sizeof(struct pipe_vertex_buffer));
2161 
2162       for (unsigned i = 0; i < count; i++) {
2163          struct pipe_resource *buf = buffers[i].buffer.resource;
2164 
2165          if (buf) {
2166             tc_bind_buffer(&tc->vertex_buffers[i], next, buf);
2167          } else {
2168             tc_unbind_buffer(&tc->vertex_buffers[i]);
2169          }
2170       }
2171    } else {
2172       struct tc_vertex_buffers *p =
2173          tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, 0);
2174       p->count = 0;
2175    }
2176 
2177    /* We don't need to unbind trailing buffers because we never touch bindings
2178     * after num_vertex_buffers.
2179     */
2180    tc->num_vertex_buffers = count;
2181 }
2182 
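/* Non-static helper: adds a set_vertex_buffers call and returns its slot
 * array so the caller can fill the pipe_vertex_buffer entries in place
 * instead of passing a temporary array.
 */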
2183 struct pipe_vertex_buffer *
2184 tc_add_set_vertex_buffers_call(struct pipe_context *_pipe, unsigned count)
2185 {
2186    struct threaded_context *tc = threaded_context(_pipe);
2187 
2188    /* We don't need to unbind trailing buffers because we never touch bindings
2189     * after num_vertex_buffers.
2190     */
2191    tc->num_vertex_buffers = count;
2192 
2193    struct tc_vertex_buffers *p =
2194       tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, count);
2195    p->count = count;
2196    return p->slot;
2197 }
2198 
2199 struct tc_stream_outputs {
2200    struct tc_call_base base;
2201    unsigned count;
2202    struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
2203    unsigned offsets[PIPE_MAX_SO_BUFFERS];
2204 };
2205 
2206 static uint16_t
2207 tc_call_set_stream_output_targets(struct pipe_context *pipe, void *call)
2208 {
2209    struct tc_stream_outputs *p = to_call(call, tc_stream_outputs);
2210    unsigned count = p->count;
2211 
2212    pipe->set_stream_output_targets(pipe, count, p->targets, p->offsets);
2213    for (unsigned i = 0; i < count; i++)
2214       tc_drop_so_target_reference(p->targets[i]);
2215 
2216    return call_size(tc_stream_outputs);
2217 }
2218 
2219 static void
2220 tc_set_stream_output_targets(struct pipe_context *_pipe,
2221                              unsigned count,
2222                              struct pipe_stream_output_target **tgs,
2223                              const unsigned *offsets)
2224 {
2225    struct threaded_context *tc = threaded_context(_pipe);
2226    struct tc_stream_outputs *p =
2227       tc_add_call(tc, TC_CALL_set_stream_output_targets, tc_stream_outputs);
2228    struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
2229 
2230    for (unsigned i = 0; i < count; i++) {
2231       p->targets[i] = NULL;
2232       pipe_so_target_reference(&p->targets[i], tgs[i]);
2233       if (tgs[i]) {
2234          tc_buffer_disable_cpu_storage(tgs[i]->buffer);
2235          tc_bind_buffer(&tc->streamout_buffers[i], next, tgs[i]->buffer);
2236       } else {
2237          tc_unbind_buffer(&tc->streamout_buffers[i]);
2238       }
2239    }
2240    p->count = count;
2241    memcpy(p->offsets, offsets, count * sizeof(unsigned));
2242 
2243    tc_unbind_buffers(&tc->streamout_buffers[count], PIPE_MAX_SO_BUFFERS - count);
2244    if (count)
2245       tc->seen_streamout_buffers = true;
2246 }
2247 
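/* set_compute_resources and set_global_binding are not queued; they
 * synchronize with the driver thread and call the driver directly.
 */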
2248 static void
2249 tc_set_compute_resources(struct pipe_context *_pipe, unsigned start,
2250                          unsigned count, struct pipe_surface **resources)
2251 {
2252    struct threaded_context *tc = threaded_context(_pipe);
2253    struct pipe_context *pipe = tc->pipe;
2254 
2255    tc_sync(tc);
2256    pipe->set_compute_resources(pipe, start, count, resources);
2257 }
2258 
2259 static void
2260 tc_set_global_binding(struct pipe_context *_pipe, unsigned first,
2261                       unsigned count, struct pipe_resource **resources,
2262                       uint32_t **handles)
2263 {
2264    struct threaded_context *tc = threaded_context(_pipe);
2265    struct pipe_context *pipe = tc->pipe;
2266 
2267    tc_sync(tc);
2268    pipe->set_global_binding(pipe, first, count, resources, handles);
2269 }
2270 
2271 
2272 /********************************************************************
2273  * views
2274  */
2275 
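/* View creation is forwarded directly to the driver; the returned object's
 * context pointer is redirected to the threaded context so that later
 * destruction goes through the tc_*_destroy wrappers.
 */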
2276 static struct pipe_surface *
2277 tc_create_surface(struct pipe_context *_pipe,
2278                   struct pipe_resource *resource,
2279                   const struct pipe_surface *surf_tmpl)
2280 {
2281    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2282    struct pipe_surface *view =
2283          pipe->create_surface(pipe, resource, surf_tmpl);
2284 
2285    if (view)
2286       view->context = _pipe;
2287    return view;
2288 }
2289 
2290 static void
2291 tc_surface_destroy(struct pipe_context *_pipe,
2292                    struct pipe_surface *surf)
2293 {
2294    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2295 
2296    pipe->surface_destroy(pipe, surf);
2297 }
2298 
2299 static struct pipe_sampler_view *
2300 tc_create_sampler_view(struct pipe_context *_pipe,
2301                        struct pipe_resource *resource,
2302                        const struct pipe_sampler_view *templ)
2303 {
2304    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2305    struct pipe_sampler_view *view =
2306          pipe->create_sampler_view(pipe, resource, templ);
2307 
2308    if (view)
2309       view->context = _pipe;
2310    return view;
2311 }
2312 
2313 static void
2314 tc_sampler_view_destroy(struct pipe_context *_pipe,
2315                         struct pipe_sampler_view *view)
2316 {
2317    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2318 
2319    pipe->sampler_view_destroy(pipe, view);
2320 }
2321 
2322 static struct pipe_stream_output_target *
2323 tc_create_stream_output_target(struct pipe_context *_pipe,
2324                                struct pipe_resource *res,
2325                                unsigned buffer_offset,
2326                                unsigned buffer_size)
2327 {
2328    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2329    struct threaded_resource *tres = threaded_resource(res);
2330    struct pipe_stream_output_target *view;
2331 
2332    util_range_add(&tres->b, &tres->valid_buffer_range, buffer_offset,
2333                   buffer_offset + buffer_size);
2334 
2335    view = pipe->create_stream_output_target(pipe, res, buffer_offset,
2336                                             buffer_size);
2337    if (view)
2338       view->context = _pipe;
2339    return view;
2340 }
2341 
2342 static void
2343 tc_stream_output_target_destroy(struct pipe_context *_pipe,
2344                                 struct pipe_stream_output_target *target)
2345 {
2346    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2347 
2348    pipe->stream_output_target_destroy(pipe, target);
2349 }
2350 
2351 
2352 /********************************************************************
2353  * bindless
2354  */
2355 
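/* Bindless handle creation returns a value to the caller, so it can't be
 * queued; sync with the driver thread and call the driver directly.
 * Residency changes, on the other hand, are queued like other state calls.
 */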
2356 static uint64_t
2357 tc_create_texture_handle(struct pipe_context *_pipe,
2358                          struct pipe_sampler_view *view,
2359                          const struct pipe_sampler_state *state)
2360 {
2361    struct threaded_context *tc = threaded_context(_pipe);
2362    struct pipe_context *pipe = tc->pipe;
2363 
2364    tc_sync(tc);
2365    return pipe->create_texture_handle(pipe, view, state);
2366 }
2367 
2368 struct tc_make_texture_handle_resident {
2369    struct tc_call_base base;
2370    bool resident;
2371    uint64_t handle;
2372 };
2373 
2374 static uint16_t
2375 tc_call_make_texture_handle_resident(struct pipe_context *pipe, void *call)
2376 {
2377    struct tc_make_texture_handle_resident *p =
2378       to_call(call, tc_make_texture_handle_resident);
2379 
2380    pipe->make_texture_handle_resident(pipe, p->handle, p->resident);
2381    return call_size(tc_make_texture_handle_resident);
2382 }
2383 
2384 static void
2385 tc_make_texture_handle_resident(struct pipe_context *_pipe, uint64_t handle,
2386                                 bool resident)
2387 {
2388    struct threaded_context *tc = threaded_context(_pipe);
2389    struct tc_make_texture_handle_resident *p =
2390       tc_add_call(tc, TC_CALL_make_texture_handle_resident,
2391                   tc_make_texture_handle_resident);
2392 
2393    p->handle = handle;
2394    p->resident = resident;
2395 }
2396 
2397 static uint64_t
2398 tc_create_image_handle(struct pipe_context *_pipe,
2399                        const struct pipe_image_view *image)
2400 {
2401    struct threaded_context *tc = threaded_context(_pipe);
2402    struct pipe_context *pipe = tc->pipe;
2403 
2404    if (image->resource->target == PIPE_BUFFER)
2405       tc_buffer_disable_cpu_storage(image->resource);
2406 
2407    tc_sync(tc);
2408    return pipe->create_image_handle(pipe, image);
2409 }
2410 
2411 struct tc_make_image_handle_resident {
2412    struct tc_call_base base;
2413    bool resident;
2414    unsigned access;
2415    uint64_t handle;
2416 };
2417 
2418 static uint16_t
2419 tc_call_make_image_handle_resident(struct pipe_context *pipe, void *call)
2420 {
2421    struct tc_make_image_handle_resident *p =
2422       to_call(call, tc_make_image_handle_resident);
2423 
2424    pipe->make_image_handle_resident(pipe, p->handle, p->access, p->resident);
2425    return call_size(tc_make_image_handle_resident);
2426 }
2427 
2428 static void
2429 tc_make_image_handle_resident(struct pipe_context *_pipe, uint64_t handle,
2430                               unsigned access, bool resident)
2431 {
2432    struct threaded_context *tc = threaded_context(_pipe);
2433    struct tc_make_image_handle_resident *p =
2434       tc_add_call(tc, TC_CALL_make_image_handle_resident,
2435                   tc_make_image_handle_resident);
2436 
2437    p->handle = handle;
2438    p->access = access;
2439    p->resident = resident;
2440 }
2441 
2442 
2443 /********************************************************************
2444  * transfer
2445  */
2446 
2447 static void
2448 tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
2449          unsigned flags);
2450 
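/* Buffer invalidation allocates new storage on the application thread and
 * queues this call, which lets the driver swap the new storage into the
 * original pipe_resource (see tc_invalidate_buffer below).
 */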
2451 struct tc_replace_buffer_storage {
2452    struct tc_call_base base;
2453    uint16_t num_rebinds;
2454    uint32_t rebind_mask;
2455    uint32_t delete_buffer_id;
2456    struct pipe_resource *dst;
2457    struct pipe_resource *src;
2458    tc_replace_buffer_storage_func func;
2459 };
2460 
2461 static uint16_t
2462 tc_call_replace_buffer_storage(struct pipe_context *pipe, void *call)
2463 {
2464    struct tc_replace_buffer_storage *p = to_call(call, tc_replace_buffer_storage);
2465 
2466    p->func(pipe, p->dst, p->src, p->num_rebinds, p->rebind_mask, p->delete_buffer_id);
2467 
2468    tc_drop_resource_reference(p->dst);
2469    tc_drop_resource_reference(p->src);
2470    return call_size(tc_replace_buffer_storage);
2471 }
2472 
2473 /* Return true if the buffer has been invalidated or is idle. */
2474 static bool
2475 tc_invalidate_buffer(struct threaded_context *tc,
2476                      struct threaded_resource *tbuf)
2477 {
2478    if (!tc_is_buffer_busy(tc, tbuf, PIPE_MAP_READ_WRITE)) {
2479       /* It's idle, so invalidation would be a no-op, but we can still clear
2480        * the valid range because we are technically doing an invalidation and
2481        * merely skipping the useless reallocation.
2482        *
2483        * If the buffer is bound for write, however, we can't clear the valid range.
2484        */
2485       if (!tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique))
2486          util_range_set_empty(&tbuf->valid_buffer_range);
2487       return true;
2488    }
2489 
2490    struct pipe_screen *screen = tc->base.screen;
2491    struct pipe_resource *new_buf;
2492 
2493    /* Shared, pinned, and sparse buffers can't be reallocated. */
2494    if (tbuf->is_shared ||
2495        tbuf->is_user_ptr ||
2496        tbuf->b.flags & (PIPE_RESOURCE_FLAG_SPARSE | PIPE_RESOURCE_FLAG_UNMAPPABLE))
2497       return false;
2498 
2499    assert(tbuf->b.target == PIPE_BUFFER);
2500    tc->bytes_replaced_estimate += tbuf->b.width0;
2501 
2502    if (tc->bytes_replaced_limit && (tc->bytes_replaced_estimate > tc->bytes_replaced_limit)) {
2503       tc_flush(&tc->base, NULL, PIPE_FLUSH_ASYNC);
2504    }
2505 
2506    /* Allocate a new one. */
2507    new_buf = screen->resource_create(screen, &tbuf->b);
2508    if (!new_buf)
2509       return false;
2510 
2511    /* Replace the "latest" pointer. */
2512    if (tbuf->latest != &tbuf->b)
2513       pipe_resource_reference(&tbuf->latest, NULL);
2514 
2515    tbuf->latest = new_buf;
2516 
2517    uint32_t delete_buffer_id = tbuf->buffer_id_unique;
2518 
2519    /* Enqueue storage replacement of the original buffer. */
2520    struct tc_replace_buffer_storage *p =
2521       tc_add_call(tc, TC_CALL_replace_buffer_storage,
2522                   tc_replace_buffer_storage);
2523 
2524    p->func = tc->replace_buffer_storage;
2525    tc_set_resource_reference(&p->dst, &tbuf->b);
2526    tc_set_resource_reference(&p->src, new_buf);
2527    p->delete_buffer_id = delete_buffer_id;
2528    p->rebind_mask = 0;
2529 
2530    /* Treat the current buffer as the new buffer. */
2531    bool bound_for_write = tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique);
2532    p->num_rebinds = tc_rebind_buffer(tc, tbuf->buffer_id_unique,
2533                                      threaded_resource(new_buf)->buffer_id_unique,
2534                                      &p->rebind_mask);
2535 
2536    /* If the buffer is not bound for write, clear the valid range. */
2537    if (!bound_for_write)
2538       util_range_set_empty(&tbuf->valid_buffer_range);
2539 
2540    tbuf->buffer_id_unique = threaded_resource(new_buf)->buffer_id_unique;
2541    threaded_resource(new_buf)->buffer_id_unique = 0;
2542 
2543    return true;
2544 }
2545 
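/* Adjust the map flags so that as many buffer mappings as possible can be
 * handled without synchronizing with the driver thread: add the TC-internal
 * flags, promote discards where it's safe, and mark mappings that can be
 * unsynchronized.
 */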
2546 static unsigned
2547 tc_improve_map_buffer_flags(struct threaded_context *tc,
2548                             struct threaded_resource *tres, unsigned usage,
2549                             unsigned offset, unsigned size)
2550 {
2551    /* Never invalidate inside the driver and never infer "unsynchronized". */
2552    unsigned tc_flags = TC_TRANSFER_MAP_NO_INVALIDATE |
2553                        TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED;
2554 
2555    /* Prevent a reentry. */
2556    if (usage & tc_flags)
2557       return usage;
2558 
2559    /* Use the staging upload if it's preferred. */
2560    if (usage & (PIPE_MAP_DISCARD_RANGE |
2561                 PIPE_MAP_DISCARD_WHOLE_RESOURCE) &&
2562        !(usage & PIPE_MAP_PERSISTENT) &&
2563        tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY &&
2564        tc->use_forced_staging_uploads) {
2565       usage &= ~(PIPE_MAP_DISCARD_WHOLE_RESOURCE |
2566                  PIPE_MAP_UNSYNCHRONIZED);
2567 
2568       return usage | tc_flags | PIPE_MAP_DISCARD_RANGE;
2569    }
2570 
2571    /* Sparse buffers can't be mapped directly and can't be reallocated
2572     * (fully invalidated). That may just be a radeonsi limitation, but
2573     * the threaded context must obey it with radeonsi.
2574     */
2575    if (tres->b.flags & (PIPE_RESOURCE_FLAG_SPARSE | PIPE_RESOURCE_FLAG_UNMAPPABLE)) {
2576       /* We can use DISCARD_RANGE instead of full discard. This is the only
2577        * fast path for sparse buffers that doesn't need thread synchronization.
2578        */
2579       if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE)
2580          usage |= PIPE_MAP_DISCARD_RANGE;
2581 
2582       /* Allow DISCARD_WHOLE_RESOURCE and inferring UNSYNCHRONIZED in drivers.
2583        * The threaded context doesn't do unsynchronized mappings and
2584        * invalidations of sparse buffers, therefore correct driver behavior
2585        * won't result in incorrect behavior with the threaded context.
2586        */
2587       return usage;
2588    }
2589 
2590    usage |= tc_flags;
2591 
2592    /* Handle CPU reads trivially. */
2593    if (usage & PIPE_MAP_READ) {
2594       if (usage & PIPE_MAP_UNSYNCHRONIZED)
2595          usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* don't sync */
2596 
2597       /* Drivers aren't allowed to do buffer invalidations. */
2598       return usage & ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2599    }
2600 
2601    /* See if the buffer range being mapped has never been initialized or
2602     * the buffer is idle, in which case it can be mapped unsynchronized. */
2603    if (!(usage & PIPE_MAP_UNSYNCHRONIZED) &&
2604        ((!tres->is_shared &&
2605          !util_ranges_intersect(&tres->valid_buffer_range, offset, offset + size)) ||
2606         !tc_is_buffer_busy(tc, tres, usage)))
2607       usage |= PIPE_MAP_UNSYNCHRONIZED;
2608 
2609    if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
2610       /* If discarding the entire valid range, discard the whole resource instead. */
2611       if (usage & PIPE_MAP_DISCARD_RANGE &&
2612           util_ranges_covered(&tres->valid_buffer_range, offset, offset + size))
2613          usage |= PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2614 
2615       /* Discard the whole resource if needed. */
2616       if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) {
2617          if (tc_invalidate_buffer(tc, tres))
2618             usage |= PIPE_MAP_UNSYNCHRONIZED;
2619          else
2620             usage |= PIPE_MAP_DISCARD_RANGE; /* fallback */
2621       }
2622    }
2623 
2624    /* We won't need this flag anymore. */
2625    /* TODO: We might not need TC_TRANSFER_MAP_NO_INVALIDATE with this. */
2626    usage &= ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2627 
2628    /* GL_AMD_pinned_memory and persistent mappings can't use staging
2629     * buffers. */
2630    if (usage & (PIPE_MAP_UNSYNCHRONIZED |
2631                 PIPE_MAP_PERSISTENT) ||
2632        tres->is_user_ptr)
2633       usage &= ~PIPE_MAP_DISCARD_RANGE;
2634 
2635    /* Unsynchronized buffer mappings don't have to synchronize the thread. */
2636    if (usage & PIPE_MAP_UNSYNCHRONIZED) {
2637       usage &= ~PIPE_MAP_DISCARD_RANGE;
2638       usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* notify the driver */
2639    }
2640 
2641    return usage;
2642 }
2643 
2644 static void *
2645 tc_buffer_map(struct pipe_context *_pipe,
2646               struct pipe_resource *resource, unsigned level,
2647               unsigned usage, const struct pipe_box *box,
2648               struct pipe_transfer **transfer)
2649 {
2650    struct threaded_context *tc = threaded_context(_pipe);
2651    struct threaded_resource *tres = threaded_resource(resource);
2652    struct pipe_context *pipe = tc->pipe;
2653 
2654    /* PIPE_MAP_THREAD_SAFE is for glthread, which shouldn't use the CPU storage;
2655     * this shouldn't normally be necessary because glthread only uses large buffers.
2656     */
2657    if (usage & PIPE_MAP_THREAD_SAFE)
2658       tc_buffer_disable_cpu_storage(resource);
2659 
2660    usage = tc_improve_map_buffer_flags(tc, tres, usage, box->x, box->width);
2661 
2662    /* If the CPU storage is enabled, return it directly. */
2663    if (tres->allow_cpu_storage && !(usage & TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE)) {
2664       /* We can't let resource_copy_region disable the CPU storage. */
2665       assert(!(tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY));
2666 
2667       if (!tres->cpu_storage) {
2668          tres->cpu_storage = align_malloc(resource->width0, tc->map_buffer_alignment);
2669 
2670          if (tres->cpu_storage && tres->valid_buffer_range.end) {
2671             /* The GPU buffer contains valid data. Copy it to the CPU storage. */
2672             struct pipe_box box2;
2673             struct pipe_transfer *transfer2;
2674 
2675             unsigned valid_range_len = tres->valid_buffer_range.end - tres->valid_buffer_range.start;
2676             u_box_1d(tres->valid_buffer_range.start, valid_range_len, &box2);
2677 
2678             tc_sync_msg(tc, "cpu storage GPU -> CPU copy");
2679             tc_set_driver_thread(tc);
2680 
2681             void *ret = pipe->buffer_map(pipe, tres->latest ? tres->latest : resource,
2682                                          0, PIPE_MAP_READ, &box2, &transfer2);
2683             memcpy(&((uint8_t*)tres->cpu_storage)[tres->valid_buffer_range.start],
2684                    ret,
2685                    valid_range_len);
2686             pipe->buffer_unmap(pipe, transfer2);
2687 
2688             tc_clear_driver_thread(tc);
2689          }
2690       }
2691 
2692       if (tres->cpu_storage) {
2693          struct threaded_transfer *ttrans = slab_zalloc(&tc->pool_transfers);
2694          ttrans->b.resource = resource;
2695          ttrans->b.usage = usage;
2696          ttrans->b.box = *box;
2697          ttrans->valid_buffer_range = &tres->valid_buffer_range;
2698          ttrans->cpu_storage_mapped = true;
2699          *transfer = &ttrans->b;
2700 
2701          return (uint8_t*)tres->cpu_storage + box->x;
2702       } else {
2703          tres->allow_cpu_storage = false;
2704       }
2705    }
2706 
2707    /* Do a staging transfer within the threaded context. The driver should
2708     * only get resource_copy_region.
2709     */
2710    if (usage & PIPE_MAP_DISCARD_RANGE) {
2711       struct threaded_transfer *ttrans = slab_zalloc(&tc->pool_transfers);
2712       uint8_t *map;
2713 
2714       u_upload_alloc(tc->base.stream_uploader, 0,
2715                      box->width + (box->x % tc->map_buffer_alignment),
2716                      tc->map_buffer_alignment, &ttrans->b.offset,
2717                      &ttrans->staging, (void**)&map);
2718       if (!map) {
2719          slab_free(&tc->pool_transfers, ttrans);
2720          return NULL;
2721       }
2722 
2723       ttrans->b.resource = resource;
2724       ttrans->b.level = 0;
2725       ttrans->b.usage = usage;
2726       ttrans->b.box = *box;
2727       ttrans->b.stride = 0;
2728       ttrans->b.layer_stride = 0;
2729       ttrans->valid_buffer_range = &tres->valid_buffer_range;
2730       ttrans->cpu_storage_mapped = false;
2731       *transfer = &ttrans->b;
2732 
2733       p_atomic_inc(&tres->pending_staging_uploads);
2734       util_range_add(resource, &tres->pending_staging_uploads_range,
2735                      box->x, box->x + box->width);
2736 
2737       return map + (box->x % tc->map_buffer_alignment);
2738    }
2739 
2740    if (usage & PIPE_MAP_UNSYNCHRONIZED &&
2741        p_atomic_read(&tres->pending_staging_uploads) &&
2742        util_ranges_intersect(&tres->pending_staging_uploads_range, box->x, box->x + box->width)) {
2743       /* Write conflict detected between a staging transfer and the direct mapping we're
2744        * going to do. Resolve the conflict by ignoring UNSYNCHRONIZED so the direct mapping
2745        * will have to wait for the staging transfer completion.
2746        * Note: The conflict detection is only based on the mapped range, not on the actual
2747        * written range(s).
2748        */
2749       usage &= ~PIPE_MAP_UNSYNCHRONIZED & ~TC_TRANSFER_MAP_THREADED_UNSYNC;
2750       tc->use_forced_staging_uploads = false;
2751    }
2752 
2753    /* Unsynchronized buffer mappings don't have to synchronize the thread. */
2754    if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)) {
2755       tc_sync_msg(tc, usage & PIPE_MAP_DISCARD_RANGE ? "  discard_range" :
2756                       usage & PIPE_MAP_READ ? "  read" : "  staging conflict");
2757       tc_set_driver_thread(tc);
2758    }
2759 
2760    tc->bytes_mapped_estimate += box->width;
2761 
2762    void *ret = pipe->buffer_map(pipe, tres->latest ? tres->latest : resource,
2763                                 level, usage, box, transfer);
2764    threaded_transfer(*transfer)->valid_buffer_range = &tres->valid_buffer_range;
2765    threaded_transfer(*transfer)->cpu_storage_mapped = false;
2766 
2767    if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
2768       tc_clear_driver_thread(tc);
2769 
2770    return ret;
2771 }
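
/* Informal summary of the three buffer_map paths above, with a minimal
 * frontend sketch ("ctx" is the pipe_context exposed by the threaded
 * context; "buf" and "data" are hypothetical):
 *
 *  1) CPU storage: the buffer is shadowed in malloc'ed memory and fully
 *     re-uploaded on unmap, so the map never stalls.
 *  2) DISCARD_RANGE: the write goes to a temporary allocation from
 *     stream_uploader and is copied into the real buffer with
 *     resource_copy_region, again without stalling.
 *  3) Everything else: the call syncs the driver thread and maps directly.
 *
 *    struct pipe_box box;
 *    struct pipe_transfer *xfer;
 *    u_box_1d(0, 64, &box);
 *    uint8_t *ptr = ctx->buffer_map(ctx, buf, 0,
 *                                   PIPE_MAP_WRITE | PIPE_MAP_DISCARD_RANGE,
 *                                   &box, &xfer);
 *    if (ptr) {
 *       memcpy(ptr, data, 64);
 *       ctx->buffer_unmap(ctx, xfer);
 *    }
 */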
2772 
2773 static void *
2774 tc_texture_map(struct pipe_context *_pipe,
2775                struct pipe_resource *resource, unsigned level,
2776                unsigned usage, const struct pipe_box *box,
2777                struct pipe_transfer **transfer)
2778 {
2779    struct threaded_context *tc = threaded_context(_pipe);
2780    struct threaded_resource *tres = threaded_resource(resource);
2781    struct pipe_context *pipe = tc->pipe;
2782 
2783    tc_sync_msg(tc, "texture");
2784    tc_set_driver_thread(tc);
2785    /* block all unsync texture subdata during map */
2786    tc_set_resource_batch_usage_persistent(tc, resource, true);
2787 
2788    tc->bytes_mapped_estimate += box->width;
2789 
2790    void *ret = pipe->texture_map(pipe, tres->latest ? tres->latest : resource,
2791                                  level, usage, box, transfer);
2792 
2793    if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
2794       tc_clear_driver_thread(tc);
2795 
2796    return ret;
2797 }
2798 
2799 struct tc_transfer_flush_region {
2800    struct tc_call_base base;
2801    struct pipe_box box;
2802    struct pipe_transfer *transfer;
2803 };
2804 
2805 static uint16_t
2806 tc_call_transfer_flush_region(struct pipe_context *pipe, void *call)
2807 {
2808    struct tc_transfer_flush_region *p = to_call(call, tc_transfer_flush_region);
2809 
2810    pipe->transfer_flush_region(pipe, p->transfer, &p->box);
2811    return call_size(tc_transfer_flush_region);
2812 }
2813 
2814 struct tc_resource_copy_region {
2815    struct tc_call_base base;
2816    unsigned dst_level;
2817    unsigned dstx, dsty, dstz;
2818    unsigned src_level;
2819    struct pipe_box src_box;
2820    struct pipe_resource *dst;
2821    struct pipe_resource *src;
2822 };
2823 
2824 static void
2825 tc_resource_copy_region(struct pipe_context *_pipe,
2826                         struct pipe_resource *dst, unsigned dst_level,
2827                         unsigned dstx, unsigned dsty, unsigned dstz,
2828                         struct pipe_resource *src, unsigned src_level,
2829                         const struct pipe_box *src_box);
2830 
2831 static void
2832 tc_buffer_do_flush_region(struct threaded_context *tc,
2833                           struct threaded_transfer *ttrans,
2834                           const struct pipe_box *box)
2835 {
2836    struct threaded_resource *tres = threaded_resource(ttrans->b.resource);
2837 
2838    if (ttrans->staging) {
2839       struct pipe_box src_box;
2840 
2841       u_box_1d(ttrans->b.offset + ttrans->b.box.x % tc->map_buffer_alignment +
2842                (box->x - ttrans->b.box.x),
2843                box->width, &src_box);
2844 
2845       /* Copy the staging buffer into the original one. */
2846       tc_resource_copy_region(&tc->base, ttrans->b.resource, 0, box->x, 0, 0,
2847                               ttrans->staging, 0, &src_box);
2848    }
2849 
2850    /* Don't update the valid range when we're uploading the CPU storage
2851     * because it includes the uninitialized range too.
2852     */
2853    if (!(ttrans->b.usage & TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE)) {
2854       util_range_add(&tres->b, ttrans->valid_buffer_range,
2855                      box->x, box->x + box->width);
2856    }
2857 }
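
/* Worked example of the staging-offset math above (illustrative numbers):
 * with map_buffer_alignment = 256, an original map at box.x = 300 whose
 * u_upload_alloc returned offset = 1024, and a flush of box.x = 310,
 * width = 16, the copy source starts at
 *
 *    1024 + (300 % 256) + (310 - 300) = 1024 + 44 + 10 = 1078
 *
 * which is exactly where the bytes for offset 310 were written by
 * tc_buffer_map (map + box.x % map_buffer_alignment).
 */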
2858 
2859 static void
2860 tc_transfer_flush_region(struct pipe_context *_pipe,
2861                          struct pipe_transfer *transfer,
2862                          const struct pipe_box *rel_box)
2863 {
2864    struct threaded_context *tc = threaded_context(_pipe);
2865    struct threaded_transfer *ttrans = threaded_transfer(transfer);
2866    struct threaded_resource *tres = threaded_resource(transfer->resource);
2867    unsigned required_usage = PIPE_MAP_WRITE |
2868                              PIPE_MAP_FLUSH_EXPLICIT;
2869 
2870    if (tres->b.target == PIPE_BUFFER) {
2871       if ((transfer->usage & required_usage) == required_usage) {
2872          struct pipe_box box;
2873 
2874          u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &box);
2875          tc_buffer_do_flush_region(tc, ttrans, &box);
2876       }
2877 
2878       /* Staging transfers don't send the call to the driver.
2879        *
2880        * Transfers using the CPU storage shouldn't call transfer_flush_region
2881        * in the driver because the buffer is not really mapped on the driver
2882        * side and the CPU storage always re-uploads everything (flush_region
2883        * makes no difference).
2884        */
2885       if (ttrans->staging || ttrans->cpu_storage_mapped)
2886          return;
2887    }
2888 
2889    struct tc_transfer_flush_region *p =
2890       tc_add_call(tc, TC_CALL_transfer_flush_region, tc_transfer_flush_region);
2891    p->transfer = transfer;
2892    p->box = *rel_box;
2893 }
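
/* For reference, the frontend pattern that reaches this path is a write map
 * with explicit flushes (a sketch with made-up offsets; "ctx", "buf", "box"
 * and "xfer" are hypothetical; rel_box is relative to the mapped box):
 *
 *    void *ptr = ctx->buffer_map(ctx, buf, 0,
 *                                PIPE_MAP_WRITE | PIPE_MAP_FLUSH_EXPLICIT,
 *                                &box, &xfer);
 *    (write bytes [16, 48) of the mapping)
 *    struct pipe_box dirty;
 *    u_box_1d(16, 32, &dirty);
 *    ctx->transfer_flush_region(ctx, xfer, &dirty);
 *    ctx->buffer_unmap(ctx, xfer);
 */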
2894 
2895 struct tc_buffer_unmap {
2896    struct tc_call_base base;
2897    bool was_staging_transfer;
2898    union {
2899       struct pipe_transfer *transfer;
2900       struct pipe_resource *resource;
2901    };
2902 };
2903 
2904 static uint16_t
2905 tc_call_buffer_unmap(struct pipe_context *pipe, void *call)
2906 {
2907    struct tc_buffer_unmap *p = to_call(call, tc_buffer_unmap);
2908 
2909    if (p->was_staging_transfer) {
2910       struct threaded_resource *tres = threaded_resource(p->resource);
2911       /* Nothing to do except keeping track of staging uploads */
2912       assert(tres->pending_staging_uploads > 0);
2913       p_atomic_dec(&tres->pending_staging_uploads);
2914       tc_drop_resource_reference(p->resource);
2915    } else {
2916       pipe->buffer_unmap(pipe, p->transfer);
2917    }
2918 
2919    return call_size(tc_buffer_unmap);
2920 }
2921 
2922 static void
2923 tc_buffer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
2924 {
2925    struct threaded_context *tc = threaded_context(_pipe);
2926    struct threaded_transfer *ttrans = threaded_transfer(transfer);
2927    struct threaded_resource *tres = threaded_resource(transfer->resource);
2928 
2929    /* PIPE_MAP_THREAD_SAFE is only valid with UNSYNCHRONIZED. It can be
2930     * called from any thread and bypasses all multithreaded queues.
2931     */
2932    if (transfer->usage & PIPE_MAP_THREAD_SAFE) {
2933       assert(transfer->usage & PIPE_MAP_UNSYNCHRONIZED);
2934       assert(!(transfer->usage & (PIPE_MAP_FLUSH_EXPLICIT |
2935                                   PIPE_MAP_DISCARD_RANGE)));
2936 
2937       struct pipe_context *pipe = tc->pipe;
2938       util_range_add(&tres->b, ttrans->valid_buffer_range,
2939                       transfer->box.x, transfer->box.x + transfer->box.width);
2940 
2941       pipe->buffer_unmap(pipe, transfer);
2942       return;
2943    }
2944 
2945    if (transfer->usage & PIPE_MAP_WRITE &&
2946        !(transfer->usage & PIPE_MAP_FLUSH_EXPLICIT))
2947       tc_buffer_do_flush_region(tc, ttrans, &transfer->box);
2948 
2949    if (ttrans->cpu_storage_mapped) {
2950       /* GL allows simultaneous GPU stores with mapped buffers as long as GPU stores don't
2951        * touch the mapped range. That's a problem because GPU stores free the CPU storage.
2952        * If that happens, we just ignore the unmap call and don't upload anything to prevent
2953        * a crash.
2954        *
2955        * Disallow the CPU storage in the driver to work around this.
2956        */
2957       assert(tres->cpu_storage);
2958 
2959       if (tres->cpu_storage) {
2960          tc_invalidate_buffer(tc, tres);
2961          tc_buffer_subdata(&tc->base, &tres->b,
2962                            PIPE_MAP_UNSYNCHRONIZED |
2963                            TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE,
2964                            0, tres->b.width0, tres->cpu_storage);
2965          /* This shouldn't have been freed by buffer_subdata. */
2966          assert(tres->cpu_storage);
2967       } else {
2968          static bool warned_once = false;
2969          if (!warned_once) {
2970             fprintf(stderr, "This application is incompatible with cpu_storage.\n");
2971             fprintf(stderr, "Use tc_max_cpu_storage_size=0 to disable it and report this issue to Mesa.\n");
2972             warned_once = true;
2973          }
2974       }
2975 
2976       tc_drop_resource_reference(ttrans->staging);
2977       slab_free(&tc->pool_transfers, ttrans);
2978       return;
2979    }
2980 
2981    bool was_staging_transfer = false;
2982 
2983    if (ttrans->staging) {
2984       was_staging_transfer = true;
2985 
2986       tc_drop_resource_reference(ttrans->staging);
2987       slab_free(&tc->pool_transfers, ttrans);
2988    }
2989 
2990    struct tc_buffer_unmap *p = tc_add_call(tc, TC_CALL_buffer_unmap,
2991                                            tc_buffer_unmap);
2992    if (was_staging_transfer) {
2993       tc_set_resource_reference(&p->resource, &tres->b);
2994       p->was_staging_transfer = true;
2995    } else {
2996       p->transfer = transfer;
2997       p->was_staging_transfer = false;
2998    }
2999 
3000    /* tc_buffer_map directly maps the buffers, but tc_buffer_unmap
3001     * defers the unmap operation to the batch execution.
3002     * bytes_mapped_estimate is an estimate of the map/unmap bytes delta,
3003     * and if it exceeds an optional limit, the current batch is flushed
3004     * to reclaim some RAM. */
3005    if (!ttrans->staging && tc->bytes_mapped_limit &&
3006        tc->bytes_mapped_estimate > tc->bytes_mapped_limit) {
3007       tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC);
3008    }
3009 }
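
/* Example of the bytes_mapped heuristic (hypothetical numbers): with
 * tc->bytes_mapped_limit set to 64 MB, a stream of large non-staging maps
 * that pushes bytes_mapped_estimate past 64 MB triggers an asynchronous
 * flush here, letting the driver thread execute the queued unmaps and
 * release memory sooner.  Staging transfers are exempt because their
 * memory comes from the stream uploader.
 */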
3010 
3011 struct tc_texture_unmap {
3012    struct tc_call_base base;
3013    struct pipe_transfer *transfer;
3014 };
3015 
3016 static uint16_t
3017 tc_call_texture_unmap(struct pipe_context *pipe, void *call)
3018 {
3019    struct tc_texture_unmap *p = (struct tc_texture_unmap *) call;
3020 
3021    pipe->texture_unmap(pipe, p->transfer);
3022    return call_size(tc_texture_unmap);
3023 }
3024 
3025 static void
3026 tc_texture_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
3027 {
3028    struct threaded_context *tc = threaded_context(_pipe);
3029    struct threaded_transfer *ttrans = threaded_transfer(transfer);
3030 
3031    /* enable subdata again once resource is no longer mapped */
3032    tc_set_resource_batch_usage_persistent(tc, transfer->resource, false);
3033 
3034    tc_add_call(tc, TC_CALL_texture_unmap, tc_texture_unmap)->transfer = transfer;
3035 
3036    /* tc_texture_map directly maps the textures, but tc_texture_unmap
3037     * defers the unmap operation to the batch execution.
3038     * bytes_mapped_estimate is an estimate of the map/unmap bytes delta,
3039     * and if it exceeds an optional limit, the current batch is flushed
3040     * to reclaim some RAM. */
3041    if (!ttrans->staging && tc->bytes_mapped_limit &&
3042        tc->bytes_mapped_estimate > tc->bytes_mapped_limit) {
3043       tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC);
3044    }
3045 }
3046 
3047 struct tc_buffer_subdata {
3048    struct tc_call_base base;
3049    unsigned usage, offset, size;
3050    struct pipe_resource *resource;
3051    char slot[0]; /* more will be allocated if needed */
3052 };
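
/* Note on the slot[0] member: the subdata payload is stored inline after
 * the call header.  tc_add_slot_based_call (see u_threaded_context.h)
 * reserves enough fixed-size tc_call_base slots to hold this struct plus
 * "size" extra bytes, and tc_call_buffer_subdata returns base.num_slots so
 * the batch executor knows how far to advance.
 */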
3053 
3054 static uint16_t
3055 tc_call_buffer_subdata(struct pipe_context *pipe, void *call)
3056 {
3057    struct tc_buffer_subdata *p = (struct tc_buffer_subdata *)call;
3058 
3059    pipe->buffer_subdata(pipe, p->resource, p->usage, p->offset, p->size,
3060                         p->slot);
3061    tc_drop_resource_reference(p->resource);
3062    return p->base.num_slots;
3063 }
3064 
3065 static bool
3066 is_mergeable_buffer_subdata(const struct tc_call_base *previous_call,
3067                             unsigned usage, unsigned offset,
3068                             struct pipe_resource *resource)
3069 {
3070    if (!previous_call || previous_call->call_id != TC_CALL_buffer_subdata)
3071       return false;
3072 
3073    struct tc_buffer_subdata *subdata = (struct tc_buffer_subdata *)previous_call;
3074 
3075    return subdata->usage == usage && subdata->resource == resource
3076           && (subdata->offset + subdata->size) == offset;
3077 }
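
/* Example of a mergeable sequence (a sketch; "ctx", "buf" and "data" are
 * hypothetical): two back-to-back frontend uploads such as
 *
 *    ctx->buffer_subdata(ctx, buf, 0, 0,   256, data);
 *    ctx->buffer_subdata(ctx, buf, 0, 256, 128, (const uint8_t *)data + 256);
 *
 * reach this test with offset == 256 and subdata->offset + subdata->size ==
 * 256, so the second upload is appended to the first call's slot and the
 * batch ends up with a single 384-byte tc_buffer_subdata entry.
 */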
3078 
3079 static void
3080 tc_buffer_subdata(struct pipe_context *_pipe,
3081                   struct pipe_resource *resource,
3082                   unsigned usage, unsigned offset,
3083                   unsigned size, const void *data)
3084 {
3085    struct threaded_context *tc = threaded_context(_pipe);
3086    struct threaded_resource *tres = threaded_resource(resource);
3087 
3088    if (!size)
3089       return;
3090 
3091    usage |= PIPE_MAP_WRITE;
3092 
3093    /* PIPE_MAP_DIRECTLY suppresses implicit DISCARD_RANGE. */
3094    if (!(usage & PIPE_MAP_DIRECTLY))
3095       usage |= PIPE_MAP_DISCARD_RANGE;
3096 
3097    usage = tc_improve_map_buffer_flags(tc, tres, usage, offset, size);
3098 
3099    /* Unsynchronized and big transfers should use transfer_map. Also handle
3100     * full invalidations, because drivers aren't allowed to do them.
3101     */
3102    if (usage & (PIPE_MAP_UNSYNCHRONIZED |
3103                 PIPE_MAP_DISCARD_WHOLE_RESOURCE) ||
3104        size > TC_MAX_SUBDATA_BYTES ||
3105        tres->cpu_storage) {
3106       struct pipe_transfer *transfer;
3107       struct pipe_box box;
3108       uint8_t *map = NULL;
3109 
3110       u_box_1d(offset, size, &box);
3111 
3112       /* CPU storage is only useful for partial updates. It can add overhead
3113        * on glBufferData calls, so avoid using it for whole-buffer uploads.
3114        */
3115       if (!tres->cpu_storage && offset == 0 && size == resource->width0)
3116          usage |= TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE;
3117 
3118       map = tc_buffer_map(_pipe, resource, 0, usage, &box, &transfer);
3119       if (map) {
3120          memcpy(map, data, size);
3121          tc_buffer_unmap(_pipe, transfer);
3122       }
3123       return;
3124    }
3125 
3126    util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
3127 
3128    /* We can potentially merge this subdata call with the previous one (if any),
3129     * if the application does a whole-buffer upload piecewise. */
3130    {
3131       struct tc_call_base *last_call = tc_get_last_mergeable_call(tc);
3132       struct tc_buffer_subdata *merge_dest = (struct tc_buffer_subdata *)last_call;
3133 
3134       if (is_mergeable_buffer_subdata(last_call, usage, offset, resource) &&
3135          tc_enlarge_last_mergeable_call(tc, call_size_with_slots(tc_buffer_subdata, merge_dest->size + size))) {
3136          memcpy(merge_dest->slot + merge_dest->size, data, size);
3137          merge_dest->size += size;
3138 
3139          /* TODO: We *could* do an invalidate + upload here if we detect that
3140           * the merged subdata call overwrites the entire buffer. However, that's
3141           * a little complicated since we can't add further calls to our batch
3142           * until we have removed the merged subdata call, which means that
3143           * calling tc_invalidate_buffer before we have removed the call will
3144           * blow things up.
3145           *
3146           * Just leave a large, merged subdata call in the batch for now, which is
3147           * at least better than tons of tiny subdata calls.
3148           */
3149 
3150          return;
3151       }
3152    }
3153 
3154    /* The upload is small. Enqueue it. */
3155    struct tc_buffer_subdata *p =
3156       tc_add_slot_based_call(tc, TC_CALL_buffer_subdata, tc_buffer_subdata, size);
3157 
3158    tc_set_resource_reference(&p->resource, resource);
3159    /* This will always be busy because if it wasn't, tc_improve_map_buffer_flags
3160     * would set UNSYNCHRONIZED and we wouldn't get here.
3161     */
3162    tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], resource);
3163    p->usage = usage;
3164    p->offset = offset;
3165    p->size = size;
3166    memcpy(p->slot, data, size);
3167 
3168    tc_mark_call_mergeable(tc, &p->base);
3169 }
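
/* Decision summary for tc_buffer_subdata above (informal):
 *  - size == 0                                   -> no-op
 *  - UNSYNCHRONIZED, DISCARD_WHOLE_RESOURCE,
 *    size > TC_MAX_SUBDATA_BYTES, or CPU storage -> route through
 *                                                   tc_buffer_map + memcpy
 *  - previous call mergeable                     -> append to that call's slot
 *  - otherwise                                   -> enqueue a slot-based
 *                                                   buffer_subdata call
 */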
3170 
3171 struct tc_texture_subdata {
3172    struct tc_call_base base;
3173    unsigned level, usage, stride;
3174    struct pipe_box box;
3175    struct pipe_resource *resource;
3176    uintptr_t layer_stride;
3177    char slot[0]; /* more will be allocated if needed */
3178 };
3179 
3180 static uint16_t
3181 tc_call_texture_subdata(struct pipe_context *pipe, void *call)
3182 {
3183    struct tc_texture_subdata *p = (struct tc_texture_subdata *)call;
3184 
3185    pipe->texture_subdata(pipe, p->resource, p->level, p->usage, &p->box,
3186                          p->slot, p->stride, p->layer_stride);
3187    tc_drop_resource_reference(p->resource);
3188    return p->base.num_slots;
3189 }
3190 
3191 static void
3192 tc_texture_subdata(struct pipe_context *_pipe,
3193                    struct pipe_resource *resource,
3194                    unsigned level, unsigned usage,
3195                    const struct pipe_box *box,
3196                    const void *data, unsigned stride,
3197                    uintptr_t layer_stride)
3198 {
3199    struct threaded_context *tc = threaded_context(_pipe);
3200    uint64_t size;
3201 
3202    assert(box->height >= 1);
3203    assert(box->depth >= 1);
3204 
3205    size = (box->depth - 1) * layer_stride +
3206           (box->height - 1) * (uint64_t)stride +
3207           box->width * util_format_get_blocksize(resource->format);
3208    if (!size)
3209       return;
3210 
3211    /* Small uploads can be enqueued, big uploads must sync. */
3212    if (size <= TC_MAX_SUBDATA_BYTES) {
3213       struct tc_texture_subdata *p =
3214          tc_add_slot_based_call(tc, TC_CALL_texture_subdata, tc_texture_subdata, size);
3215 
3216       tc_set_resource_batch_usage(tc, resource);
3217       tc_set_resource_reference(&p->resource, resource);
3218       p->level = level;
3219       p->usage = usage;
3220       p->box = *box;
3221       p->stride = stride;
3222       p->layer_stride = layer_stride;
3223       memcpy(p->slot, data, size);
3224    } else {
3225       struct pipe_context *pipe = tc->pipe;
3226       struct threaded_resource *tres = threaded_resource(resource);
3227       unsigned unsync_usage = TC_TRANSFER_MAP_THREADED_UNSYNC | PIPE_MAP_UNSYNCHRONIZED | PIPE_MAP_WRITE;
3228       bool can_unsync = !tc_resource_batch_usage_test_busy(tc, resource) &&
3229                         tc->options.is_resource_busy &&
3230                         !tc->options.is_resource_busy(tc->pipe->screen, tres->latest, usage | unsync_usage);
3231 
3232       if (!can_unsync && resource->usage != PIPE_USAGE_STAGING &&
3233           tc->options.parse_renderpass_info && tc->in_renderpass) {
3234          enum pipe_format format = resource->format;
3235          if (usage & PIPE_MAP_DEPTH_ONLY)
3236             format = util_format_get_depth_only(format);
3237          else if (usage & PIPE_MAP_STENCIL_ONLY)
3238             format = PIPE_FORMAT_S8_UINT;
3239 
3240          unsigned fmt_stride = util_format_get_stride(format, box->width);
3241          uint64_t fmt_layer_stride = util_format_get_2d_size(format, stride, box->height);
3242          assert(fmt_layer_stride * box->depth <= UINT32_MAX);
3243 
3244          struct pipe_resource *pres = pipe_buffer_create(pipe->screen, 0, PIPE_USAGE_STREAM, layer_stride * box->depth);
3245          pipe->buffer_subdata(pipe, pres, unsync_usage, 0, layer_stride * box->depth, data);
3246          struct pipe_box src_box = *box;
3247          src_box.x = src_box.y = src_box.z = 0;
3248 
3249          if (fmt_stride == stride && fmt_layer_stride == layer_stride) {
3250             /* if stride matches, a single copy is fine */
3251             tc->base.resource_copy_region(&tc->base, resource, level, box->x, box->y, box->z, pres, 0, &src_box);
3252          } else {
3253             /* if stride doesn't match, inline util_copy_box on the GPU and assume the driver will optimize */
3254             src_box.depth = 1;
3255             for (unsigned z = 0; z < box->depth; ++z, src_box.x = z * layer_stride) {
3256                unsigned dst_x = box->x, dst_y = box->y, width = box->width, height = box->height, dst_z = box->z + z;
3257                int blocksize = util_format_get_blocksize(format);
3258                int blockwidth = util_format_get_blockwidth(format);
3259                int blockheight = util_format_get_blockheight(format);
3260 
3261                assert(blocksize > 0);
3262                assert(blockwidth > 0);
3263                assert(blockheight > 0);
3264 
3265                dst_x /= blockwidth;
3266                dst_y /= blockheight;
3267                width = DIV_ROUND_UP(width, blockwidth);
3268                height = DIV_ROUND_UP(height, blockheight);
3269 
3270                width *= blocksize;
3271 
3272                if (width == fmt_stride && width == (unsigned)stride) {
3273                   ASSERTED uint64_t size = (uint64_t)height * width;
3274 
3275                   assert(size <= SIZE_MAX);
3276                   assert(dst_x + src_box.width < u_minify(pres->width0, level));
3277                   assert(dst_y + src_box.height < u_minify(pres->height0, level));
3278                   assert(pres->target != PIPE_TEXTURE_3D ||  z + src_box.depth < u_minify(pres->depth0, level));
3279                   tc->base.resource_copy_region(&tc->base, resource, level, dst_x, dst_y, dst_z, pres, 0, &src_box);
3280                } else {
3281                   src_box.height = 1;
3282                   for (unsigned i = 0; i < height; i++, dst_y++, src_box.x += stride)
3283                      tc->base.resource_copy_region(&tc->base, resource, level, dst_x, dst_y, dst_z, pres, 0, &src_box);
3284                }
3285             }
3286          }
3287 
3288          pipe_resource_reference(&pres, NULL);
3289       } else {
3290          if (can_unsync) {
3291             usage |= unsync_usage;
3292          } else {
3293             tc_sync(tc);
3294             tc_set_driver_thread(tc);
3295          }
3296          pipe->texture_subdata(pipe, resource, level, usage, box, data,
3297                               stride, layer_stride);
3298          if (!can_unsync)
3299             tc_clear_driver_thread(tc);
3300       }
3301    }
3302 }
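
/* Summary of tc_texture_subdata above (informal): small uploads are enqueued
 * as slot-based calls; large uploads are written unsynchronized when the
 * resource is provably idle; inside a renderpass (when renderpass info is
 * parsed) the data is instead staged in a temporary PIPE_USAGE_STREAM buffer
 * and copied on the GPU so the renderpass isn't split; otherwise the call
 * syncs and goes straight to the driver.
 */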
3303 
3304 
3305 /********************************************************************
3306  * miscellaneous
3307  */
3308 
3309 #define TC_FUNC_SYNC_RET0(ret_type, func) \
3310    static ret_type \
3311    tc_##func(struct pipe_context *_pipe) \
3312    { \
3313       struct threaded_context *tc = threaded_context(_pipe); \
3314       struct pipe_context *pipe = tc->pipe; \
3315       tc_sync(tc); \
3316       return pipe->func(pipe); \
3317    }
3318 
3319 TC_FUNC_SYNC_RET0(uint64_t, get_timestamp)
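
/* For reference, the line above expands to a synchronous wrapper:
 *
 *    static uint64_t
 *    tc_get_timestamp(struct pipe_context *_pipe)
 *    {
 *       struct threaded_context *tc = threaded_context(_pipe);
 *       struct pipe_context *pipe = tc->pipe;
 *       tc_sync(tc);
 *       return pipe->get_timestamp(pipe);
 *    }
 */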
3320 
3321 static void
3322 tc_get_sample_position(struct pipe_context *_pipe,
3323                        unsigned sample_count, unsigned sample_index,
3324                        float *out_value)
3325 {
3326    struct threaded_context *tc = threaded_context(_pipe);
3327    struct pipe_context *pipe = tc->pipe;
3328 
3329    pipe->get_sample_position(pipe, sample_count, sample_index,
3330                              out_value);
3331 }
3332 
3333 static enum pipe_reset_status
3334 tc_get_device_reset_status(struct pipe_context *_pipe)
3335 {
3336    struct threaded_context *tc = threaded_context(_pipe);
3337    struct pipe_context *pipe = tc->pipe;
3338 
3339    if (!tc->options.unsynchronized_get_device_reset_status)
3340       tc_sync(tc);
3341 
3342    return pipe->get_device_reset_status(pipe);
3343 }
3344 
3345 static void
3346 tc_set_device_reset_callback(struct pipe_context *_pipe,
3347                              const struct pipe_device_reset_callback *cb)
3348 {
3349    struct threaded_context *tc = threaded_context(_pipe);
3350    struct pipe_context *pipe = tc->pipe;
3351 
3352    tc_sync(tc);
3353    pipe->set_device_reset_callback(pipe, cb);
3354 }
3355 
3356 struct tc_string_marker {
3357    struct tc_call_base base;
3358    int len;
3359    char slot[0]; /* more will be allocated if needed */
3360 };
3361 
3362 static uint16_t
3363 tc_call_emit_string_marker(struct pipe_context *pipe, void *call)
3364 {
3365    struct tc_string_marker *p = (struct tc_string_marker *)call;
3366    pipe->emit_string_marker(pipe, p->slot, p->len);
3367    return p->base.num_slots;
3368 }
3369 
3370 static void
3371 tc_emit_string_marker(struct pipe_context *_pipe,
3372                       const char *string, int len)
3373 {
3374    struct threaded_context *tc = threaded_context(_pipe);
3375 
3376    if (len <= TC_MAX_STRING_MARKER_BYTES) {
3377       struct tc_string_marker *p =
3378          tc_add_slot_based_call(tc, TC_CALL_emit_string_marker, tc_string_marker, len);
3379 
3380       memcpy(p->slot, string, len);
3381       p->len = len;
3382    } else {
3383       struct pipe_context *pipe = tc->pipe;
3384 
3385       tc_sync(tc);
3386       tc_set_driver_thread(tc);
3387       pipe->emit_string_marker(pipe, string, len);
3388       tc_clear_driver_thread(tc);
3389    }
3390 }
3391 
3392 static void
3393 tc_dump_debug_state(struct pipe_context *_pipe, FILE *stream,
3394                     unsigned flags)
3395 {
3396    struct threaded_context *tc = threaded_context(_pipe);
3397    struct pipe_context *pipe = tc->pipe;
3398 
3399    tc_sync(tc);
3400    pipe->dump_debug_state(pipe, stream, flags);
3401 }
3402 
3403 static void
3404 tc_set_debug_callback(struct pipe_context *_pipe,
3405                       const struct util_debug_callback *cb)
3406 {
3407    struct threaded_context *tc = threaded_context(_pipe);
3408    struct pipe_context *pipe = tc->pipe;
3409 
3410    tc_sync(tc);
3411 
3412    /* Drop all synchronous debug callbacks. Drivers are expected to be OK
3413     * with this. shader-db will use an environment variable to disable
3414     * the threaded context.
3415     */
3416    if (cb && !cb->async)
3417       pipe->set_debug_callback(pipe, NULL);
3418    else
3419       pipe->set_debug_callback(pipe, cb);
3420 }
3421 
3422 static void
3423 tc_set_log_context(struct pipe_context *_pipe, struct u_log_context *log)
3424 {
3425    struct threaded_context *tc = threaded_context(_pipe);
3426    struct pipe_context *pipe = tc->pipe;
3427 
3428    tc_sync(tc);
3429    pipe->set_log_context(pipe, log);
3430 }
3431 
3432 static void
3433 tc_create_fence_fd(struct pipe_context *_pipe,
3434                    struct pipe_fence_handle **fence, int fd,
3435                    enum pipe_fd_type type)
3436 {
3437    struct threaded_context *tc = threaded_context(_pipe);
3438    struct pipe_context *pipe = tc->pipe;
3439 
3440    if (!tc->options.unsynchronized_create_fence_fd)
3441       tc_sync(tc);
3442 
3443    pipe->create_fence_fd(pipe, fence, fd, type);
3444 }
3445 
3446 struct tc_fence_call {
3447    struct tc_call_base base;
3448    struct pipe_fence_handle *fence;
3449 };
3450 
3451 static uint16_t
3452 tc_call_fence_server_sync(struct pipe_context *pipe, void *call)
3453 {
3454    struct pipe_fence_handle *fence = to_call(call, tc_fence_call)->fence;
3455 
3456    pipe->fence_server_sync(pipe, fence);
3457    pipe->screen->fence_reference(pipe->screen, &fence, NULL);
3458    return call_size(tc_fence_call);
3459 }
3460 
3461 static void
3462 tc_fence_server_sync(struct pipe_context *_pipe,
3463                      struct pipe_fence_handle *fence)
3464 {
3465    struct threaded_context *tc = threaded_context(_pipe);
3466    struct pipe_screen *screen = tc->pipe->screen;
3467    struct tc_fence_call *call = tc_add_call(tc, TC_CALL_fence_server_sync,
3468                                             tc_fence_call);
3469 
3470    call->fence = NULL;
3471    screen->fence_reference(screen, &call->fence, fence);
3472 }
3473 
3474 static void
3475 tc_fence_server_signal(struct pipe_context *_pipe,
3476                            struct pipe_fence_handle *fence)
3477 {
3478    struct threaded_context *tc = threaded_context(_pipe);
3479    struct pipe_context *pipe = tc->pipe;
3480    tc_sync(tc);
3481    pipe->fence_server_signal(pipe, fence);
3482 }
3483 
3484 static struct pipe_video_codec *
3485 tc_create_video_codec(UNUSED struct pipe_context *_pipe,
3486                       UNUSED const struct pipe_video_codec *templ)
3487 {
3488    unreachable("Threaded context should not be enabled for video APIs");
3489    return NULL;
3490 }
3491 
3492 static struct pipe_video_buffer *
3493 tc_create_video_buffer(UNUSED struct pipe_context *_pipe,
3494                        UNUSED const struct pipe_video_buffer *templ)
3495 {
3496    unreachable("Threaded context should not be enabled for video APIs");
3497    return NULL;
3498 }
3499 
3500 struct tc_context_param {
3501    struct tc_call_base base;
3502    enum pipe_context_param param;
3503    unsigned value;
3504 };
3505 
3506 static uint16_t
3507 tc_call_set_context_param(struct pipe_context *pipe, void *call)
3508 {
3509    struct tc_context_param *p = to_call(call, tc_context_param);
3510 
3511    if (pipe->set_context_param)
3512       pipe->set_context_param(pipe, p->param, p->value);
3513 
3514    return call_size(tc_context_param);
3515 }
3516 
3517 static void
3518 tc_set_context_param(struct pipe_context *_pipe,
3519                            enum pipe_context_param param,
3520                            unsigned value)
3521 {
3522    struct threaded_context *tc = threaded_context(_pipe);
3523 
3524    if (param == PIPE_CONTEXT_PARAM_UPDATE_THREAD_SCHEDULING) {
3525       util_thread_sched_apply_policy(tc->queue.threads[0],
3526                                      UTIL_THREAD_THREADED_CONTEXT, value,
3527                                      NULL);
3528 
3529       /* Execute this immediately (without enqueuing).
3530        * It's required to be thread-safe.
3531        */
3532       struct pipe_context *pipe = tc->pipe;
3533       if (pipe->set_context_param)
3534          pipe->set_context_param(pipe, param, value);
3535       return;
3536    }
3537 
3538    if (tc->pipe->set_context_param) {
3539       struct tc_context_param *call =
3540          tc_add_call(tc, TC_CALL_set_context_param, tc_context_param);
3541 
3542       call->param = param;
3543       call->value = value;
3544    }
3545 }
3546 
3547 
3548 /********************************************************************
3549  * draw, launch, clear, blit, copy, flush
3550  */
3551 
3552 struct tc_flush_deferred_call {
3553    struct tc_call_base base;
3554    unsigned flags;
3555    struct pipe_fence_handle *fence;
3556 };
3557 
3558 struct tc_flush_call {
3559    struct tc_call_base base;
3560    unsigned flags;
3561    struct pipe_fence_handle *fence;
3562    struct threaded_context *tc;
3563 };
3564 
3565 static void
3566 tc_flush_queries(struct threaded_context *tc)
3567 {
3568    struct threaded_query *tq, *tmp;
3569    LIST_FOR_EACH_ENTRY_SAFE(tq, tmp, &tc->unflushed_queries, head_unflushed) {
3570       list_del(&tq->head_unflushed);
3571 
3572       /* Memory release semantics: due to a possible race with
3573        * tc_get_query_result, we must ensure that the linked list changes
3574        * are visible before setting tq->flushed.
3575        */
3576       p_atomic_set(&tq->flushed, true);
3577    }
3578 }
3579 
3580 static uint16_t
3581 tc_call_flush_deferred(struct pipe_context *pipe, void *call)
3582 {
3583    struct tc_flush_deferred_call *p = to_call(call, tc_flush_deferred_call);
3584    struct pipe_screen *screen = pipe->screen;
3585 
3586    pipe->flush(pipe, p->fence ? &p->fence : NULL, p->flags);
3587    screen->fence_reference(screen, &p->fence, NULL);
3588 
3589    return call_size(tc_flush_deferred_call);
3590 }
3591 
3592 static uint16_t
3593 tc_call_flush(struct pipe_context *pipe, void *call)
3594 {
3595    struct tc_flush_call *p = to_call(call, tc_flush_call);
3596    struct pipe_screen *screen = pipe->screen;
3597 
3598    pipe->flush(pipe, p->fence ? &p->fence : NULL, p->flags);
3599    screen->fence_reference(screen, &p->fence, NULL);
3600 
3601    tc_flush_queries(p->tc);
3602 
3603    return call_size(tc_flush_call);
3604 }
3605 
3606 static void
3607 tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
3608          unsigned flags)
3609 {
3610    struct threaded_context *tc = threaded_context(_pipe);
3611    struct pipe_context *pipe = tc->pipe;
3612    struct pipe_screen *screen = pipe->screen;
3613    bool async = flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC);
3614    bool deferred = (flags & PIPE_FLUSH_DEFERRED) > 0;
3615 
3616    if (!deferred || !fence)
3617       tc->in_renderpass = false;
3618 
3619    if (async && tc->options.create_fence) {
3620       if (fence) {
3621          struct tc_batch *next = &tc->batch_slots[tc->next];
3622 
3623          if (!next->token) {
3624             next->token = malloc(sizeof(*next->token));
3625             if (!next->token)
3626                goto out_of_memory;
3627 
3628             pipe_reference_init(&next->token->ref, 1);
3629             next->token->tc = tc;
3630          }
3631 
3632          screen->fence_reference(screen, fence,
3633                                  tc->options.create_fence(pipe, next->token));
3634          if (!*fence)
3635             goto out_of_memory;
3636       }
3637 
3638       struct tc_flush_call *p;
3639       if (deferred) {
3640          /* these have identical fields */
3641          p = (struct tc_flush_call *)tc_add_call(tc, TC_CALL_flush_deferred, tc_flush_deferred_call);
3642       } else {
3643          p = tc_add_call(tc, TC_CALL_flush, tc_flush_call);
3644          p->tc = tc;
3645       }
3646       p->fence = fence ? *fence : NULL;
3647       p->flags = flags | TC_FLUSH_ASYNC;
3648 
3649       if (!deferred) {
3650          /* non-deferred async flushes indicate completion of existing renderpass info */
3651          tc_signal_renderpass_info_ready(tc);
3652          tc_batch_flush(tc, false);
3653          tc->seen_fb_state = false;
3654       }
3655 
3656       return;
3657    }
3658 
3659 out_of_memory:
3660    tc->flushing = true;
3661    /* renderpass info is signaled during sync */
3662    tc_sync_msg(tc, flags & PIPE_FLUSH_END_OF_FRAME ? "end of frame" :
3663                    flags & PIPE_FLUSH_DEFERRED ? "deferred fence" : "normal");
3664 
3665    if (!deferred) {
3666       tc_flush_queries(tc);
3667       tc->seen_fb_state = false;
3668       tc->query_ended = false;
3669    }
3670    tc_set_driver_thread(tc);
3671    pipe->flush(pipe, fence, flags);
3672    tc_clear_driver_thread(tc);
3673    tc->flushing = false;
3674 }
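
/* Sketch of the two asynchronous paths handled above ("ctx" is the
 * pipe_context exposed by the threaded context; illustrative only):
 *
 *    struct pipe_fence_handle *fence = NULL;
 *    ctx->flush(ctx, &fence, PIPE_FLUSH_DEFERRED);   (enqueued, batch kept open)
 *    ctx->flush(ctx, &fence, PIPE_FLUSH_ASYNC);      (enqueued, batch flushed)
 *
 * Both rely on tc->options.create_fence to hand the frontend a fence before
 * the driver has actually flushed; without that callback the function falls
 * through to the synchronous path below the out_of_memory label.
 */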
3675 
3676 struct tc_draw_single_drawid {
3677    struct tc_draw_single base;
3678    unsigned drawid_offset;
3679 };
3680 
3681 static uint16_t
3682 tc_call_draw_single_drawid(struct pipe_context *pipe, void *call)
3683 {
3684    struct tc_draw_single_drawid *info_drawid = to_call(call, tc_draw_single_drawid);
3685    struct tc_draw_single *info = &info_drawid->base;
3686 
3687    /* u_threaded_context stores start/count in min/max_index for single draws. */
3688    /* Drivers using u_threaded_context shouldn't use min/max_index. */
3689    struct pipe_draw_start_count_bias draw;
3690 
3691    draw.start = info->info.min_index;
3692    draw.count = info->info.max_index;
3693    draw.index_bias = info->index_bias;
3694 
3695    info->info.index_bounds_valid = false;
3696    info->info.has_user_indices = false;
3697    info->info.take_index_buffer_ownership = false;
3698 
3699    pipe->draw_vbo(pipe, &info->info, info_drawid->drawid_offset, NULL, &draw, 1);
3700    if (info->info.index_size)
3701       tc_drop_resource_reference(info->info.index.resource);
3702 
3703    return call_size(tc_draw_single_drawid);
3704 }
3705 
3706 static void
3707 simplify_draw_info(struct pipe_draw_info *info)
3708 {
3709    /* Clear these fields to facilitate draw merging.
3710     * Drivers shouldn't use them.
3711     */
3712    info->has_user_indices = false;
3713    info->index_bounds_valid = false;
3714    info->take_index_buffer_ownership = false;
3715    info->index_bias_varies = false;
3716    info->_pad = 0;
3717 
3718    /* This shouldn't be set when merging single draws. */
3719    info->increment_draw_id = false;
3720 
3721    if (info->index_size) {
3722       if (!info->primitive_restart)
3723          info->restart_index = 0;
3724    } else {
3725       assert(!info->primitive_restart);
3726       info->primitive_restart = false;
3727       info->restart_index = 0;
3728       info->index.resource = NULL;
3729    }
3730 }
3731 
3732 static bool
3733 is_next_call_a_mergeable_draw(struct tc_draw_single *first,
3734                               struct tc_draw_single *next)
3735 {
3736    if (next->base.call_id != TC_CALL_draw_single)
3737       return false;
3738 
3739    STATIC_ASSERT(offsetof(struct pipe_draw_info, min_index) ==
3740                  sizeof(struct pipe_draw_info) - 8);
3741    STATIC_ASSERT(offsetof(struct pipe_draw_info, max_index) ==
3742                  sizeof(struct pipe_draw_info) - 4);
3743    /* All fields must be the same except start and count. */
3744    /* u_threaded_context stores start/count in min/max_index for single draws. */
3745    return memcmp((uint32_t*)&first->info, (uint32_t*)&next->info,
3746                  DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX) == 0;
3747 }
3748 
3749 static uint16_t
3750 tc_call_draw_single(struct pipe_context *pipe, void *call)
3751 {
3752    /* Draw call merging. */
3753    struct tc_draw_single *first = to_call(call, tc_draw_single);
3754    struct tc_draw_single *next = get_next_call(first, tc_draw_single);
3755 
3756    /* If at least 2 consecutive draw calls can be merged... */
3757    if (next->base.call_id == TC_CALL_draw_single) {
3758       if (is_next_call_a_mergeable_draw(first, next)) {
3759          /* The maximum number of merged draws is given by the batch size. */
3760          struct pipe_draw_start_count_bias multi[TC_SLOTS_PER_BATCH / call_size(tc_draw_single)];
3761          unsigned num_draws = 2;
3762          bool index_bias_varies = first->index_bias != next->index_bias;
3763 
3764          /* u_threaded_context stores start/count in min/max_index for single draws. */
3765          multi[0].start = first->info.min_index;
3766          multi[0].count = first->info.max_index;
3767          multi[0].index_bias = first->index_bias;
3768          multi[1].start = next->info.min_index;
3769          multi[1].count = next->info.max_index;
3770          multi[1].index_bias = next->index_bias;
3771 
3772          /* Find how many other draws can be merged. */
3773          next = get_next_call(next, tc_draw_single);
3774          for (; is_next_call_a_mergeable_draw(first, next);
3775               next = get_next_call(next, tc_draw_single), num_draws++) {
3776             /* u_threaded_context stores start/count in min/max_index for single draws. */
3777             multi[num_draws].start = next->info.min_index;
3778             multi[num_draws].count = next->info.max_index;
3779             multi[num_draws].index_bias = next->index_bias;
3780             index_bias_varies |= first->index_bias != next->index_bias;
3781          }
3782 
3783          first->info.index_bias_varies = index_bias_varies;
3784          pipe->draw_vbo(pipe, &first->info, 0, NULL, multi, num_draws);
3785 
3786          /* Since all draws use the same index buffer, drop all references at once. */
3787          if (first->info.index_size)
3788             pipe_drop_resource_references(first->info.index.resource, num_draws);
3789 
3790          return call_size(tc_draw_single) * num_draws;
3791       }
3792    }
3793 
3794    /* u_threaded_context stores start/count in min/max_index for single draws. */
3795    /* Drivers using u_threaded_context shouldn't use min/max_index. */
3796    struct pipe_draw_start_count_bias draw;
3797 
3798    draw.start = first->info.min_index;
3799    draw.count = first->info.max_index;
3800    draw.index_bias = first->index_bias;
3801 
3802    first->info.index_bounds_valid = false;
3803    first->info.has_user_indices = false;
3804    first->info.take_index_buffer_ownership = false;
3805 
3806    pipe->draw_vbo(pipe, &first->info, 0, NULL, &draw, 1);
3807    if (first->info.index_size)
3808       tc_drop_resource_reference(first->info.index.resource);
3809 
3810    return call_size(tc_draw_single);
3811 }
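
/* Draw-merge example (informal): five consecutive draws recorded with
 * identical pipe_draw_info state and the same index buffer land here as five
 * adjacent tc_draw_single slots.  The first one detects the next four as
 * mergeable, gathers their start/count/index_bias into multi[0..4] and issues
 *
 *    pipe->draw_vbo(pipe, &first->info, 0, NULL, multi, 5);
 *
 * so the driver sees a single multi-draw instead of five separate calls.
 */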
3812 
3813 struct tc_draw_indirect {
3814    struct tc_call_base base;
3815    struct pipe_draw_start_count_bias draw;
3816    struct pipe_draw_info info;
3817    struct pipe_draw_indirect_info indirect;
3818 };
3819 
3820 static uint16_t
3821 tc_call_draw_indirect(struct pipe_context *pipe, void *call)
3822 {
3823    struct tc_draw_indirect *info = to_call(call, tc_draw_indirect);
3824 
3825    info->info.index_bounds_valid = false;
3826    info->info.take_index_buffer_ownership = false;
3827 
3828    pipe->draw_vbo(pipe, &info->info, 0, &info->indirect, &info->draw, 1);
3829    if (info->info.index_size)
3830       tc_drop_resource_reference(info->info.index.resource);
3831 
3832    tc_drop_resource_reference(info->indirect.buffer);
3833    tc_drop_resource_reference(info->indirect.indirect_draw_count);
3834    tc_drop_so_target_reference(info->indirect.count_from_stream_output);
3835    return call_size(tc_draw_indirect);
3836 }
3837 
3838 struct tc_draw_multi {
3839    struct tc_call_base base;
3840    unsigned num_draws;
3841    struct pipe_draw_info info;
3842    struct pipe_draw_start_count_bias slot[]; /* variable-sized array */
3843 };
3844 
3845 static uint16_t
3846 tc_call_draw_multi(struct pipe_context *pipe, void *call)
3847 {
3848    struct tc_draw_multi *info = (struct tc_draw_multi*)call;
3849 
3850    info->info.has_user_indices = false;
3851    info->info.index_bounds_valid = false;
3852    info->info.take_index_buffer_ownership = false;
3853 
3854    pipe->draw_vbo(pipe, &info->info, 0, NULL, info->slot, info->num_draws);
3855    if (info->info.index_size)
3856       tc_drop_resource_reference(info->info.index.resource);
3857 
3858    return info->base.num_slots;
3859 }
3860 
3861 #define DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX \
3862    offsetof(struct pipe_draw_info, index)
3863 
3864 /* Single draw with drawid_offset == 0. */
3865 static void
3866 tc_draw_single(struct pipe_context *_pipe, const struct pipe_draw_info *info,
3867                unsigned drawid_offset,
3868                const struct pipe_draw_indirect_info *indirect,
3869                const struct pipe_draw_start_count_bias *draws,
3870                unsigned num_draws)
3871 {
3872    struct threaded_context *tc = threaded_context(_pipe);
3873    struct tc_draw_single *p =
3874       tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
3875 
3876    if (info->index_size) {
3877       if (!info->take_index_buffer_ownership) {
3878          tc_set_resource_reference(&p->info.index.resource,
3879                                    info->index.resource);
3880       }
3881       tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource);
3882    }
3883    memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3884    /* u_threaded_context stores start/count in min/max_index for single draws. */
3885    p->info.min_index = draws[0].start;
3886    p->info.max_index = draws[0].count;
3887    p->index_bias = draws[0].index_bias;
3888    simplify_draw_info(&p->info);
3889 }
3890 
3891 /* Single draw with drawid_offset > 0. */
3892 static void
3893 tc_draw_single_draw_id(struct pipe_context *_pipe,
3894                        const struct pipe_draw_info *info,
3895                        unsigned drawid_offset,
3896                        const struct pipe_draw_indirect_info *indirect,
3897                        const struct pipe_draw_start_count_bias *draws,
3898                        unsigned num_draws)
3899 {
3900    struct threaded_context *tc = threaded_context(_pipe);
3901    struct tc_draw_single *p =
3902       &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base;
3903 
3904    if (info->index_size) {
3905       if (!info->take_index_buffer_ownership) {
3906          tc_set_resource_reference(&p->info.index.resource,
3907                                    info->index.resource);
3908       }
3909       tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource);
3910    }
3911    ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset;
3912    memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3913    /* u_threaded_context stores start/count in min/max_index for single draws. */
3914    p->info.min_index = draws[0].start;
3915    p->info.max_index = draws[0].count;
3916    p->index_bias = draws[0].index_bias;
3917    simplify_draw_info(&p->info);
3918 }
3919 
3920 /* Single draw with user indices and drawid_offset == 0. */
3921 static void
3922 tc_draw_user_indices_single(struct pipe_context *_pipe,
3923                             const struct pipe_draw_info *info,
3924                             unsigned drawid_offset,
3925                             const struct pipe_draw_indirect_info *indirect,
3926                             const struct pipe_draw_start_count_bias *draws,
3927                             unsigned num_draws)
3928 {
3929    struct threaded_context *tc = threaded_context(_pipe);
3930    unsigned index_size = info->index_size;
3931    unsigned size = draws[0].count * index_size;
3932    struct pipe_resource *buffer = NULL;
3933    unsigned offset;
3934 
3935    if (!size)
3936       return;
3937 
3938    /* This must be done before adding draw_vbo, because it could generate
3939     * e.g. transfer_unmap and flush partially-uninitialized draw_vbo
3940     * to the driver if it was done afterwards.
3941     */
3942    u_upload_data(tc->base.stream_uploader, 0, size, 4,
3943                  (uint8_t*)info->index.user + draws[0].start * index_size,
3944                  &offset, &buffer);
3945    if (unlikely(!buffer))
3946       return;
3947 
3948    struct tc_draw_single *p =
3949       tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
3950    memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
3951    p->info.index.resource = buffer;
3952    /* u_threaded_context stores start/count in min/max_index for single draws. */
3953    p->info.min_index = offset >> util_logbase2(index_size);
3954    p->info.max_index = draws[0].count;
3955    p->index_bias = draws[0].index_bias;
3956    simplify_draw_info(&p->info);
3957 }
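
/* Example of the start computation above (hypothetical numbers): with 16-bit
 * indices (index_size == 2) and u_upload_data returning a byte offset of
 * 4096, the stored start is 4096 >> 1 == 2048, i.e. the element index of the
 * uploaded data within the new index buffer.
 */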
3958 
3959 /* Single draw with user indices and drawid_offset > 0. */
3960 static void
3961 tc_draw_user_indices_single_draw_id(struct pipe_context *_pipe,
3962                                     const struct pipe_draw_info *info,
3963                                     unsigned drawid_offset,
3964                                     const struct pipe_draw_indirect_info *indirect,
3965                                     const struct pipe_draw_start_count_bias *draws,
3966                                     unsigned num_draws)
3967 {
3968    struct threaded_context *tc = threaded_context(_pipe);
3969    unsigned index_size = info->index_size;
3970    unsigned size = draws[0].count * index_size;
3971    struct pipe_resource *buffer = NULL;
3972    unsigned offset;
3973 
3974    if (!size)
3975       return;
3976 
3977    /* This must be done before adding draw_vbo, because it could generate
3978     * e.g. transfer_unmap and flush partially-uninitialized draw_vbo
3979     * to the driver if it was done afterwards.
3980     */
3981    u_upload_data(tc->base.stream_uploader, 0, size, 4,
3982                  (uint8_t*)info->index.user + draws[0].start * index_size,
3983                  &offset, &buffer);
3984    if (unlikely(!buffer))
3985       return;
3986 
3987    struct tc_draw_single *p =
3988       &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base;
3989    memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
3990    p->info.index.resource = buffer;
3991    ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset;
3992    /* u_threaded_context stores start/count in min/max_index for single draws. */
3993    p->info.min_index = offset >> util_logbase2(index_size);
3994    p->info.max_index = draws[0].count;
3995    p->index_bias = draws[0].index_bias;
3996    simplify_draw_info(&p->info);
3997 }
3998 
3999 #define DRAW_OVERHEAD_BYTES sizeof(struct tc_draw_multi)
4000 #define ONE_DRAW_SLOT_BYTES sizeof(((struct tc_draw_multi*)NULL)->slot[0])
4001 
4002 #define SLOTS_FOR_ONE_DRAW \
4003    DIV_ROUND_UP(DRAW_OVERHEAD_BYTES + ONE_DRAW_SLOT_BYTES, \
4004                 sizeof(struct tc_call_base))
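
/* Example of the slot math (symbolic, since the struct sizes are
 * target-dependent): with sizeof(struct tc_call_base) == B,
 * DRAW_OVERHEAD_BYTES == O and ONE_DRAW_SLOT_BYTES == S, a batch with
 * nb_slots_left free slots can take
 *
 *    dr = (nb_slots_left * B - O) / S
 *
 * draws, which is the MIN2() computation in tc_draw_multi and
 * tc_draw_user_indices_multi below.
 */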
4005 
4006 static void
4007 tc_draw_multi(struct pipe_context *_pipe, const struct pipe_draw_info *info,
4008               unsigned drawid_offset,
4009               const struct pipe_draw_indirect_info *indirect,
4010               const struct pipe_draw_start_count_bias *draws,
4011               unsigned num_draws)
4012 {
4013    struct threaded_context *tc = threaded_context(_pipe);
4014    int total_offset = 0;
4015    bool take_index_buffer_ownership = info->take_index_buffer_ownership;
4016 
4017    while (num_draws) {
4018       struct tc_batch *next = &tc->batch_slots[tc->next];
4019 
4020       int nb_slots_left = TC_SLOTS_PER_BATCH - 1 - next->num_total_slots;
4021       /* If there isn't enough room for one draw, try to fill the next batch */
4022       if (nb_slots_left < SLOTS_FOR_ONE_DRAW)
4023          nb_slots_left = TC_SLOTS_PER_BATCH - 1;
4024       const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
4025 
4026       /* How many draws can we fit in the current batch */
4027       const int dr = MIN2(num_draws, (size_left_bytes - DRAW_OVERHEAD_BYTES) /
4028                           ONE_DRAW_SLOT_BYTES);
4029 
4030       /* Non-indexed call or indexed with a real index buffer. */
4031       struct tc_draw_multi *p =
4032          tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi,
4033                                 dr);
4034       if (info->index_size) {
4035          if (!take_index_buffer_ownership) {
4036             tc_set_resource_reference(&p->info.index.resource,
4037                                       info->index.resource);
4038          }
4039          tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource);
4040       }
4041       take_index_buffer_ownership = false;
4042       memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
4043       p->num_draws = dr;
4044       memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr);
4045       num_draws -= dr;
4046 
4047       total_offset += dr;
4048    }
4049 }
4050 
4051 static void
4052 tc_draw_user_indices_multi(struct pipe_context *_pipe,
4053                            const struct pipe_draw_info *info,
4054                            unsigned drawid_offset,
4055                            const struct pipe_draw_indirect_info *indirect,
4056                            const struct pipe_draw_start_count_bias *draws,
4057                            unsigned num_draws)
4058 {
4059    struct threaded_context *tc = threaded_context(_pipe);
4060    struct pipe_resource *buffer = NULL;
4061    unsigned buffer_offset, total_count = 0;
4062    unsigned index_size_shift = util_logbase2(info->index_size);
4063    uint8_t *ptr = NULL;
4064 
4065    /* Get the total count. */
4066    for (unsigned i = 0; i < num_draws; i++)
4067       total_count += draws[i].count;
4068 
4069    if (!total_count)
4070       return;
4071 
4072    /* Allocate space for all index buffers.
4073     *
4074     * This must be done before adding draw_vbo: if the upload happened
4075     * afterwards, it could trigger e.g. transfer_unmap and flush a not yet
4076     * fully initialized draw_vbo call to the driver.
4077     */
4078    u_upload_alloc(tc->base.stream_uploader, 0,
4079                   total_count << index_size_shift, 4,
4080                   &buffer_offset, &buffer, (void**)&ptr);
4081    if (unlikely(!buffer))
4082       return;
4083 
4084    int total_offset = 0;
4085    unsigned offset = 0;
4086    while (num_draws) {
4087       struct tc_batch *next = &tc->batch_slots[tc->next];
4088 
4089       int nb_slots_left = TC_SLOTS_PER_BATCH - 1 - next->num_total_slots;
4090       /* If there isn't enough room for even one draw, size the chunk for the next batch instead. */
4091       if (nb_slots_left < SLOTS_FOR_ONE_DRAW)
4092          nb_slots_left = TC_SLOTS_PER_BATCH - 1;
4093       const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
4094 
4095       /* How many draws can we fit in the current batch */
4096       const int dr = MIN2(num_draws, (size_left_bytes - DRAW_OVERHEAD_BYTES) /
4097                           ONE_DRAW_SLOT_BYTES);
4098 
4099       struct tc_draw_multi *p =
4100          tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi,
4101                                 dr);
4102       memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
4103 
4104       if (total_offset == 0)
4105          /* the first slot inherits the reference from u_upload_alloc() */
4106          p->info.index.resource = buffer;
4107       else
4108          /* all following slots need a new reference */
4109          tc_set_resource_reference(&p->info.index.resource, buffer);
4110 
4111       p->num_draws = dr;
4112 
4113       /* Upload index buffers. */
4114       for (unsigned i = 0; i < dr; i++) {
4115          unsigned count = draws[i + total_offset].count;
4116 
4117          if (!count) {
4118             p->slot[i].start = 0;
4119             p->slot[i].count = 0;
4120             p->slot[i].index_bias = 0;
4121             continue;
4122          }
4123 
4124          unsigned size = count << index_size_shift;
4125          memcpy(ptr + offset,
4126                 (uint8_t*)info->index.user +
4127                 (draws[i + total_offset].start << index_size_shift), size);
4128          p->slot[i].start = (buffer_offset + offset) >> index_size_shift;
4129          p->slot[i].count = count;
4130          p->slot[i].index_bias = draws[i + total_offset].index_bias;
4131          offset += size;
4132       }
4133 
4134       total_offset += dr;
4135       num_draws -= dr;
4136    }
4137 }
4138 
4139 static void
4140 tc_draw_indirect(struct pipe_context *_pipe, const struct pipe_draw_info *info,
4141                  unsigned drawid_offset,
4142                  const struct pipe_draw_indirect_info *indirect,
4143                  const struct pipe_draw_start_count_bias *draws,
4144                  unsigned num_draws)
4145 {
4146    struct threaded_context *tc = threaded_context(_pipe);
4147    assert(!info->has_user_indices);
4148    assert(num_draws == 1);
4149 
4150    struct tc_draw_indirect *p =
4151       tc_add_call(tc, TC_CALL_draw_indirect, tc_draw_indirect);
4152    struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
4153 
4154    if (info->index_size) {
4155       if (!info->take_index_buffer_ownership) {
4156          tc_set_resource_reference(&p->info.index.resource,
4157                                    info->index.resource);
4158       }
4159       tc_add_to_buffer_list(next, info->index.resource);
4160    }
4161    memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
4162 
4163    tc_set_resource_reference(&p->indirect.buffer, indirect->buffer);
4164    tc_set_resource_reference(&p->indirect.indirect_draw_count,
4165                              indirect->indirect_draw_count);
4166    p->indirect.count_from_stream_output = NULL;
4167    pipe_so_target_reference(&p->indirect.count_from_stream_output,
4168                             indirect->count_from_stream_output);
4169 
4170    if (indirect->buffer)
4171       tc_add_to_buffer_list(next, indirect->buffer);
4172    if (indirect->indirect_draw_count)
4173       tc_add_to_buffer_list(next, indirect->indirect_draw_count);
4174    if (indirect->count_from_stream_output)
4175       tc_add_to_buffer_list(next, indirect->count_from_stream_output->buffer);
4176 
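   /* The memcpy below overwrites the pointers referenced above with the same
    * values, so the references that were just taken remain valid.
    */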
4177    memcpy(&p->indirect, indirect, sizeof(*indirect));
4178    p->draw.start = draws[0].start;
4179 }
4180 
4181 /* Dispatch table for tc_draw_vbo:
4182  *
4183  * Indexed by:
4184  *    [is_indirect * 8 + index_size_and_has_user_indices * 4 +
4185  *     is_multi_draw * 2 + non_zero_draw_id]
4186  */
4187 static pipe_draw_func draw_funcs[16] = {
4188    tc_draw_single,
4189    tc_draw_single_draw_id,
4190    tc_draw_multi,
4191    tc_draw_multi,
4192    tc_draw_user_indices_single,
4193    tc_draw_user_indices_single_draw_id,
4194    tc_draw_user_indices_multi,
4195    tc_draw_user_indices_multi,
4196    tc_draw_indirect,
4197    tc_draw_indirect,
4198    tc_draw_indirect,
4199    tc_draw_indirect,
4200    tc_draw_indirect,
4201    tc_draw_indirect,
4202    tc_draw_indirect,
4203    tc_draw_indirect,
4204 };
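/* Example: a direct draw with user indices, num_draws > 1 and
 * drawid_offset == 0 yields index 0*8 + 1*4 + 1*2 + 0 = 6, i.e.
 * tc_draw_user_indices_multi, while any indirect draw lands in the upper
 * half of the table and always resolves to tc_draw_indirect.
 */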
4205 
4206 void
4207 tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info,
4208             unsigned drawid_offset,
4209             const struct pipe_draw_indirect_info *indirect,
4210             const struct pipe_draw_start_count_bias *draws,
4211             unsigned num_draws)
4212 {
4213    STATIC_ASSERT(DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX +
4214                  sizeof(intptr_t) == offsetof(struct pipe_draw_info, min_index));
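   /* i.e. the index union is expected to be pointer-sized and to sit
    * immediately before min_index, which the DRAW_INFO_SIZE_* based memcpys
    * in the draw paths rely on.
    */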
4215 
4216    struct threaded_context *tc = threaded_context(_pipe);
4217    if (tc->options.parse_renderpass_info)
4218       tc_parse_draw(tc);
4219 
4220    /* Use a function table to call the desired variant of draw_vbo. */
4221    unsigned index = (indirect != NULL) * 8 +
4222                     (info->index_size && info->has_user_indices) * 4 +
4223                     (num_draws > 1) * 2 + (drawid_offset != 0);
4224    draw_funcs[index](_pipe, info, drawid_offset, indirect, draws, num_draws);
4225 
4226    /* This must be after tc_add_*call, which can flush the batch. */
4227    if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
4228       tc_add_all_gfx_bindings_to_buffer_list(tc);
4229 }
4230 
4231 struct tc_draw_single *
4232 tc_add_draw_single_call(struct pipe_context *_pipe,
4233                         struct pipe_resource *index_bo)
4234 {
4235    struct threaded_context *tc = threaded_context(_pipe);
4236 
4237    if (tc->options.parse_renderpass_info)
4238       tc_parse_draw(tc);
4239 
4240    struct tc_draw_single *p =
4241       tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
4242 
4243    if (index_bo)
4244       tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], index_bo);
4245 
4246    /* This must be after tc_add_*call, which can flush the batch. */
4247    if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
4248       tc_add_all_gfx_bindings_to_buffer_list(tc);
4249 
4250    return p;
4251 }
4252 
4253 struct tc_draw_vstate_single {
4254    struct tc_call_base base;
4255    struct pipe_draw_start_count_bias draw;
4256 
4257    /* The following states must be together without holes because they are
4258     * compared by draw merging.
4259     */
4260    struct pipe_vertex_state *state;
4261    uint32_t partial_velem_mask;
4262    struct pipe_draw_vertex_state_info info;
4263 };
4264 
4265 static bool
4266 is_next_call_a_mergeable_draw_vstate(struct tc_draw_vstate_single *first,
4267                                      struct tc_draw_vstate_single *next)
4268 {
4269    if (next->base.call_id != TC_CALL_draw_vstate_single)
4270       return false;
4271 
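   /* Compare state, partial_velem_mask and info as one contiguous block;
    * tc_draw_vstate_single keeps these fields together without holes for
    * exactly this purpose.
    */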
4272    return !memcmp(&first->state, &next->state,
4273                   offsetof(struct tc_draw_vstate_single, info) +
4274                   sizeof(struct pipe_draw_vertex_state_info) -
4275                   offsetof(struct tc_draw_vstate_single, state));
4276 }
4277 
4278 static uint16_t
4279 tc_call_draw_vstate_single(struct pipe_context *pipe, void *call)
4280 {
4281    /* Draw call merging. */
4282    struct tc_draw_vstate_single *first = to_call(call, tc_draw_vstate_single);
4283    struct tc_draw_vstate_single *next = get_next_call(first, tc_draw_vstate_single);
4284 
4285    /* If at least 2 consecutive draw calls can be merged... */
4286    if (is_next_call_a_mergeable_draw_vstate(first, next)) {
4287       /* The maximum number of merged draws is given by the batch size. */
4288       struct pipe_draw_start_count_bias draws[TC_SLOTS_PER_BATCH /
4289                                               call_size(tc_draw_vstate_single)];
4290       unsigned num_draws = 2;
4291 
4292       draws[0] = first->draw;
4293       draws[1] = next->draw;
4294 
4295       /* Find how many other draws can be merged. */
4296       next = get_next_call(next, tc_draw_vstate_single);
4297       for (; is_next_call_a_mergeable_draw_vstate(first, next);
4298            next = get_next_call(next, tc_draw_vstate_single),
4299            num_draws++)
4300          draws[num_draws] = next->draw;
4301 
4302       pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask,
4303                               first->info, draws, num_draws);
4304       /* Since all draws use the same state, drop all references at once. */
4305       tc_drop_vertex_state_references(first->state, num_draws);
4306 
4307       return call_size(tc_draw_vstate_single) * num_draws;
4308    }
4309 
4310    pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask,
4311                            first->info, &first->draw, 1);
4312    tc_drop_vertex_state_references(first->state, 1);
4313    return call_size(tc_draw_vstate_single);
4314 }
4315 
4316 struct tc_draw_vstate_multi {
4317    struct tc_call_base base;
4318    uint32_t partial_velem_mask;
4319    struct pipe_draw_vertex_state_info info;
4320    unsigned num_draws;
4321    struct pipe_vertex_state *state;
4322    struct pipe_draw_start_count_bias slot[0];
4323 };
4324 
4325 static uint16_t
4326 tc_call_draw_vstate_multi(struct pipe_context *pipe, void *call)
4327 {
4328    struct tc_draw_vstate_multi *info = (struct tc_draw_vstate_multi*)call;
4329 
4330    pipe->draw_vertex_state(pipe, info->state, info->partial_velem_mask,
4331                            info->info, info->slot, info->num_draws);
4332    tc_drop_vertex_state_references(info->state, 1);
4333    return info->base.num_slots;
4334 }
4335 
4336 static void
4337 tc_draw_vertex_state(struct pipe_context *_pipe,
4338                      struct pipe_vertex_state *state,
4339                      uint32_t partial_velem_mask,
4340                      struct pipe_draw_vertex_state_info info,
4341                      const struct pipe_draw_start_count_bias *draws,
4342                      unsigned num_draws)
4343 {
4344    struct threaded_context *tc = threaded_context(_pipe);
4345    if (tc->options.parse_renderpass_info)
4346       tc_parse_draw(tc);
4347 
4348    if (num_draws == 1) {
4349       /* Single draw. */
4350       struct tc_draw_vstate_single *p =
4351          tc_add_call(tc, TC_CALL_draw_vstate_single, tc_draw_vstate_single);
4352       p->partial_velem_mask = partial_velem_mask;
4353       p->draw = draws[0];
4354       p->info.mode = info.mode;
4355       p->info.take_vertex_state_ownership = false;
4356 
4357       /* This should always be 0 for simplicity because we assume that
4358        * index_bias doesn't vary.
4359        */
4360       assert(draws[0].index_bias == 0);
4361 
4362       if (!info.take_vertex_state_ownership)
4363          tc_set_vertex_state_reference(&p->state, state);
4364       else
4365          p->state = state;
4366 
4367 
4368       /* This must be after tc_add_*call, which can flush the batch. */
4369       if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
4370          tc_add_all_gfx_bindings_to_buffer_list(tc);
4371       return;
4372    }
4373 
4374    const int draw_overhead_bytes = sizeof(struct tc_draw_vstate_multi);
4375    const int one_draw_slot_bytes = sizeof(((struct tc_draw_vstate_multi*)NULL)->slot[0]);
4376    const int slots_for_one_draw = DIV_ROUND_UP(draw_overhead_bytes + one_draw_slot_bytes,
4377                                                sizeof(struct tc_call_base));
4378    /* Multi draw. */
4379    int total_offset = 0;
4380    bool take_vertex_state_ownership = info.take_vertex_state_ownership;
4381    while (num_draws) {
4382       struct tc_batch *next = &tc->batch_slots[tc->next];
4383 
4384       int nb_slots_left = TC_SLOTS_PER_BATCH - 1 - next->num_total_slots;
4385       /* If there isn't enough room for even one draw, size the chunk for the next batch instead. */
4386       if (nb_slots_left < slots_for_one_draw)
4387          nb_slots_left = TC_SLOTS_PER_BATCH - 1;
4388       const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
4389 
4390       /* How many draws can we fit in the current batch */
4391       const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes);
4392 
4393       /* Add a chunk of draws to the current batch. */
4394       struct tc_draw_vstate_multi *p =
4395          tc_add_slot_based_call(tc, TC_CALL_draw_vstate_multi, tc_draw_vstate_multi, dr);
4396 
4397       if (!take_vertex_state_ownership)
4398          tc_set_vertex_state_reference(&p->state, state);
4399       else
4400          p->state = state;
4401 
4402       take_vertex_state_ownership = false;
4403       p->partial_velem_mask = partial_velem_mask;
4404       p->info.mode = info.mode;
4405       p->info.take_vertex_state_ownership = false;
4406       p->num_draws = dr;
4407       memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr);
4408       num_draws -= dr;
4409 
4410       total_offset += dr;
4411    }
4412 
4413 
4414    /* This must be after tc_add_*call, which can flush the batch. */
4415    if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
4416       tc_add_all_gfx_bindings_to_buffer_list(tc);
4417 }
4418 
4419 struct tc_launch_grid_call {
4420    struct tc_call_base base;
4421    struct pipe_grid_info info;
4422 };
4423 
4424 static uint16_t
4425 tc_call_launch_grid(struct pipe_context *pipe, void *call)
4426 {
4427    struct pipe_grid_info *p = &to_call(call, tc_launch_grid_call)->info;
4428 
4429    pipe->launch_grid(pipe, p);
4430    tc_drop_resource_reference(p->indirect);
4431    return call_size(tc_launch_grid_call);
4432 }
4433 
4434 static void
4435 tc_launch_grid(struct pipe_context *_pipe,
4436                const struct pipe_grid_info *info)
4437 {
4438    struct threaded_context *tc = threaded_context(_pipe);
4439    struct tc_launch_grid_call *p = tc_add_call(tc, TC_CALL_launch_grid,
4440                                                tc_launch_grid_call);
4441    assert(info->input == NULL);
4442 
4443    tc_set_resource_reference(&p->info.indirect, info->indirect);
4444    memcpy(&p->info, info, sizeof(*info));
4445 
4446    if (info->indirect)
4447       tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->indirect);
4448 
4449    /* This must be after tc_add_*call, which can flush the batch. */
4450    if (unlikely(tc->add_all_compute_bindings_to_buffer_list))
4451       tc_add_all_compute_bindings_to_buffer_list(tc);
4452 }
4453 
4454 static uint16_t
4455 tc_call_resource_copy_region(struct pipe_context *pipe, void *call)
4456 {
4457    struct tc_resource_copy_region *p = to_call(call, tc_resource_copy_region);
4458 
4459    pipe->resource_copy_region(pipe, p->dst, p->dst_level, p->dstx, p->dsty,
4460                               p->dstz, p->src, p->src_level, &p->src_box);
4461    tc_drop_resource_reference(p->dst);
4462    tc_drop_resource_reference(p->src);
4463    return call_size(tc_resource_copy_region);
4464 }
4465 
4466 static void
4467 tc_resource_copy_region(struct pipe_context *_pipe,
4468                         struct pipe_resource *dst, unsigned dst_level,
4469                         unsigned dstx, unsigned dsty, unsigned dstz,
4470                         struct pipe_resource *src, unsigned src_level,
4471                         const struct pipe_box *src_box)
4472 {
4473    struct threaded_context *tc = threaded_context(_pipe);
4474    struct threaded_resource *tdst = threaded_resource(dst);
4475    struct tc_resource_copy_region *p =
4476       tc_add_call(tc, TC_CALL_resource_copy_region,
4477                   tc_resource_copy_region);
4478 
4479    if (dst->target == PIPE_BUFFER)
4480       tc_buffer_disable_cpu_storage(dst);
4481 
4482    tc_set_resource_batch_usage(tc, dst);
4483    tc_set_resource_reference(&p->dst, dst);
4484    p->dst_level = dst_level;
4485    p->dstx = dstx;
4486    p->dsty = dsty;
4487    p->dstz = dstz;
4488    tc_set_resource_batch_usage(tc, src);
4489    tc_set_resource_reference(&p->src, src);
4490    p->src_level = src_level;
4491    p->src_box = *src_box;
4492 
4493    if (dst->target == PIPE_BUFFER) {
4494       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
4495 
4496       tc_add_to_buffer_list(next, src);
4497       tc_add_to_buffer_list(next, dst);
4498 
4499       util_range_add(&tdst->b, &tdst->valid_buffer_range,
4500                      dstx, dstx + src_box->width);
4501    }
4502 }
4503 
4504 struct tc_blit_call {
4505    struct tc_call_base base;
4506    struct pipe_blit_info info;
4507 };
4508 
4509 static uint16_t
4510 tc_call_blit(struct pipe_context *pipe, void *call)
4511 {
4512    struct pipe_blit_info *blit = &to_call(call, tc_blit_call)->info;
4513 
4514    pipe->blit(pipe, blit);
4515    tc_drop_resource_reference(blit->dst.resource);
4516    tc_drop_resource_reference(blit->src.resource);
4517    return call_size(tc_blit_call);
4518 }
4519 
4520 static void
4521 tc_blit_enqueue(struct threaded_context *tc, const struct pipe_blit_info *info)
4522 {
4523    struct tc_blit_call *blit = tc_add_call(tc, TC_CALL_blit, tc_blit_call);
4524 
4525    tc_set_resource_batch_usage(tc, info->dst.resource);
4526    tc_set_resource_reference(&blit->info.dst.resource, info->dst.resource);
4527    tc_set_resource_batch_usage(tc, info->src.resource);
4528    tc_set_resource_reference(&blit->info.src.resource, info->src.resource);
4529    memcpy(&blit->info, info, sizeof(*info));
4530 }
4531 
4532 static void
4533 tc_blit(struct pipe_context *_pipe, const struct pipe_blit_info *info)
4534 {
4535    struct threaded_context *tc = threaded_context(_pipe);
4536 
4537    /* Enqueue untracked blits and non-resolves directly. */
4538    if (!tc->options.parse_renderpass_info ||
4539        info->src.resource->nr_samples <= 1 ||
4540        info->dst.resource->nr_samples > 1) {
4541       tc_blit_enqueue(tc, info);
4542       return;
4543    }
4544 
4545    if (tc->fb_resolve == info->dst.resource) {
4546       /* optimize out this blit entirely */
4547       tc->renderpass_info_recording->has_resolve = true;
4548       return;
4549    }
4550    for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
4551       if (tc->fb_resources[i] == info->src.resource) {
4552          tc->renderpass_info_recording->has_resolve = true;
4553          break;
4554       }
4555    }
4556    tc_blit_enqueue(tc, info);
4557 }
4558 
4559 struct tc_generate_mipmap {
4560    struct tc_call_base base;
4561    enum pipe_format format;
4562    unsigned base_level;
4563    unsigned last_level;
4564    unsigned first_layer;
4565    unsigned last_layer;
4566    struct pipe_resource *res;
4567 };
4568 
4569 static uint16_t
4570 tc_call_generate_mipmap(struct pipe_context *pipe, void *call)
4571 {
4572    struct tc_generate_mipmap *p = to_call(call, tc_generate_mipmap);
4573    ASSERTED bool result = pipe->generate_mipmap(pipe, p->res, p->format,
4574                                                     p->base_level,
4575                                                     p->last_level,
4576                                                     p->first_layer,
4577                                                     p->last_layer);
4578    assert(result);
4579    tc_drop_resource_reference(p->res);
4580    return call_size(tc_generate_mipmap);
4581 }
4582 
4583 static bool
4584 tc_generate_mipmap(struct pipe_context *_pipe,
4585                    struct pipe_resource *res,
4586                    enum pipe_format format,
4587                    unsigned base_level,
4588                    unsigned last_level,
4589                    unsigned first_layer,
4590                    unsigned last_layer)
4591 {
4592    struct threaded_context *tc = threaded_context(_pipe);
4593    struct pipe_context *pipe = tc->pipe;
4594    struct pipe_screen *screen = pipe->screen;
4595    unsigned bind = PIPE_BIND_SAMPLER_VIEW;
4596 
4597    if (util_format_is_depth_or_stencil(format))
4598       bind = PIPE_BIND_DEPTH_STENCIL;
4599    else
4600       bind = PIPE_BIND_RENDER_TARGET;
4601 
4602    if (!screen->is_format_supported(screen, format, res->target,
4603                                     res->nr_samples, res->nr_storage_samples,
4604                                     bind))
4605       return false;
4606 
4607    struct tc_generate_mipmap *p =
4608       tc_add_call(tc, TC_CALL_generate_mipmap, tc_generate_mipmap);
4609 
4610    tc_set_resource_batch_usage(tc, res);
4611    tc_set_resource_reference(&p->res, res);
4612    p->format = format;
4613    p->base_level = base_level;
4614    p->last_level = last_level;
4615    p->first_layer = first_layer;
4616    p->last_layer = last_layer;
4617    return true;
4618 }
4619 
4620 struct tc_resource_call {
4621    struct tc_call_base base;
4622    struct pipe_resource *resource;
4623 };
4624 
4625 static uint16_t
4626 tc_call_flush_resource(struct pipe_context *pipe, void *call)
4627 {
4628    struct pipe_resource *resource = to_call(call, tc_resource_call)->resource;
4629 
4630    pipe->flush_resource(pipe, resource);
4631    tc_drop_resource_reference(resource);
4632    return call_size(tc_resource_call);
4633 }
4634 
4635 static void
4636 tc_flush_resource(struct pipe_context *_pipe, struct pipe_resource *resource)
4637 {
4638    struct threaded_context *tc = threaded_context(_pipe);
4639    struct tc_resource_call *call = tc_add_call(tc, TC_CALL_flush_resource,
4640                                                tc_resource_call);
4641 
4642    tc_set_resource_batch_usage(tc, resource);
4643    tc_set_resource_reference(&call->resource, resource);
4644 }
4645 
4646 static uint16_t
4647 tc_call_invalidate_resource(struct pipe_context *pipe, void *call)
4648 {
4649    struct pipe_resource *resource = to_call(call, tc_resource_call)->resource;
4650 
4651    pipe->invalidate_resource(pipe, resource);
4652    tc_drop_resource_reference(resource);
4653    return call_size(tc_resource_call);
4654 }
4655 
4656 static void
4657 tc_invalidate_resource(struct pipe_context *_pipe,
4658                        struct pipe_resource *resource)
4659 {
4660    struct threaded_context *tc = threaded_context(_pipe);
4661 
4662    if (resource->target == PIPE_BUFFER) {
4663       tc_invalidate_buffer(tc, threaded_resource(resource));
4664       return;
4665    }
4666 
4667    struct tc_resource_call *call = tc_add_call(tc, TC_CALL_invalidate_resource,
4668                                                tc_resource_call);
4669    tc_set_resource_batch_usage(tc, resource);
4670    tc_set_resource_reference(&call->resource, resource);
4671 
4672    struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
4673    if (info) {
4674       if (tc->fb_resources[PIPE_MAX_COLOR_BUFS] == resource) {
4675          info->zsbuf_invalidate = true;
4676       } else {
4677          for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
4678             if (tc->fb_resources[i] == resource)
4679                info->cbuf_invalidate |= BITFIELD_BIT(i);
4680          }
4681       }
4682    }
4683 }
4684 
4685 struct tc_clear {
4686    struct tc_call_base base;
4687    bool scissor_state_set;
4688    uint8_t stencil;
4689    uint16_t buffers;
4690    float depth;
4691    struct pipe_scissor_state scissor_state;
4692    union pipe_color_union color;
4693 };
4694 
4695 static uint16_t
4696 tc_call_clear(struct pipe_context *pipe, void *call)
4697 {
4698    struct tc_clear *p = to_call(call, tc_clear);
4699 
4700    pipe->clear(pipe, p->buffers, p->scissor_state_set ? &p->scissor_state : NULL, &p->color, p->depth, p->stencil);
4701    return call_size(tc_clear);
4702 }
4703 
4704 static void
4705 tc_clear(struct pipe_context *_pipe, unsigned buffers, const struct pipe_scissor_state *scissor_state,
4706          const union pipe_color_union *color, double depth,
4707          unsigned stencil)
4708 {
4709    struct threaded_context *tc = threaded_context(_pipe);
4710    struct tc_clear *p = tc_add_call(tc, TC_CALL_clear, tc_clear);
4711 
4712    p->buffers = buffers;
4713    if (scissor_state) {
4714       p->scissor_state = *scissor_state;
4715       struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
4716       /* partial clear info is useful for drivers to know whether any zs writes occur;
4717        * drivers are responsible for optimizing partial clear -> full clear
4718        */
4719       if (info && buffers & PIPE_CLEAR_DEPTHSTENCIL)
4720          info->zsbuf_clear_partial |= !info->zsbuf_clear;
4721    } else {
4722       struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
4723       if (info) {
4724          /* full clears use a different load operation, but are only valid if draws haven't occurred yet */
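         /* PIPE_CLEAR_COLOR0 is bit 2, so buffers >> 2 is a per-colorbuf
          * mask; buffers that were already loaded can't be promoted to
          * clears anymore.
          */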
4725          info->cbuf_clear |= (buffers >> 2) & ~info->cbuf_load;
4726          if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
4727             if (!info->zsbuf_load && !info->zsbuf_clear_partial)
4728                info->zsbuf_clear = true;
4729             else if (!info->zsbuf_clear)
4730                /* this is a clear that occurred after a draw: flag as partial to ensure it isn't ignored */
4731                info->zsbuf_clear_partial = true;
4732          }
4733       }
4734    }
4735    p->scissor_state_set = !!scissor_state;
4736    p->color = *color;
4737    p->depth = depth;
4738    p->stencil = stencil;
4739 }
4740 
4741 struct tc_clear_render_target {
4742    struct tc_call_base base;
4743    bool render_condition_enabled;
4744    unsigned dstx;
4745    unsigned dsty;
4746    unsigned width;
4747    unsigned height;
4748    union pipe_color_union color;
4749    struct pipe_surface *dst;
4750 };
4751 
4752 static uint16_t
4753 tc_call_clear_render_target(struct pipe_context *pipe, void *call)
4754 {
4755    struct tc_clear_render_target *p = to_call(call, tc_clear_render_target);
4756 
4757    pipe->clear_render_target(pipe, p->dst, &p->color, p->dstx, p->dsty, p->width, p->height,
4758                              p->render_condition_enabled);
4759    tc_drop_surface_reference(p->dst);
4760    return call_size(tc_clear_render_target);
4761 }
4762 
4763 static void
4764 tc_clear_render_target(struct pipe_context *_pipe,
4765                        struct pipe_surface *dst,
4766                        const union pipe_color_union *color,
4767                        unsigned dstx, unsigned dsty,
4768                        unsigned width, unsigned height,
4769                        bool render_condition_enabled)
4770 {
4771    struct threaded_context *tc = threaded_context(_pipe);
4772    struct tc_clear_render_target *p = tc_add_call(tc, TC_CALL_clear_render_target, tc_clear_render_target);
4773    p->dst = NULL;
4774    pipe_surface_reference(&p->dst, dst);
4775    p->color = *color;
4776    p->dstx = dstx;
4777    p->dsty = dsty;
4778    p->width = width;
4779    p->height = height;
4780    p->render_condition_enabled = render_condition_enabled;
4781 }
4782 
4783 
4784 struct tc_clear_depth_stencil {
4785    struct tc_call_base base;
4786    bool render_condition_enabled;
4787    float depth;
4788    unsigned clear_flags;
4789    unsigned stencil;
4790    unsigned dstx;
4791    unsigned dsty;
4792    unsigned width;
4793    unsigned height;
4794    struct pipe_surface *dst;
4795 };
4796 
4797 
4798 static uint16_t
4799 tc_call_clear_depth_stencil(struct pipe_context *pipe, void *call)
4800 {
4801    struct tc_clear_depth_stencil *p = to_call(call, tc_clear_depth_stencil);
4802 
4803    pipe->clear_depth_stencil(pipe, p->dst, p->clear_flags, p->depth, p->stencil,
4804                              p->dstx, p->dsty, p->width, p->height,
4805                              p->render_condition_enabled);
4806    tc_drop_surface_reference(p->dst);
4807    return call_size(tc_clear_depth_stencil);
4808 }
4809 
4810 static void
4811 tc_clear_depth_stencil(struct pipe_context *_pipe,
4812                        struct pipe_surface *dst, unsigned clear_flags,
4813                        double depth, unsigned stencil, unsigned dstx,
4814                        unsigned dsty, unsigned width, unsigned height,
4815                        bool render_condition_enabled)
4816 {
4817    struct threaded_context *tc = threaded_context(_pipe);
4818    struct tc_clear_depth_stencil *p = tc_add_call(tc, TC_CALL_clear_depth_stencil, tc_clear_depth_stencil);
4819    p->dst = NULL;
4820    pipe_surface_reference(&p->dst, dst);
4821    p->clear_flags = clear_flags;
4822    p->depth = depth;
4823    p->stencil = stencil;
4824    p->dstx = dstx;
4825    p->dsty = dsty;
4826    p->width = width;
4827    p->height = height;
4828    p->render_condition_enabled = render_condition_enabled;
4829 }
4830 
4831 struct tc_clear_buffer {
4832    struct tc_call_base base;
4833    uint8_t clear_value_size;
4834    unsigned offset;
4835    unsigned size;
4836    char clear_value[16];
4837    struct pipe_resource *res;
4838 };
4839 
4840 static uint16_t
4841 tc_call_clear_buffer(struct pipe_context *pipe, void *call)
4842 {
4843    struct tc_clear_buffer *p = to_call(call, tc_clear_buffer);
4844 
4845    pipe->clear_buffer(pipe, p->res, p->offset, p->size, p->clear_value,
4846                       p->clear_value_size);
4847    tc_drop_resource_reference(p->res);
4848    return call_size(tc_clear_buffer);
4849 }
4850 
4851 static void
4852 tc_clear_buffer(struct pipe_context *_pipe, struct pipe_resource *res,
4853                 unsigned offset, unsigned size,
4854                 const void *clear_value, int clear_value_size)
4855 {
4856    struct threaded_context *tc = threaded_context(_pipe);
4857    struct threaded_resource *tres = threaded_resource(res);
4858    struct tc_clear_buffer *p =
4859       tc_add_call(tc, TC_CALL_clear_buffer, tc_clear_buffer);
4860 
4861    tc_buffer_disable_cpu_storage(res);
4862 
4863    tc_set_resource_reference(&p->res, res);
4864    tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], res);
4865    p->offset = offset;
4866    p->size = size;
4867    memcpy(p->clear_value, clear_value, clear_value_size);
4868    p->clear_value_size = clear_value_size;
4869 
4870    util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
4871 }
4872 
4873 struct tc_clear_texture {
4874    struct tc_call_base base;
4875    unsigned level;
4876    struct pipe_box box;
4877    char data[16];
4878    struct pipe_resource *res;
4879 };
4880 
4881 static uint16_t
4882 tc_call_clear_texture(struct pipe_context *pipe, void *call)
4883 {
4884    struct tc_clear_texture *p = to_call(call, tc_clear_texture);
4885 
4886    pipe->clear_texture(pipe, p->res, p->level, &p->box, p->data);
4887    tc_drop_resource_reference(p->res);
4888    return call_size(tc_clear_texture);
4889 }
4890 
4891 static void
4892 tc_clear_texture(struct pipe_context *_pipe, struct pipe_resource *res,
4893                  unsigned level, const struct pipe_box *box, const void *data)
4894 {
4895    struct threaded_context *tc = threaded_context(_pipe);
4896    struct tc_clear_texture *p =
4897       tc_add_call(tc, TC_CALL_clear_texture, tc_clear_texture);
4898 
4899    tc_set_resource_batch_usage(tc, res);
4900    tc_set_resource_reference(&p->res, res);
4901    p->level = level;
4902    p->box = *box;
4903    memcpy(p->data, data,
4904           util_format_get_blocksize(res->format));
4905 }
4906 
4907 struct tc_resource_commit {
4908    struct tc_call_base base;
4909    bool commit;
4910    unsigned level;
4911    struct pipe_box box;
4912    struct pipe_resource *res;
4913 };
4914 
4915 static uint16_t
4916 tc_call_resource_commit(struct pipe_context *pipe, void *call)
4917 {
4918    struct tc_resource_commit *p = to_call(call, tc_resource_commit);
4919 
4920    pipe->resource_commit(pipe, p->res, p->level, &p->box, p->commit);
4921    tc_drop_resource_reference(p->res);
4922    return call_size(tc_resource_commit);
4923 }
4924 
4925 static bool
4926 tc_resource_commit(struct pipe_context *_pipe, struct pipe_resource *res,
4927                    unsigned level, struct pipe_box *box, bool commit)
4928 {
4929    struct threaded_context *tc = threaded_context(_pipe);
4930    struct tc_resource_commit *p =
4931       tc_add_call(tc, TC_CALL_resource_commit, tc_resource_commit);
4932 
4933    tc_set_resource_reference(&p->res, res);
4934    tc_set_resource_batch_usage(tc, res);
4935    p->level = level;
4936    p->box = *box;
4937    p->commit = commit;
4938    return true; /* we don't care about the return value for this call */
4939 }
4940 
4941 static unsigned
4942 tc_init_intel_perf_query_info(struct pipe_context *_pipe)
4943 {
4944    struct threaded_context *tc = threaded_context(_pipe);
4945    struct pipe_context *pipe = tc->pipe;
4946 
4947    return pipe->init_intel_perf_query_info(pipe);
4948 }
4949 
4950 static void
4951 tc_get_intel_perf_query_info(struct pipe_context *_pipe,
4952                              unsigned query_index,
4953                              const char **name,
4954                              uint32_t *data_size,
4955                              uint32_t *n_counters,
4956                              uint32_t *n_active)
4957 {
4958    struct threaded_context *tc = threaded_context(_pipe);
4959    struct pipe_context *pipe = tc->pipe;
4960 
4961    tc_sync(tc); /* n_active vs begin/end_intel_perf_query */
4962    pipe->get_intel_perf_query_info(pipe, query_index, name, data_size,
4963          n_counters, n_active);
4964 }
4965 
4966 static void
4967 tc_get_intel_perf_query_counter_info(struct pipe_context *_pipe,
4968                                      unsigned query_index,
4969                                      unsigned counter_index,
4970                                      const char **name,
4971                                      const char **desc,
4972                                      uint32_t *offset,
4973                                      uint32_t *data_size,
4974                                      uint32_t *type_enum,
4975                                      uint32_t *data_type_enum,
4976                                      uint64_t *raw_max)
4977 {
4978    struct threaded_context *tc = threaded_context(_pipe);
4979    struct pipe_context *pipe = tc->pipe;
4980 
4981    pipe->get_intel_perf_query_counter_info(pipe, query_index, counter_index,
4982          name, desc, offset, data_size, type_enum, data_type_enum, raw_max);
4983 }
4984 
4985 static struct pipe_query *
4986 tc_new_intel_perf_query_obj(struct pipe_context *_pipe, unsigned query_index)
4987 {
4988    struct threaded_context *tc = threaded_context(_pipe);
4989    struct pipe_context *pipe = tc->pipe;
4990 
4991    return pipe->new_intel_perf_query_obj(pipe, query_index);
4992 }
4993 
4994 static uint16_t
4995 tc_call_begin_intel_perf_query(struct pipe_context *pipe, void *call)
4996 {
4997    (void)pipe->begin_intel_perf_query(pipe, to_call(call, tc_query_call)->query);
4998    return call_size(tc_query_call);
4999 }
5000 
5001 static bool
5002 tc_begin_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
5003 {
5004    struct threaded_context *tc = threaded_context(_pipe);
5005 
5006    tc_add_call(tc, TC_CALL_begin_intel_perf_query, tc_query_call)->query = q;
5007 
5008    /* assume success; a failed begin can still be reported via get_intel_perf_query_data */
5009    return true;
5010 }
5011 
5012 static uint16_t
5013 tc_call_end_intel_perf_query(struct pipe_context *pipe, void *call)
5014 {
5015    pipe->end_intel_perf_query(pipe, to_call(call, tc_query_call)->query);
5016    return call_size(tc_query_call);
5017 }
5018 
5019 static void
5020 tc_end_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
5021 {
5022    struct threaded_context *tc = threaded_context(_pipe);
5023 
5024    tc_add_call(tc, TC_CALL_end_intel_perf_query, tc_query_call)->query = q;
5025 }
5026 
5027 static void
5028 tc_delete_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
5029 {
5030    struct threaded_context *tc = threaded_context(_pipe);
5031    struct pipe_context *pipe = tc->pipe;
5032 
5033    tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
5034    pipe->delete_intel_perf_query(pipe, q);
5035 }
5036 
5037 static void
5038 tc_wait_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
5039 {
5040    struct threaded_context *tc = threaded_context(_pipe);
5041    struct pipe_context *pipe = tc->pipe;
5042 
5043    tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
5044    pipe->wait_intel_perf_query(pipe, q);
5045 }
5046 
5047 static bool
5048 tc_is_intel_perf_query_ready(struct pipe_context *_pipe, struct pipe_query *q)
5049 {
5050    struct threaded_context *tc = threaded_context(_pipe);
5051    struct pipe_context *pipe = tc->pipe;
5052 
5053    tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
5054    return pipe->is_intel_perf_query_ready(pipe, q);
5055 }
5056 
5057 static bool
5058 tc_get_intel_perf_query_data(struct pipe_context *_pipe,
5059                              struct pipe_query *q,
5060                              size_t data_size,
5061                              uint32_t *data,
5062                              uint32_t *bytes_written)
5063 {
5064    struct threaded_context *tc = threaded_context(_pipe);
5065    struct pipe_context *pipe = tc->pipe;
5066 
5067    tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
5068    return pipe->get_intel_perf_query_data(pipe, q, data_size, data, bytes_written);
5069 }
5070 
5071 /********************************************************************
5072  * callback
5073  */
5074 
5075 struct tc_callback_call {
5076    struct tc_call_base base;
5077    void (*fn)(void *data);
5078    void *data;
5079 };
5080 
5081 static uint16_t
5082 tc_call_callback(UNUSED struct pipe_context *pipe, void *call)
5083 {
5084    struct tc_callback_call *p = to_call(call, tc_callback_call);
5085 
5086    p->fn(p->data);
5087    return call_size(tc_callback_call);
5088 }
5089 
5090 static void
5091 tc_callback(struct pipe_context *_pipe, void (*fn)(void *), void *data,
5092             bool asap)
5093 {
5094    struct threaded_context *tc = threaded_context(_pipe);
5095 
5096    if (asap && tc_is_sync(tc)) {
5097       fn(data);
5098       return;
5099    }
5100 
5101    struct tc_callback_call *p =
5102       tc_add_call(tc, TC_CALL_callback, tc_callback_call);
5103    p->fn = fn;
5104    p->data = data;
5105 }
5106 
5107 
5108 /********************************************************************
5109  * create & destroy
5110  */
5111 
5112 static void
5113 tc_destroy(struct pipe_context *_pipe)
5114 {
5115    struct threaded_context *tc = threaded_context(_pipe);
5116    struct pipe_context *pipe = tc->pipe;
5117 
5118    if (tc->base.const_uploader &&
5119        tc->base.stream_uploader != tc->base.const_uploader)
5120       u_upload_destroy(tc->base.const_uploader);
5121 
5122    if (tc->base.stream_uploader)
5123       u_upload_destroy(tc->base.stream_uploader);
5124 
5125    tc_sync(tc);
5126 
5127    if (util_queue_is_initialized(&tc->queue)) {
5128       util_queue_destroy(&tc->queue);
5129 
5130       for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
5131          util_queue_fence_destroy(&tc->batch_slots[i].fence);
5132          util_dynarray_fini(&tc->batch_slots[i].renderpass_infos);
5133          assert(!tc->batch_slots[i].token);
5134       }
5135    }
5136 
5137    slab_destroy_child(&tc->pool_transfers);
5138    assert(tc->batch_slots[tc->next].num_total_slots == 0);
5139    pipe->destroy(pipe);
5140 
5141    for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) {
5142       if (!util_queue_fence_is_signalled(&tc->buffer_lists[i].driver_flushed_fence))
5143          util_queue_fence_signal(&tc->buffer_lists[i].driver_flushed_fence);
5144       util_queue_fence_destroy(&tc->buffer_lists[i].driver_flushed_fence);
5145    }
5146 
5147    for (unsigned i = 0; i < ARRAY_SIZE(tc->fb_resources); i++)
5148       pipe_resource_reference(&tc->fb_resources[i], NULL);
5149    pipe_resource_reference(&tc->fb_resolve, NULL);
5150 
5151    FREE(tc);
5152 }
5153 
5154 void tc_driver_internal_flush_notify(struct threaded_context *tc)
5155 {
5156    /* Allow drivers to call this function even for internal contexts that
5157     * don't have tc. It simplifies drivers.
5158     */
5159    if (!tc)
5160       return;
5161 
5162    /* Signal fences set by tc_batch_execute. */
5163    for (unsigned i = 0; i < tc->num_signal_fences_next_flush; i++)
5164       util_queue_fence_signal(tc->signal_fences_next_flush[i]);
5165 
5166    tc->num_signal_fences_next_flush = 0;
5167 }
5168 
5169 /**
5170  * Wrap an existing pipe_context into a threaded_context.
5171  *
5172  * \param pipe                 pipe_context to wrap
5173  * \param parent_transfer_pool parent slab pool set up for creating pipe_-
5174  *                             transfer objects; the driver should have one
5175  *                             in pipe_screen.
5176  * \param replace_buffer  callback for replacing a pipe_resource's storage
5177  *                        with another pipe_resource's storage.
5178  * \param options         optional TC options/callbacks
5179  * \param out  if successful, the threaded_context will be returned here in
5180  *             addition to the return value if "out" != NULL
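 *
 * A minimal usage sketch (driver-side names here are illustrative only):
 *
 *    struct threaded_context *tc = NULL;
 *    struct pipe_context *ctx =
 *       threaded_context_create(pipe, &screen->transfer_pool,
 *                               replace_buffer_storage, NULL, &tc);
 *
 * "ctx" may be the unwrapped pipe if threading is disabled via
 * GALLIUM_THREAD=0; on failure the wrapped context is destroyed and NULL
 * is returned.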
5181  */
5182 struct pipe_context *
5183 threaded_context_create(struct pipe_context *pipe,
5184                         struct slab_parent_pool *parent_transfer_pool,
5185                         tc_replace_buffer_storage_func replace_buffer,
5186                         const struct threaded_context_options *options,
5187                         struct threaded_context **out)
5188 {
5189    struct threaded_context *tc;
5190 
5191    if (!pipe)
5192       return NULL;
5193 
5194    if (!debug_get_bool_option("GALLIUM_THREAD", true))
5195       return pipe;
5196 
5197    tc = CALLOC_STRUCT(threaded_context);
5198    if (!tc) {
5199       pipe->destroy(pipe);
5200       return NULL;
5201    }
5202 
5203    if (options) {
5204       /* this combination is unimplementable */
5205       assert(!(options->parse_renderpass_info && options->driver_calls_flush_notify));
5206       tc->options = *options;
5207    }
5208 
5209    pipe = trace_context_create_threaded(pipe->screen, pipe, &replace_buffer, &tc->options);
5210 
5211    /* The driver context isn't wrapped, so set its "priv" to NULL. */
5212    pipe->priv = NULL;
5213 
5214    tc->pipe = pipe;
5215    tc->replace_buffer_storage = replace_buffer;
5216    tc->map_buffer_alignment =
5217       pipe->screen->get_param(pipe->screen, PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT);
5218    tc->ubo_alignment =
5219       MAX2(pipe->screen->get_param(pipe->screen, PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT), 64);
5220    tc->base.priv = pipe; /* priv points to the wrapped driver context */
5221    tc->base.screen = pipe->screen;
5222    tc->base.destroy = tc_destroy;
5223    tc->base.callback = tc_callback;
5224 
5225    tc->base.stream_uploader = u_upload_clone(&tc->base, pipe->stream_uploader);
5226    if (pipe->stream_uploader == pipe->const_uploader)
5227       tc->base.const_uploader = tc->base.stream_uploader;
5228    else
5229       tc->base.const_uploader = u_upload_clone(&tc->base, pipe->const_uploader);
5230 
5231    if (!tc->base.stream_uploader || !tc->base.const_uploader)
5232       goto fail;
5233 
5234    tc->use_forced_staging_uploads = true;
5235 
5236    /* The queue size is the number of batches "waiting". Batches are removed
5237     * from the queue before being executed, so keep one tc_batch slot for that
5238     * execution. Also, keep one unused slot for an unflushed batch.
5239     */
5240    if (!util_queue_init(&tc->queue, "gdrv", TC_MAX_BATCHES - 2, 1, 0, NULL))
5241       goto fail;
5242 
5243    tc->last_completed = -1;
5244    for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
5245 #if !defined(NDEBUG) && TC_DEBUG >= 1
5246       tc->batch_slots[i].sentinel = TC_SENTINEL;
5247 #endif
5248       tc->batch_slots[i].tc = tc;
5249       tc->batch_slots[i].batch_idx = i;
5250       util_queue_fence_init(&tc->batch_slots[i].fence);
5251       tc->batch_slots[i].renderpass_info_idx = -1;
5252       if (tc->options.parse_renderpass_info) {
5253          util_dynarray_init(&tc->batch_slots[i].renderpass_infos, NULL);
5254          tc_batch_renderpass_infos_resize(tc, &tc->batch_slots[i]);
5255       }
5256    }
5257    for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++)
5258       util_queue_fence_init(&tc->buffer_lists[i].driver_flushed_fence);
5259 
5260    list_inithead(&tc->unflushed_queries);
5261 
5262    slab_create_child(&tc->pool_transfers, parent_transfer_pool);
5263 
5264    /* If you have different limits in each shader stage, set the maximum. */
5265    struct pipe_screen *screen = pipe->screen;
5266    tc->max_const_buffers =
5267       screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
5268                                PIPE_SHADER_CAP_MAX_CONST_BUFFERS);
5269    tc->max_shader_buffers =
5270       screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
5271                                PIPE_SHADER_CAP_MAX_SHADER_BUFFERS);
5272    tc->max_images =
5273       screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
5274                                PIPE_SHADER_CAP_MAX_SHADER_IMAGES);
5275    tc->max_samplers =
5276       screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
5277                                PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS);
5278 
5279    tc->base.set_context_param = tc_set_context_param; /* always set this */
5280 
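/* Wrap only the context functions the driver actually implements; hooks the
 * driver leaves NULL stay NULL in the threaded context as well.
 */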
5281 #define CTX_INIT(_member) \
5282    tc->base._member = tc->pipe->_member ? tc_##_member : NULL
5283 
5284    CTX_INIT(flush);
5285    CTX_INIT(draw_vbo);
5286    CTX_INIT(draw_vertex_state);
5287    CTX_INIT(launch_grid);
5288    CTX_INIT(resource_copy_region);
5289    CTX_INIT(blit);
5290    CTX_INIT(clear);
5291    CTX_INIT(clear_render_target);
5292    CTX_INIT(clear_depth_stencil);
5293    CTX_INIT(clear_buffer);
5294    CTX_INIT(clear_texture);
5295    CTX_INIT(flush_resource);
5296    CTX_INIT(generate_mipmap);
5297    CTX_INIT(render_condition);
5298    CTX_INIT(create_query);
5299    CTX_INIT(create_batch_query);
5300    CTX_INIT(destroy_query);
5301    CTX_INIT(begin_query);
5302    CTX_INIT(end_query);
5303    CTX_INIT(get_query_result);
5304    CTX_INIT(get_query_result_resource);
5305    CTX_INIT(set_active_query_state);
5306    CTX_INIT(create_blend_state);
5307    CTX_INIT(bind_blend_state);
5308    CTX_INIT(delete_blend_state);
5309    CTX_INIT(create_sampler_state);
5310    CTX_INIT(bind_sampler_states);
5311    CTX_INIT(delete_sampler_state);
5312    CTX_INIT(create_rasterizer_state);
5313    CTX_INIT(bind_rasterizer_state);
5314    CTX_INIT(delete_rasterizer_state);
5315    CTX_INIT(create_depth_stencil_alpha_state);
5316    CTX_INIT(bind_depth_stencil_alpha_state);
5317    CTX_INIT(delete_depth_stencil_alpha_state);
5318    CTX_INIT(link_shader);
5319    CTX_INIT(create_fs_state);
5320    CTX_INIT(bind_fs_state);
5321    CTX_INIT(delete_fs_state);
5322    CTX_INIT(create_vs_state);
5323    CTX_INIT(bind_vs_state);
5324    CTX_INIT(delete_vs_state);
5325    CTX_INIT(create_gs_state);
5326    CTX_INIT(bind_gs_state);
5327    CTX_INIT(delete_gs_state);
5328    CTX_INIT(create_tcs_state);
5329    CTX_INIT(bind_tcs_state);
5330    CTX_INIT(delete_tcs_state);
5331    CTX_INIT(create_tes_state);
5332    CTX_INIT(bind_tes_state);
5333    CTX_INIT(delete_tes_state);
5334    CTX_INIT(create_compute_state);
5335    CTX_INIT(bind_compute_state);
5336    CTX_INIT(delete_compute_state);
5337    CTX_INIT(create_vertex_elements_state);
5338    CTX_INIT(bind_vertex_elements_state);
5339    CTX_INIT(delete_vertex_elements_state);
5340    CTX_INIT(set_blend_color);
5341    CTX_INIT(set_stencil_ref);
5342    CTX_INIT(set_sample_mask);
5343    CTX_INIT(set_min_samples);
5344    CTX_INIT(set_clip_state);
5345    CTX_INIT(set_constant_buffer);
5346    CTX_INIT(set_inlinable_constants);
5347    CTX_INIT(set_framebuffer_state);
5348    CTX_INIT(set_polygon_stipple);
5349    CTX_INIT(set_sample_locations);
5350    CTX_INIT(set_scissor_states);
5351    CTX_INIT(set_viewport_states);
5352    CTX_INIT(set_window_rectangles);
5353    CTX_INIT(set_sampler_views);
5354    CTX_INIT(set_tess_state);
5355    CTX_INIT(set_patch_vertices);
5356    CTX_INIT(set_shader_buffers);
5357    CTX_INIT(set_shader_images);
5358    CTX_INIT(set_vertex_buffers);
5359    CTX_INIT(create_stream_output_target);
5360    CTX_INIT(stream_output_target_destroy);
5361    CTX_INIT(set_stream_output_targets);
5362    CTX_INIT(create_sampler_view);
5363    CTX_INIT(sampler_view_destroy);
5364    CTX_INIT(create_surface);
5365    CTX_INIT(surface_destroy);
5366    CTX_INIT(buffer_map);
5367    CTX_INIT(texture_map);
5368    CTX_INIT(transfer_flush_region);
5369    CTX_INIT(buffer_unmap);
5370    CTX_INIT(texture_unmap);
5371    CTX_INIT(buffer_subdata);
5372    CTX_INIT(texture_subdata);
5373    CTX_INIT(texture_barrier);
5374    CTX_INIT(memory_barrier);
5375    CTX_INIT(resource_commit);
5376    CTX_INIT(create_video_codec);
5377    CTX_INIT(create_video_buffer);
5378    CTX_INIT(set_compute_resources);
5379    CTX_INIT(set_global_binding);
5380    CTX_INIT(get_sample_position);
5381    CTX_INIT(invalidate_resource);
5382    CTX_INIT(get_device_reset_status);
5383    CTX_INIT(set_device_reset_callback);
5384    CTX_INIT(dump_debug_state);
5385    CTX_INIT(set_log_context);
5386    CTX_INIT(emit_string_marker);
5387    CTX_INIT(set_debug_callback);
5388    CTX_INIT(create_fence_fd);
5389    CTX_INIT(fence_server_sync);
5390    CTX_INIT(fence_server_signal);
5391    CTX_INIT(get_timestamp);
5392    CTX_INIT(create_texture_handle);
5393    CTX_INIT(delete_texture_handle);
5394    CTX_INIT(make_texture_handle_resident);
5395    CTX_INIT(create_image_handle);
5396    CTX_INIT(delete_image_handle);
5397    CTX_INIT(make_image_handle_resident);
5398    CTX_INIT(set_frontend_noop);
5399    CTX_INIT(init_intel_perf_query_info);
5400    CTX_INIT(get_intel_perf_query_info);
5401    CTX_INIT(get_intel_perf_query_counter_info);
5402    CTX_INIT(new_intel_perf_query_obj);
5403    CTX_INIT(begin_intel_perf_query);
5404    CTX_INIT(end_intel_perf_query);
5405    CTX_INIT(delete_intel_perf_query);
5406    CTX_INIT(wait_intel_perf_query);
5407    CTX_INIT(is_intel_perf_query_ready);
5408    CTX_INIT(get_intel_perf_query_data);
5409 #undef CTX_INIT
5410 
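/* u_threaded_context_calls.h expands CALL(name) once per call type, filling
 * the execute_func dispatch table that is used to run queued calls when a
 * batch is executed.
 */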
5411 #define CALL(name) tc->execute_func[TC_CALL_##name] = tc_call_##name;
5412 #include "u_threaded_context_calls.h"
5413 #undef CALL
5414 
5415    if (out)
5416       *out = tc;
5417 
5418    tc_begin_next_buffer_list(tc);
5419    if (tc->options.parse_renderpass_info)
5420       tc_batch_increment_renderpass_info(tc, tc->next, false);
5421    return &tc->base;
5422 
5423 fail:
5424    tc_destroy(&tc->base);
5425    return NULL;
5426 }
5427 
5428 void
5429 threaded_context_init_bytes_mapped_limit(struct threaded_context *tc, unsigned divisor)
5430 {
5431    uint64_t total_ram;
5432    if (os_get_total_physical_memory(&total_ram)) {
5433       tc->bytes_mapped_limit = total_ram / divisor;
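      /* 32-bit processes additionally get a hard 512 MiB cap on top of the
       * RAM-based limit.
       */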
5434       if (sizeof(void*) == 4)
5435          tc->bytes_mapped_limit = MIN2(tc->bytes_mapped_limit, 512*1024*1024UL);
5436    }
5437 }
5438 
5439 const struct tc_renderpass_info *
5440 threaded_context_get_renderpass_info(struct threaded_context *tc)
5441 {
5442    assert(tc->renderpass_info && tc->options.parse_renderpass_info);
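   /* Renderpass infos may be chained; wait for each node to become ready and
    * return the last one in the chain.
    */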
5443    struct tc_batch_rp_info *info = tc_batch_rp_info(tc->renderpass_info);
5444    while (1) {
5445       util_queue_fence_wait(&info->ready);
5446       if (!info->next)
5447          return &info->info;
5448       info = info->next;
5449    }
5450 }
5451