/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef IRIS_BATCH_DOT_H
#define IRIS_BATCH_DOT_H

#include <stdint.h>
#include <stdbool.h>
#include <string.h>

#include "util/u_dynarray.h"
#include "util/perf/u_trace.h"

#include "decoder/intel_decoder.h"
#include "ds/intel_driver_ds.h"
#include "ds/intel_tracepoints.h"

#include "iris_fence.h"
#include "iris_fine_fence.h"

struct iris_context;

/* The kernel assumes batchbuffers are smaller than 256kB. */
#define MAX_BATCH_SIZE (256 * 1024)

/* Terminating the batch takes either 4 bytes for MI_BATCH_BUFFER_END or 12
 * bytes for MI_BATCH_BUFFER_START (when chaining).  Plus another 24 bytes for
 * the seqno write (using PIPE_CONTROL), and another 24 bytes for the ISP
 * invalidation pipe control.  In the worst (chaining) case that adds up to
 * 12 + 24 + 24 = 60 bytes.
 */
#define BATCH_RESERVED 60

/* Our target batch size - flush approximately at this point. */
#define BATCH_SZ (128 * 1024 - BATCH_RESERVED)

enum iris_batch_name {
   IRIS_BATCH_RENDER,
   IRIS_BATCH_COMPUTE,
   IRIS_BATCH_BLITTER,
};

/* Same definition as drm_i915_gem_exec_fence so drm_i915_gem_execbuffer2
 * can directly use exec_fences without extra memory allocation
 */
struct iris_batch_fence {
   uint32_t handle;

#define IRIS_BATCH_FENCE_WAIT (1 << 0)
#define IRIS_BATCH_FENCE_SIGNAL (1 << 1)
   uint32_t flags;
};

struct iris_batch {
   struct iris_context *ice;
   struct iris_screen *screen;
   struct util_debug_callback *dbg;
   struct pipe_device_reset_callback *reset;

   /** What batch is this? (e.g. IRIS_BATCH_RENDER/COMPUTE) */
   enum iris_batch_name name;

   /** Current batchbuffer being queued up. */
   struct iris_bo *bo;
   void *map;
   void *map_next;

   /** Size of the primary batch being submitted to execbuf (in bytes). */
   unsigned primary_batch_size;

   /** Total size of all chained batches (in bytes). */
   unsigned total_chained_batch_size;

   /** Last binder address set in this hardware context. */
   uint64_t last_binder_address;

   /** Write fencing status for mi_builder. */
   bool write_fence_status;

   union {
      struct {
         uint32_t ctx_id;
         uint32_t exec_flags;
      } i915;
      struct {
         uint32_t exec_queue_id;
      } xe;
   };

   /** A list of all BOs referenced by this batch */
   struct iris_bo **exec_bos;
   int exec_count;
   int exec_array_size;
   /** Bitset of whether this batch writes to BO `i'. */
   BITSET_WORD *bos_written;
   uint32_t max_gem_handle;
   /** Whether INTEL_BLACKHOLE_RENDER is enabled in the batch (aka the first
    * instruction is an MI_BATCH_BUFFER_END).
    */
   bool noop_enabled;

   /** Whether the first utrace point has been recorded.
    */
   bool begin_trace_recorded;

   /**
    * A list of iris_syncobjs associated with this batch.
    *
    * The first list entry will always be a signalling sync-point, indicating
    * that this batch has completed.  The others are likely to be sync-points
    * to wait on before executing the batch.
    */
   struct util_dynarray syncobjs;

   /** A list of iris_batch_fences to have execbuf signal or wait on */
   struct util_dynarray exec_fences;

   /** The amount of aperture space (in bytes) used by all exec_bos */
   int aperture_space;

   struct {
      /** Uploader to use for sequence numbers */
      struct u_upload_mgr *uploader;

      /** GPU buffer and CPU map where our seqnos will be written. */
      struct iris_state_ref ref;
      uint32_t *map;

      /** The sequence number to write the next time we add a fence. */
      uint32_t next;
   } fine_fences;

   /** A seqno (and syncobj) for the last batch that was submitted. */
   struct iris_fine_fence *last_fence;

   /** List of other batches which we might need to flush to use a BO */
   struct iris_batch *other_batches[IRIS_BATCH_COUNT - 1];
   unsigned num_other_batches;

   /**
    * Table containing struct iris_bo * that have been accessed within this
    * batchbuffer and would need flushing before being used with a different
    * aux mode.
    */
   struct hash_table *bo_aux_modes;

   struct intel_batch_decode_ctx decoder;
   struct hash_table_u64 *state_sizes;

   /**
    * Matrix representation of the cache coherency status of the GPU at the
    * current end point of the batch.  For every i and j,
    * coherent_seqnos[i][j] denotes the seqno of the most recent flush of
    * cache domain j visible to cache domain i (which obviously implies that
    * coherent_seqnos[i][i] is the most recent flush of cache domain i).  This
    * can be used to efficiently determine whether synchronization is
    * necessary before accessing data from cache domain i if it was previously
    * accessed from another cache domain j.
    */
   uint64_t coherent_seqnos[NUM_IRIS_DOMAINS][NUM_IRIS_DOMAINS];
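
   /* A minimal sketch of how this matrix is meant to be consulted (purely
    * illustrative -- the BO-side bookkeeping lives elsewhere, e.g. the
    * per-domain seqnos bumped via iris_bo_bump_seqno() mentioned below, and
    * the bo_write_seqno name here is hypothetical):
    *
    *    // data last written from domain j at seqno bo_write_seqno[j]
    *    if (bo_write_seqno[j] > batch->coherent_seqnos[i][j]) {
    *       // domain i has not yet observed that write; a flush of j
    *       // and/or invalidate of i is needed before reading from i.
    *    }
    */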

   /**
    * A vector representing the cache coherency status of the L3.  For each
    * cache domain i, l3_coherent_seqnos[i] denotes the seqno of the most
    * recent flush of that domain which is visible to L3 clients.
    */
   uint64_t l3_coherent_seqnos[NUM_IRIS_DOMAINS];

   /**
    * Sequence number used to track the completion of any subsequent memory
    * operations in the batch until the next sync boundary.
    */
   uint64_t next_seqno;

   /** Have we emitted any draw calls to this batch? */
   bool contains_draw;

   /** Have we emitted any draw calls with next_seqno? */
   bool contains_draw_with_next_seqno;

   /** Batch contains fence signal operation. */
   bool contains_fence_signal;

   /**
    * Number of times iris_batch_sync_region_start() has been called without a
    * matching iris_batch_sync_region_end() on this batch.
    */
   uint32_t sync_region_depth;

   uint32_t last_aux_map_state;
   struct iris_measure_batch *measure;

   /** Where tracepoints are recorded */
   struct u_trace trace;

   /** Batch wrapper structure for perfetto */
   struct intel_ds_queue ds;

   uint8_t num_3d_primitives_emitted;
};

void iris_init_batches(struct iris_context *ice);
void iris_chain_to_new_batch(struct iris_batch *batch);
void iris_destroy_batches(struct iris_context *ice);
void iris_batch_maybe_flush(struct iris_batch *batch, unsigned estimate);

void iris_batch_maybe_begin_frame(struct iris_batch *batch);

void _iris_batch_flush(struct iris_batch *batch, const char *file, int line);
#define iris_batch_flush(batch) _iris_batch_flush((batch), __FILE__, __LINE__)

bool iris_batch_references(struct iris_batch *batch, struct iris_bo *bo);

bool iris_batch_prepare_noop(struct iris_batch *batch, bool noop_enable);

void iris_use_pinned_bo(struct iris_batch *batch, struct iris_bo *bo,
                        bool writable, enum iris_domain access);

enum pipe_reset_status iris_batch_check_for_reset(struct iris_batch *batch);

bool iris_batch_syncobj_to_sync_file_fd(struct iris_batch *batch, int *out_fd);

static inline unsigned
iris_batch_bytes_used(struct iris_batch *batch)
{
   return batch->map_next - batch->map;
}

static inline uint64_t
iris_batch_current_address_u64(struct iris_batch *batch)
{
   return batch->bo->address + (batch->map_next - batch->map);
}

/**
 * Ensure the current command buffer has \param size bytes of space
 * remaining.  If not, this creates a secondary batch buffer and emits
 * a jump from the primary batch to the start of the secondary.
 *
 * Most callers want iris_get_command_space() instead.
 */
static inline void
iris_require_command_space(struct iris_batch *batch, unsigned size)
{
   const unsigned required_bytes = iris_batch_bytes_used(batch) + size;

   if (required_bytes >= BATCH_SZ) {
      iris_chain_to_new_batch(batch);
   }
}

/**
 * Allocate space in the current command buffer, and return a pointer
 * to the mapped area so the caller can write commands there.
 *
 * This should be called whenever emitting commands.
 */
static inline void *
iris_get_command_space(struct iris_batch *batch, unsigned bytes)
{
   if (!batch->begin_trace_recorded) {
      batch->begin_trace_recorded = true;
      iris_batch_maybe_begin_frame(batch);
      trace_intel_begin_batch(&batch->trace);
   }
   iris_require_command_space(batch, bytes);
   void *map = batch->map_next;
   batch->map_next += bytes;
   return map;
}

/**
 * Helper to emit GPU commands - allocates space, copies them there.
 */
static inline void
iris_batch_emit(struct iris_batch *batch, const void *data, unsigned size)
{
   void *map = iris_get_command_space(batch, size);
   memcpy(map, data, size);
}
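
/* A minimal usage sketch (illustrative only): emit a single MI_NOOP dword,
 * either by copying it in or by writing into the returned space directly.
 * Real callers usually emit packed commands produced by the genxml helpers.
 *
 *    uint32_t noop = 0;                            // MI_NOOP
 *    iris_batch_emit(batch, &noop, sizeof(noop));
 *
 *    uint32_t *dw = iris_get_command_space(batch, sizeof(uint32_t));
 *    *dw = 0;                                      // MI_NOOP, written in place
 */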

/**
 * Get a pointer to the batch's signalling syncobj.  Does not refcount.
 */
static inline struct iris_syncobj *
iris_batch_get_signal_syncobj(struct iris_batch *batch)
{
   /* The signalling syncobj is the first one in the list. */
   struct iris_syncobj *syncobj =
      ((struct iris_syncobj **) util_dynarray_begin(&batch->syncobjs))[0];
   return syncobj;
}


/**
 * Take a reference to the batch's signalling syncobj.
 *
 * Callers can use this to wait for the current batch under construction
 * to complete (after flushing it).
 */
static inline void
iris_batch_reference_signal_syncobj(struct iris_batch *batch,
                                    struct iris_syncobj **out_syncobj)
{
   struct iris_syncobj *syncobj = iris_batch_get_signal_syncobj(batch);
   iris_syncobj_reference(batch->screen->bufmgr, out_syncobj, syncobj);
}
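
/* A hedged sketch of the typical flush-then-wait pattern (the actual wait
 * goes through the syncobj helpers in iris_fence.c, which aren't spelled
 * out here):
 *
 *    struct iris_syncobj *syncobj = NULL;
 *    iris_batch_reference_signal_syncobj(batch, &syncobj);
 *    iris_batch_flush(batch);
 *    // ... wait on syncobj ...
 *    iris_syncobj_reference(batch->screen->bufmgr, &syncobj, NULL); // drop ref
 */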

/**
 * Record the size of a piece of state for use in INTEL_DEBUG=bat printing.
 */
static inline void
iris_record_state_size(struct hash_table_u64 *ht,
                       uint32_t offset_from_base,
                       uint32_t size)
{
   if (ht) {
      _mesa_hash_table_u64_insert(ht, offset_from_base,
                                  (void *)(uintptr_t) size);
   }
}

/**
 * Mark the start of a region in the batch with stable synchronization
 * sequence number.  Any buffer object accessed by the batch buffer only needs
 * to be marked once (e.g. via iris_bo_bump_seqno()) within a region delimited
 * by iris_batch_sync_region_start() and iris_batch_sync_region_end().
 */
static inline void
iris_batch_sync_region_start(struct iris_batch *batch)
{
   batch->sync_region_depth++;
}

/**
 * Mark the end of a region in the batch with stable synchronization sequence
 * number.  Should be called once after each call to
 * iris_batch_sync_region_start().
 */
static inline void
iris_batch_sync_region_end(struct iris_batch *batch)
{
   assert(batch->sync_region_depth);
   batch->sync_region_depth--;
}
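
/* Illustrative bracketing (a sketch, not lifted from the driver): mark the
 * BOs a group of commands will touch inside a single sync region so the
 * batch's sequence number stays stable while they are recorded.
 * IRIS_DOMAIN_RENDER_WRITE stands in for whichever enum iris_domain value
 * the access actually uses.
 *
 *    iris_batch_sync_region_start(batch);
 *    iris_use_pinned_bo(batch, bo, true, IRIS_DOMAIN_RENDER_WRITE);
 *    // ... emit the commands that access bo ...
 *    iris_batch_sync_region_end(batch);
 */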

/**
 * Start a new synchronization section at the current point of the batch,
 * unless disallowed by a previous iris_batch_sync_region_start().
 */
static inline void
iris_batch_sync_boundary(struct iris_batch *batch)
{
   if (!batch->sync_region_depth) {
      batch->contains_draw_with_next_seqno = false;
      batch->next_seqno = p_atomic_inc_return(&batch->screen->last_seqno);
      assert(batch->next_seqno > 0);
   }
}

/**
 * Update the cache coherency status of the batch to reflect a flush of the
 * specified caching domain.
 */
static inline void
iris_batch_mark_flush_sync(struct iris_batch *batch,
                           enum iris_domain access)
{
   const struct intel_device_info *devinfo = batch->screen->devinfo;

   if (iris_domain_is_l3_coherent(devinfo, access))
      batch->l3_coherent_seqnos[access] = batch->next_seqno - 1;
   else
      batch->coherent_seqnos[access][access] = batch->next_seqno - 1;
}

/**
 * Update the cache coherency status of the batch to reflect an invalidation
 * of the specified caching domain.  All prior flushes of other caches will be
 * considered visible to the specified caching domain.
 */
static inline void
iris_batch_mark_invalidate_sync(struct iris_batch *batch,
                                enum iris_domain access)
{
   const struct intel_device_info *devinfo = batch->screen->devinfo;

   for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++) {
      if (i == access)
         continue;

      if (iris_domain_is_l3_coherent(devinfo, access)) {
         if (iris_domain_is_read_only(access)) {
            /* Invalidating an L3-coherent read-only domain "access" also
             * triggers an invalidation of any matching L3 cachelines.
             *
             * If domain 'i' is L3-coherent, it sees the latest data in L3,
             * otherwise it sees the latest globally-observable data.
             */
            batch->coherent_seqnos[access][i] =
               iris_domain_is_l3_coherent(devinfo, i) ?
               batch->l3_coherent_seqnos[i] : batch->coherent_seqnos[i][i];
         } else {
            /* Invalidating L3-coherent write domains does not trigger
             * an invalidation of any matching L3 cachelines, however.
             *
             * It sees the latest data from domain i visible to L3 clients.
             */
            batch->coherent_seqnos[access][i] = batch->l3_coherent_seqnos[i];
         }
      } else {
         /* "access" isn't L3-coherent, so invalidating it means it sees the
          * most recent globally-observable data from domain i.
          */
         batch->coherent_seqnos[access][i] = batch->coherent_seqnos[i][i];
      }
   }
}

/**
 * Update the cache coherency status of the batch to reflect a reset.  All
 * previously accessed data can be considered visible to every caching domain
 * thanks to the kernel's heavyweight flushing at batch buffer boundaries.
 */
static inline void
iris_batch_mark_reset_sync(struct iris_batch *batch)
{
   for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++) {
      batch->l3_coherent_seqnos[i] = batch->next_seqno - 1;
      for (unsigned j = 0; j < NUM_IRIS_DOMAINS; j++)
         batch->coherent_seqnos[i][j] = batch->next_seqno - 1;
   }
}

const char *
iris_batch_name_to_string(enum iris_batch_name name);

bool
iris_batch_is_banned(struct iris_bufmgr *bufmgr, int ret);

#define iris_foreach_batch(ice, batch)                \
   for (struct iris_batch *batch = &ice->batches[0];  \
        batch <= &ice->batches[((struct iris_screen *)ice->ctx.screen)->devinfo->ver >= 12 ? IRIS_BATCH_BLITTER : IRIS_BATCH_COMPUTE]; \
        ++batch)
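
/* Usage sketch (illustrative): visit every batch this context owns.  The
 * upper bound above includes the blitter batch only when devinfo->ver >= 12.
 *
 *    iris_foreach_batch(ice, batch) {
 *       if (iris_batch_bytes_used(batch) > 0)
 *          iris_batch_flush(batch);
 *    }
 */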

void iris_batch_update_syncobjs(struct iris_batch *batch);
unsigned iris_batch_num_fences(struct iris_batch *batch);

void iris_dump_fence_list(struct iris_batch *batch);
void iris_dump_bo_list(struct iris_batch *batch);
void iris_batch_decode_batch(struct iris_batch *batch);

#endif