/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef IRIS_BATCH_DOT_H
#define IRIS_BATCH_DOT_H

#include <stdint.h>
#include <stdbool.h>
#include <string.h>

#include "util/u_dynarray.h"
#include "util/perf/u_trace.h"

#include "decoder/intel_decoder.h"
#include "ds/intel_driver_ds.h"
#include "ds/intel_tracepoints.h"

#include "iris_fence.h"
#include "iris_fine_fence.h"

struct iris_context;

/* The kernel assumes batchbuffers are smaller than 256kB. */
#define MAX_BATCH_SIZE (256 * 1024)

/* Terminating the batch takes either 4 bytes for MI_BATCH_BUFFER_END or 12
 * bytes for MI_BATCH_BUFFER_START (when chaining), plus another 24 bytes for
 * the seqno write (using PIPE_CONTROL), and another 24 bytes for the ISP
 * invalidation pipe control: a worst case of 12 + 24 + 24 = 60 bytes.
 */
#define BATCH_RESERVED 60

/* Our target batch size - flush approximately at this point. */
#define BATCH_SZ (128 * 1024 - BATCH_RESERVED)

enum iris_batch_name {
   IRIS_BATCH_RENDER,
   IRIS_BATCH_COMPUTE,
   IRIS_BATCH_BLITTER,
};

/* Same definition as drm_i915_gem_exec_fence, so drm_i915_gem_execbuffer2
 * can directly use exec_fences without extra memory allocation.
 */
struct iris_batch_fence {
   uint32_t handle;

#define IRIS_BATCH_FENCE_WAIT (1 << 0)
#define IRIS_BATCH_FENCE_SIGNAL (1 << 1)
   uint32_t flags;
};
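
/* A minimal sketch (illustrative, not driver code) of queueing an extra
 * syncobj for execbuf to wait on. `syncobj_handle' is a hypothetical DRM
 * syncobj handle obtained elsewhere.
 *
 *    struct iris_batch_fence fence = {
 *       .handle = syncobj_handle,
 *       .flags = IRIS_BATCH_FENCE_WAIT,
 *    };
 *    util_dynarray_append(&batch->exec_fences, struct iris_batch_fence, fence);
 */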

struct iris_batch {
   struct iris_context *ice;
   struct iris_screen *screen;
   struct util_debug_callback *dbg;
   struct pipe_device_reset_callback *reset;

   /** What batch is this? (e.g. IRIS_BATCH_RENDER/COMPUTE) */
   enum iris_batch_name name;

   /** Current batchbuffer being queued up. */
   struct iris_bo *bo;
   void *map;
   void *map_next;

   /** Size of the primary batch being submitted to execbuf (in bytes). */
   unsigned primary_batch_size;

   /** Total size of all chained batches (in bytes). */
   unsigned total_chained_batch_size;

   /** Last binder address set in this hardware context. */
   uint64_t last_binder_address;

   /** Write fencing status for mi_builder. */
   bool write_fence_status;

   union {
      struct {
         uint32_t ctx_id;
         uint32_t exec_flags;
      } i915;
      struct {
         uint32_t exec_queue_id;
      } xe;
   };

   /** A list of all BOs referenced by this batch */
   struct iris_bo **exec_bos;
   int exec_count;
   int exec_array_size;
   /** Bitset of whether this batch writes to BO `i'. */
   BITSET_WORD *bos_written;
   uint32_t max_gem_handle;

   /** Whether INTEL_BLACKHOLE_RENDER is enabled in the batch (i.e. the
    * first instruction is an MI_BATCH_BUFFER_END).
    */
   bool noop_enabled;

   /** Whether the first utrace point has been recorded. */
   bool begin_trace_recorded;

   /**
    * A list of iris_syncobjs associated with this batch.
    *
    * The first list entry will always be a signalling sync-point, indicating
    * that this batch has completed. The others are likely to be sync-points
    * to wait on before executing the batch.
    */
   struct util_dynarray syncobjs;

   /** A list of iris_batch_fences to have execbuf signal or wait on */
   struct util_dynarray exec_fences;

   /** The amount of aperture space (in bytes) used by all exec_bos */
   int aperture_space;

   struct {
      /** Uploader to use for sequence numbers */
      struct u_upload_mgr *uploader;

      /** GPU buffer and CPU map where our seqnos will be written. */
      struct iris_state_ref ref;
      uint32_t *map;

      /** The sequence number to write the next time we add a fence. */
      uint32_t next;
   } fine_fences;

   /** A seqno (and syncobj) for the last batch that was submitted. */
   struct iris_fine_fence *last_fence;

   /** List of other batches which we might need to flush to use a BO */
   struct iris_batch *other_batches[IRIS_BATCH_COUNT - 1];
   unsigned num_other_batches;

   /**
    * Table containing struct iris_bo * that have been accessed within this
    * batchbuffer and would need flushing before being used with a different
    * aux mode.
    */
   struct hash_table *bo_aux_modes;

   struct intel_batch_decode_ctx decoder;
   struct hash_table_u64 *state_sizes;

   /**
    * Matrix representation of the cache coherency status of the GPU at the
    * current end point of the batch. For every i and j,
    * coherent_seqnos[i][j] denotes the seqno of the most recent flush of
    * cache domain j visible to cache domain i (which obviously implies that
    * coherent_seqnos[i][i] is the most recent flush of cache domain i). This
    * can be used to efficiently determine whether synchronization is
    * necessary before accessing data from cache domain i if it was previously
    * accessed from another cache domain j.
    */
   uint64_t coherent_seqnos[NUM_IRIS_DOMAINS][NUM_IRIS_DOMAINS];
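
   /* For instance (illustrative): data last written through cache domain j
    * with sequence number n is already visible to cache domain i, with no
    * extra synchronization required, whenever
    *
    *    coherent_seqnos[i][j] >= n
    */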

   /**
    * A vector representing the cache coherency status of the L3. For each
    * cache domain i, l3_coherent_seqnos[i] denotes the seqno of the most
    * recent flush of that domain which is visible to L3 clients.
    */
   uint64_t l3_coherent_seqnos[NUM_IRIS_DOMAINS];

   /**
    * Sequence number used to track the completion of any subsequent memory
    * operations in the batch until the next sync boundary.
    */
   uint64_t next_seqno;

   /** Have we emitted any draw calls to this batch? */
   bool contains_draw;

   /** Have we emitted any draw calls with next_seqno? */
   bool contains_draw_with_next_seqno;

   /** Batch contains fence signal operation. */
   bool contains_fence_signal;

   /**
    * Number of times iris_batch_sync_region_start() has been called without a
    * matching iris_batch_sync_region_end() on this batch.
    */
   uint32_t sync_region_depth;

   uint32_t last_aux_map_state;
   struct iris_measure_batch *measure;

   /** Where tracepoints are recorded */
   struct u_trace trace;

   /** Batch wrapper structure for perfetto */
   struct intel_ds_queue ds;

   uint8_t num_3d_primitives_emitted;
};

void iris_init_batches(struct iris_context *ice);
void iris_chain_to_new_batch(struct iris_batch *batch);
void iris_destroy_batches(struct iris_context *ice);
void iris_batch_maybe_flush(struct iris_batch *batch, unsigned estimate);

void iris_batch_maybe_begin_frame(struct iris_batch *batch);

void _iris_batch_flush(struct iris_batch *batch, const char *file, int line);
#define iris_batch_flush(batch) _iris_batch_flush((batch), __FILE__, __LINE__)

bool iris_batch_references(struct iris_batch *batch, struct iris_bo *bo);

bool iris_batch_prepare_noop(struct iris_batch *batch, bool noop_enable);

void iris_use_pinned_bo(struct iris_batch *batch, struct iris_bo *bo,
                        bool writable, enum iris_domain access);

enum pipe_reset_status iris_batch_check_for_reset(struct iris_batch *batch);

bool iris_batch_syncobj_to_sync_file_fd(struct iris_batch *batch, int *out_fd);

static inline unsigned
iris_batch_bytes_used(struct iris_batch *batch)
{
   return batch->map_next - batch->map;
}

static inline uint64_t
iris_batch_current_address_u64(struct iris_batch *batch)
{
   return batch->bo->address + (batch->map_next - batch->map);
}

/**
 * Ensure the current command buffer has \p size bytes of space
 * remaining. If not, this creates a secondary batch buffer and emits
 * a jump from the primary batch to the start of the secondary.
 *
 * Most callers want iris_get_command_space() instead.
 */
static inline void
iris_require_command_space(struct iris_batch *batch, unsigned size)
{
   const unsigned required_bytes = iris_batch_bytes_used(batch) + size;

   if (required_bytes >= BATCH_SZ) {
      iris_chain_to_new_batch(batch);
   }
}

/**
 * Allocate space in the current command buffer, and return a pointer
 * to the mapped area so the caller can write commands there.
 *
 * This should be called whenever emitting commands.
 */
static inline void *
iris_get_command_space(struct iris_batch *batch, unsigned bytes)
{
   if (!batch->begin_trace_recorded) {
      batch->begin_trace_recorded = true;
      iris_batch_maybe_begin_frame(batch);
      trace_intel_begin_batch(&batch->trace);
   }
   iris_require_command_space(batch, bytes);
   void *map = batch->map_next;
   batch->map_next += bytes;
   return map;
}
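
/* A usage sketch (illustrative): reserving room for two dwords and writing
 * them directly. `header' and `payload' are hypothetical packed command
 * dwords.
 *
 *    uint32_t *out = iris_get_command_space(batch, 2 * sizeof(uint32_t));
 *    out[0] = header;
 *    out[1] = payload;
 */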

/**
 * Helper to emit GPU commands - allocates space, copies them there.
 */
static inline void
iris_batch_emit(struct iris_batch *batch, const void *data, unsigned size)
{
   void *map = iris_get_command_space(batch, size);
   memcpy(map, data, size);
}
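
/* For example (illustrative): MI_NOOP encodes as a single zero dword, so
 * padding the batch with one no-op could look like:
 *
 *    const uint32_t noop = 0; // MI_NOOP
 *    iris_batch_emit(batch, &noop, sizeof(noop));
 */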

/**
 * Get a pointer to the batch's signalling syncobj. Does not refcount.
 */
static inline struct iris_syncobj *
iris_batch_get_signal_syncobj(struct iris_batch *batch)
{
   /* The signalling syncobj is the first one in the list. */
   struct iris_syncobj *syncobj =
      ((struct iris_syncobj **) util_dynarray_begin(&batch->syncobjs))[0];
   return syncobj;
}

/**
 * Take a reference to the batch's signalling syncobj.
 *
 * Callers can use this to wait for the current batch under construction
 * to complete (after flushing it).
 */
static inline void
iris_batch_reference_signal_syncobj(struct iris_batch *batch,
                                    struct iris_syncobj **out_syncobj)
{
   struct iris_syncobj *syncobj = iris_batch_get_signal_syncobj(batch);
   iris_syncobj_reference(batch->screen->bufmgr, out_syncobj, syncobj);
}
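
/* A typical pattern (illustrative): take a reference, flush, wait on the
 * syncobj via the driver's syncobj wait helper, then drop the reference by
 * referencing NULL over it.
 *
 *    struct iris_syncobj *syncobj = NULL;
 *    iris_batch_reference_signal_syncobj(batch, &syncobj);
 *    iris_batch_flush(batch);
 *    // ... wait on syncobj ...
 *    iris_syncobj_reference(batch->screen->bufmgr, &syncobj, NULL);
 */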

/**
 * Record the size of a piece of state for use in INTEL_DEBUG=bat printing.
 */
static inline void
iris_record_state_size(struct hash_table_u64 *ht,
                       uint32_t offset_from_base,
                       uint32_t size)
{
   if (ht) {
      _mesa_hash_table_u64_insert(ht, offset_from_base,
                                  (void *)(uintptr_t) size);
   }
}
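
/* Example (illustrative): annotating a freshly uploaded state blob so the
 * batch decoder can print its size. `surf_offset' and `surf_size' are
 * hypothetical values from a state upload.
 *
 *    iris_record_state_size(batch->state_sizes, surf_offset, surf_size);
 */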

/**
 * Mark the start of a region in the batch with stable synchronization
 * sequence number. Any buffer object accessed by the batch buffer only needs
 * to be marked once (e.g. via iris_bo_bump_seqno()) within a region delimited
 * by iris_batch_sync_region_start() and iris_batch_sync_region_end().
 */
static inline void
iris_batch_sync_region_start(struct iris_batch *batch)
{
   batch->sync_region_depth++;
}

/**
 * Mark the end of a region in the batch with stable synchronization sequence
 * number. Should be called once after each call to
 * iris_batch_sync_region_start().
 */
static inline void
iris_batch_sync_region_end(struct iris_batch *batch)
{
   assert(batch->sync_region_depth);
   batch->sync_region_depth--;
}
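
/* A usage sketch (illustrative): an emit path brackets its BO accesses so
 * each BO's seqno only needs to be bumped once within the region.
 *
 *    iris_batch_sync_region_start(batch);
 *    iris_use_pinned_bo(batch, bo, true, IRIS_DOMAIN_RENDER_WRITE);
 *    // ... emit commands referencing bo ...
 *    iris_batch_sync_region_end(batch);
 */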

/**
 * Start a new synchronization section at the current point of the batch,
 * unless disallowed by a previous iris_batch_sync_region_start().
 */
static inline void
iris_batch_sync_boundary(struct iris_batch *batch)
{
   if (!batch->sync_region_depth) {
      batch->contains_draw_with_next_seqno = false;
      batch->next_seqno = p_atomic_inc_return(&batch->screen->last_seqno);
      assert(batch->next_seqno > 0);
   }
}

/**
 * Update the cache coherency status of the batch to reflect a flush of the
 * specified caching domain.
 */
static inline void
iris_batch_mark_flush_sync(struct iris_batch *batch,
                           enum iris_domain access)
{
   const struct intel_device_info *devinfo = batch->screen->devinfo;

   if (iris_domain_is_l3_coherent(devinfo, access))
      batch->l3_coherent_seqnos[access] = batch->next_seqno - 1;
   else
      batch->coherent_seqnos[access][access] = batch->next_seqno - 1;
}

/**
 * Update the cache coherency status of the batch to reflect an invalidation
 * of the specified caching domain. All prior flushes of other caches will be
 * considered visible to the specified caching domain.
 */
static inline void
iris_batch_mark_invalidate_sync(struct iris_batch *batch,
                                enum iris_domain access)
{
   const struct intel_device_info *devinfo = batch->screen->devinfo;

   for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++) {
      if (i == access)
         continue;

      if (iris_domain_is_l3_coherent(devinfo, access)) {
         if (iris_domain_is_read_only(access)) {
            /* Invalidating an L3-coherent read-only domain "access" also
             * triggers an invalidation of any matching L3 cachelines.
             *
             * If domain 'i' is L3-coherent, it sees the latest data in L3,
             * otherwise it sees the latest globally-observable data.
             */
            batch->coherent_seqnos[access][i] =
               iris_domain_is_l3_coherent(devinfo, i) ?
               batch->l3_coherent_seqnos[i] : batch->coherent_seqnos[i][i];
         } else {
            /* Invalidating an L3-coherent write domain does not trigger
             * an invalidation of any matching L3 cachelines, however.
             *
             * It sees the latest data from domain i visible to L3 clients.
             */
            batch->coherent_seqnos[access][i] = batch->l3_coherent_seqnos[i];
         }
      } else {
         /* "access" isn't L3-coherent, so invalidating it means it sees the
          * most recent globally-observable data from domain i.
          */
         batch->coherent_seqnos[access][i] = batch->coherent_seqnos[i][i];
      }
   }
}
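
/* For example (illustrative): after emitting a PIPE_CONTROL that flushes the
 * render cache and invalidates the sampler caches, the tracking could be
 * updated as:
 *
 *    iris_batch_mark_flush_sync(batch, IRIS_DOMAIN_RENDER_WRITE);
 *    iris_batch_mark_invalidate_sync(batch, IRIS_DOMAIN_SAMPLER_READ);
 *
 * (IRIS_DOMAIN_SAMPLER_READ is assumed here to be one of the read-only
 * entries of enum iris_domain.)
 */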

/**
 * Update the cache coherency status of the batch to reflect a reset. All
 * previously accessed data can be considered visible to every caching domain
 * thanks to the kernel's heavyweight flushing at batch buffer boundaries.
 */
static inline void
iris_batch_mark_reset_sync(struct iris_batch *batch)
{
   for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++) {
      batch->l3_coherent_seqnos[i] = batch->next_seqno - 1;
      for (unsigned j = 0; j < NUM_IRIS_DOMAINS; j++)
         batch->coherent_seqnos[i][j] = batch->next_seqno - 1;
   }
}

const char *
iris_batch_name_to_string(enum iris_batch_name name);

bool
iris_batch_is_banned(struct iris_bufmgr *bufmgr, int ret);

#define iris_foreach_batch(ice, batch) \
   for (struct iris_batch *batch = &ice->batches[0]; \
        batch <= &ice->batches[((struct iris_screen *)ice->ctx.screen)->devinfo->ver >= 12 ? IRIS_BATCH_BLITTER : IRIS_BATCH_COMPUTE]; \
        ++batch)
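
/* Example (illustrative): iterating every batch owned by a context, e.g. to
 * flush all pending work:
 *
 *    iris_foreach_batch(ice, batch)
 *       iris_batch_flush(batch);
 */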

void iris_batch_update_syncobjs(struct iris_batch *batch);
unsigned iris_batch_num_fences(struct iris_batch *batch);

void iris_dump_fence_list(struct iris_batch *batch);
void iris_dump_bo_list(struct iris_batch *batch);
void iris_batch_decode_batch(struct iris_batch *batch);

#endif