/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#ifndef RADV_CMD_BUFFER_H
#define RADV_CMD_BUFFER_H

#include "ac_vcn.h"

#include "vk_command_buffer.h"

#include "radv_device.h"
#include "radv_physical_device.h"
#include "radv_pipeline_graphics.h"
#include "radv_video.h"

extern const struct vk_command_buffer_ops radv_cmd_buffer_ops;

enum radv_dynamic_state_bits {
   RADV_DYNAMIC_VIEWPORT = 1ull << 0,
   RADV_DYNAMIC_SCISSOR = 1ull << 1,
   RADV_DYNAMIC_LINE_WIDTH = 1ull << 2,
   RADV_DYNAMIC_DEPTH_BIAS = 1ull << 3,
   RADV_DYNAMIC_BLEND_CONSTANTS = 1ull << 4,
   RADV_DYNAMIC_DEPTH_BOUNDS = 1ull << 5,
   RADV_DYNAMIC_STENCIL_COMPARE_MASK = 1ull << 6,
   RADV_DYNAMIC_STENCIL_WRITE_MASK = 1ull << 7,
   RADV_DYNAMIC_STENCIL_REFERENCE = 1ull << 8,
   RADV_DYNAMIC_DISCARD_RECTANGLE = 1ull << 9,
   RADV_DYNAMIC_SAMPLE_LOCATIONS = 1ull << 10,
   RADV_DYNAMIC_LINE_STIPPLE = 1ull << 11,
   RADV_DYNAMIC_CULL_MODE = 1ull << 12,
   RADV_DYNAMIC_FRONT_FACE = 1ull << 13,
   RADV_DYNAMIC_PRIMITIVE_TOPOLOGY = 1ull << 14,
   RADV_DYNAMIC_DEPTH_TEST_ENABLE = 1ull << 15,
   RADV_DYNAMIC_DEPTH_WRITE_ENABLE = 1ull << 16,
   RADV_DYNAMIC_DEPTH_COMPARE_OP = 1ull << 17,
   RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1ull << 18,
   RADV_DYNAMIC_STENCIL_TEST_ENABLE = 1ull << 19,
   RADV_DYNAMIC_STENCIL_OP = 1ull << 20,
   RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1ull << 21,
   RADV_DYNAMIC_FRAGMENT_SHADING_RATE = 1ull << 22,
   RADV_DYNAMIC_PATCH_CONTROL_POINTS = 1ull << 23,
   RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE = 1ull << 24,
   RADV_DYNAMIC_DEPTH_BIAS_ENABLE = 1ull << 25,
   RADV_DYNAMIC_LOGIC_OP = 1ull << 26,
   RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE = 1ull << 27,
   RADV_DYNAMIC_COLOR_WRITE_ENABLE = 1ull << 28,
   RADV_DYNAMIC_VERTEX_INPUT = 1ull << 29,
   RADV_DYNAMIC_POLYGON_MODE = 1ull << 30,
   RADV_DYNAMIC_TESS_DOMAIN_ORIGIN = 1ull << 31,
   RADV_DYNAMIC_LOGIC_OP_ENABLE = 1ull << 32,
   RADV_DYNAMIC_LINE_STIPPLE_ENABLE = 1ull << 33,
   RADV_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE = 1ull << 34,
   RADV_DYNAMIC_SAMPLE_MASK = 1ull << 35,
   RADV_DYNAMIC_DEPTH_CLIP_ENABLE = 1ull << 36,
   RADV_DYNAMIC_CONSERVATIVE_RAST_MODE = 1ull << 37,
   RADV_DYNAMIC_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE = 1ull << 38,
   RADV_DYNAMIC_PROVOKING_VERTEX_MODE = 1ull << 39,
   RADV_DYNAMIC_DEPTH_CLAMP_ENABLE = 1ull << 40,
   RADV_DYNAMIC_COLOR_WRITE_MASK = 1ull << 41,
   RADV_DYNAMIC_COLOR_BLEND_ENABLE = 1ull << 42,
   RADV_DYNAMIC_RASTERIZATION_SAMPLES = 1ull << 43,
   RADV_DYNAMIC_LINE_RASTERIZATION_MODE = 1ull << 44,
   RADV_DYNAMIC_COLOR_BLEND_EQUATION = 1ull << 45,
   RADV_DYNAMIC_DISCARD_RECTANGLE_ENABLE = 1ull << 46,
   RADV_DYNAMIC_DISCARD_RECTANGLE_MODE = 1ull << 47,
   RADV_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE = 1ull << 48,
   RADV_DYNAMIC_SAMPLE_LOCATIONS_ENABLE = 1ull << 49,
   RADV_DYNAMIC_ALPHA_TO_ONE_ENABLE = 1ull << 50,
   RADV_DYNAMIC_COLOR_ATTACHMENT_MAP = 1ull << 51,
   RADV_DYNAMIC_INPUT_ATTACHMENT_MAP = 1ull << 52,
   RADV_DYNAMIC_ALL = (1ull << 53) - 1,
};
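
/*
 * Illustrative sketch, not part of the driver API: these bits are intended to
 * be OR'ed into a 64-bit mask such as radv_cmd_state::dirty_dynamic and tested
 * when state needs to be re-emitted, e.g.:
 *
 *    cmd_buffer->state.dirty_dynamic |= RADV_DYNAMIC_VIEWPORT | RADV_DYNAMIC_SCISSOR;
 *
 *    if (cmd_buffer->state.dirty_dynamic & RADV_DYNAMIC_CULL_MODE) {
 *       // re-emit the cull mode state
 *    }
 */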

enum radv_cmd_dirty_bits {
   RADV_CMD_DIRTY_PIPELINE = 1ull << 0,
   RADV_CMD_DIRTY_INDEX_BUFFER = 1ull << 1,
   RADV_CMD_DIRTY_FRAMEBUFFER = 1ull << 2,
   RADV_CMD_DIRTY_VERTEX_BUFFER = 1ull << 3,
   RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1ull << 4,
   RADV_CMD_DIRTY_GUARDBAND = 1ull << 5,
   RADV_CMD_DIRTY_RBPLUS = 1ull << 6,
   RADV_CMD_DIRTY_SHADER_QUERY = 1ull << 7,
   RADV_CMD_DIRTY_OCCLUSION_QUERY = 1ull << 8,
   RADV_CMD_DIRTY_DB_SHADER_CONTROL = 1ull << 9,
   RADV_CMD_DIRTY_STREAMOUT_ENABLE = 1ull << 10,
   RADV_CMD_DIRTY_GRAPHICS_SHADERS = 1ull << 11,
   RADV_CMD_DIRTY_COLOR_OUTPUT = 1ull << 12,
   RADV_CMD_DIRTY_FBFETCH_OUTPUT = 1ull << 13,
   RADV_CMD_DIRTY_ALL = (1ull << 14) - 1,
};
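
/*
 * Illustrative sketch, not part of the driver API: these bits work like the
 * dynamic state bits above but live in the 32-bit radv_cmd_state::dirty mask,
 * e.g.:
 *
 *    cmd_buffer->state.dirty |= RADV_CMD_DIRTY_PIPELINE | RADV_CMD_DIRTY_INDEX_BUFFER;
 */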

enum radv_cmd_flush_bits {
   /* Instruction cache. */
   RADV_CMD_FLAG_INV_ICACHE = 1 << 0,
   /* Scalar L1 cache. */
   RADV_CMD_FLAG_INV_SCACHE = 1 << 1,
   /* Vector L1 cache. */
   RADV_CMD_FLAG_INV_VCACHE = 1 << 2,
   /* L2 cache + L2 metadata cache writeback & invalidate.
    * GFX6-8: Used by shaders only. GFX9-10: Used by everything. */
   RADV_CMD_FLAG_INV_L2 = 1 << 3,
   /* L2 writeback (write dirty L2 lines to memory for non-L2 clients).
    * Only used for coherency with non-L2 clients like CB, DB, CP on GFX6-8.
    * GFX6-7 will do complete invalidation, because the writeback is unsupported. */
   RADV_CMD_FLAG_WB_L2 = 1 << 4,
   /* Invalidate the metadata cache. To be used when the DCC/HTILE metadata
    * changed and we want to read an image from shaders. */
   RADV_CMD_FLAG_INV_L2_METADATA = 1 << 5,
   /* Framebuffer caches */
   RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 6,
   RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 7,
   RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 8,
   RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 9,
   /* Engine synchronization. */
   RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 10,
   RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 11,
   RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 12,
   RADV_CMD_FLAG_VGT_FLUSH = 1 << 13,
   /* Pipeline query controls. */
   RADV_CMD_FLAG_START_PIPELINE_STATS = 1 << 14,
   RADV_CMD_FLAG_STOP_PIPELINE_STATS = 1 << 15,
   RADV_CMD_FLAG_VGT_STREAMOUT_SYNC = 1 << 16,

   RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER = (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
                                         RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META),

   RADV_CMD_FLUSH_ALL_COMPUTE = (RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE |
                                 RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_WB_L2 | RADV_CMD_FLAG_CS_PARTIAL_FLUSH),
};
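
/*
 * Illustrative sketch, not part of the driver API: flush flags are typically
 * accumulated in radv_cmd_state::flush_bits and later consumed by
 * radv_emit_cache_flush(), e.g.:
 *
 *    cmd_buffer->state.flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER |
 *                                    RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
 *    radv_emit_cache_flush(cmd_buffer);
 */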

struct radv_vertex_binding {
   VkDeviceSize offset;
   VkDeviceSize size;
   VkDeviceSize stride;
};

struct radv_streamout_binding {
   struct radv_buffer *buffer;
   VkDeviceSize offset;
   VkDeviceSize size;
};

struct radv_streamout_state {
   /* Mask of bound streamout buffers. */
   uint8_t enabled_mask;

   /* State of VGT_STRMOUT_BUFFER_(CONFIG|END) */
   uint32_t hw_enabled_mask;

   /* State of VGT_STRMOUT_(CONFIG|EN) */
   bool streamout_enabled;
};

/**
 * Attachment state when recording a renderpass instance.
 *
 * The clear value is valid only if there exists a pending clear.
 */
struct radv_attachment {
   VkFormat format;
   struct radv_image_view *iview;
   VkImageLayout layout;
   VkImageLayout stencil_layout;

   union {
      struct radv_color_buffer_info cb;
      struct radv_ds_buffer_info ds;
   };

   struct radv_image_view *resolve_iview;
   VkResolveModeFlagBits resolve_mode;
   VkResolveModeFlagBits stencil_resolve_mode;
   VkImageLayout resolve_layout;
   VkImageLayout stencil_resolve_layout;
};

struct radv_rendering_state {
   bool active;
   bool has_image_views;
   bool has_input_attachment_no_concurrent_writes;
   VkRect2D area;
   uint32_t layer_count;
   uint32_t view_mask;
   uint32_t color_samples;
   uint32_t ds_samples;
   uint32_t max_samples;
   struct radv_sample_locations_state sample_locations;
   uint32_t color_att_count;
   struct radv_attachment color_att[MAX_RTS];
   struct radv_attachment ds_att;
   VkImageAspectFlags ds_att_aspects;
   struct radv_attachment vrs_att;
   VkExtent2D vrs_texel_size;
};

struct radv_descriptor_state {
   struct radv_descriptor_set *sets[MAX_SETS];
   uint32_t dirty;
   uint32_t valid;
   struct radv_push_descriptor_set push_set;
   uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS];
   uint64_t descriptor_buffers[MAX_SETS];
   bool need_indirect_descriptor_sets;
   uint64_t indirect_descriptor_sets_va;
};

struct radv_push_constant_state {
   uint32_t size;
   uint32_t dynamic_offset_count;
};

enum rgp_flush_bits {
   RGP_FLUSH_WAIT_ON_EOP_TS = 0x1,
   RGP_FLUSH_VS_PARTIAL_FLUSH = 0x2,
   RGP_FLUSH_PS_PARTIAL_FLUSH = 0x4,
   RGP_FLUSH_CS_PARTIAL_FLUSH = 0x8,
   RGP_FLUSH_PFP_SYNC_ME = 0x10,
   RGP_FLUSH_SYNC_CP_DMA = 0x20,
   RGP_FLUSH_INVAL_VMEM_L0 = 0x40,
   RGP_FLUSH_INVAL_ICACHE = 0x80,
   RGP_FLUSH_INVAL_SMEM_L0 = 0x100,
   RGP_FLUSH_FLUSH_L2 = 0x200,
   RGP_FLUSH_INVAL_L2 = 0x400,
   RGP_FLUSH_FLUSH_CB = 0x800,
   RGP_FLUSH_INVAL_CB = 0x1000,
   RGP_FLUSH_FLUSH_DB = 0x2000,
   RGP_FLUSH_INVAL_DB = 0x4000,
   RGP_FLUSH_INVAL_L1 = 0x8000,
};

enum radv_tracked_reg {
   RADV_TRACKED_DB_COUNT_CONTROL,
   RADV_TRACKED_DB_SHADER_CONTROL,
   RADV_TRACKED_DB_VRS_OVERRIDE_CNTL,

   RADV_TRACKED_GE_MAX_OUTPUT_PER_SUBGROUP,
   RADV_TRACKED_GE_NGG_SUBGRP_CNTL,

   RADV_TRACKED_PA_CL_VRS_CNTL,
   RADV_TRACKED_PA_CL_VS_OUT_CNTL,

   RADV_TRACKED_PA_SC_BINNER_CNTL_0,
   RADV_TRACKED_PA_SC_SHADER_CONTROL,

   /* 2 consecutive registers */
   RADV_TRACKED_SPI_PS_INPUT_ENA,
   RADV_TRACKED_SPI_PS_INPUT_ADDR,

   RADV_TRACKED_SPI_PS_IN_CONTROL,

   /* 2 consecutive registers */
   RADV_TRACKED_SPI_SHADER_IDX_FORMAT,
   RADV_TRACKED_SPI_SHADER_POS_FORMAT,

   RADV_TRACKED_SPI_SHADER_Z_FORMAT,
   RADV_TRACKED_SPI_VS_OUT_CONFIG,

   /* 3 consecutive registers */
   RADV_TRACKED_SX_PS_DOWNCONVERT,
   RADV_TRACKED_SX_BLEND_OPT_EPSILON,
   RADV_TRACKED_SX_BLEND_OPT_CONTROL,

   RADV_TRACKED_VGT_DRAW_PAYLOAD_CNTL,
   RADV_TRACKED_VGT_ESGS_RING_ITEMSIZE, /* GFX6-8 */
   RADV_TRACKED_VGT_GS_MODE,
   RADV_TRACKED_VGT_GS_INSTANCE_CNT,
   RADV_TRACKED_VGT_GS_ONCHIP_CNTL,
   RADV_TRACKED_VGT_GS_MAX_PRIMS_PER_SUBGROUP,
   RADV_TRACKED_VGT_GS_MAX_VERT_OUT,
   RADV_TRACKED_VGT_GS_OUT_PRIM_TYPE,

   /* 4 consecutive registers */
   RADV_TRACKED_VGT_GS_VERT_ITEMSIZE,
   RADV_TRACKED_VGT_GS_VERT_ITEMSIZE_1,
   RADV_TRACKED_VGT_GS_VERT_ITEMSIZE_2,
   RADV_TRACKED_VGT_GS_VERT_ITEMSIZE_3,

   RADV_TRACKED_VGT_GSVS_RING_ITEMSIZE,

   /* 3 consecutive registers */
   RADV_TRACKED_VGT_GSVS_RING_OFFSET_1,
   RADV_TRACKED_VGT_GSVS_RING_OFFSET_2,
   RADV_TRACKED_VGT_GSVS_RING_OFFSET_3,

   RADV_TRACKED_VGT_MULTI_PRIM_IB_RESET_INDX, /* GFX6-7 */
   RADV_TRACKED_VGT_PRIMITIVEID_EN,
   RADV_TRACKED_VGT_REUSE_OFF,
   RADV_TRACKED_VGT_SHADER_STAGES_EN,
   RADV_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,

   RADV_NUM_ALL_TRACKED_REGS,
};

struct radv_tracked_regs {
   BITSET_DECLARE(reg_saved_mask, RADV_NUM_ALL_TRACKED_REGS);
   uint32_t reg_value[RADV_NUM_ALL_TRACKED_REGS];
   uint32_t spi_ps_input_cntl[32];
};

struct radv_cmd_state {
   /* Vertex descriptors */
   uint64_t vb_va;
   unsigned vb_size;

   bool predicating;
   uint64_t dirty_dynamic;
   uint32_t dirty;

   VkShaderStageFlags active_stages;
   struct radv_shader *shaders[MESA_VULKAN_SHADER_STAGES];
   struct radv_shader *gs_copy_shader;
   struct radv_shader *last_vgt_shader;
   struct radv_shader *rt_prolog;

   struct radv_shader_object *shader_objs[MESA_VULKAN_SHADER_STAGES];

   uint32_t prefetch_L2_mask;

   struct radv_graphics_pipeline *graphics_pipeline;
   struct radv_graphics_pipeline *emitted_graphics_pipeline;
   struct radv_compute_pipeline *compute_pipeline;
   struct radv_compute_pipeline *emitted_compute_pipeline;
   struct radv_ray_tracing_pipeline *rt_pipeline; /* emitted = emitted_compute_pipeline */
   struct radv_dynamic_state dynamic;
   struct radv_vertex_input_state vertex_input;
   struct radv_streamout_state streamout;

   struct radv_rendering_state render;

   /* Index buffer */
   uint32_t index_type;
   uint32_t max_index_count;
   uint64_t index_va;
   int32_t last_index_type;

   enum radv_cmd_flush_bits flush_bits;
   unsigned active_occlusion_queries;
   bool perfect_occlusion_queries_enabled;
   unsigned active_pipeline_queries;
   unsigned active_pipeline_gds_queries;
   unsigned active_pipeline_ace_queries; /* Task shader invocations query */
   unsigned active_prims_gen_queries;
   unsigned active_prims_xfb_queries;
   unsigned active_prims_gen_gds_queries;
   unsigned active_prims_xfb_gds_queries;
   uint32_t trace_id;
   uint32_t last_ia_multi_vgt_param;
   uint32_t last_ge_cntl;

   uint32_t last_num_instances;
   uint32_t last_first_instance;
   bool last_vertex_offset_valid;
   uint32_t last_vertex_offset;
   uint32_t last_drawid;
   uint32_t last_subpass_color_count;

   /* Whether CP DMA is busy/idle. */
   bool dma_is_busy;

   /* Whether any images that are not L2 coherent are dirty from the CB. */
   bool rb_noncoherent_dirty;

   /* Conditional rendering info. */
   uint8_t predication_op; /* 32-bit or 64-bit predicate value */
   int predication_type;   /* -1: disabled, 0: normal, 1: inverted */
   uint64_t predication_va;
   uint64_t mec_inv_pred_va;  /* For inverted predication when using MEC. */
   bool mec_inv_pred_emitted; /* To ensure we don't have to repeat inverting the VA. */

   /* Inheritance info. */
   VkQueryPipelineStatisticFlags inherited_pipeline_statistics;
   bool inherited_occlusion_queries;
   VkQueryControlFlags inherited_query_control_flags;

   bool context_roll_without_scissor_emitted;

   /* SQTT related state. */
   uint32_t current_event_type;
   uint32_t num_events;
   uint32_t num_layout_transitions;
   bool in_barrier;
   bool pending_sqtt_barrier_end;
   enum rgp_flush_bits sqtt_flush_bits;

   /* NGG culling state. */
   bool has_nggc;

   /* Mesh shading state. */
   bool mesh_shading;

   uint8_t cb_mip[MAX_RTS];
   uint8_t ds_mip;

   /* Whether DRAW_{INDEX}_INDIRECT_{MULTI} is emitted. */
   bool uses_draw_indirect;

   uint32_t rt_stack_size;

   struct radv_shader_part *emitted_vs_prolog;
   uint32_t vbo_misaligned_mask;
   uint32_t vbo_unaligned_mask;
   uint32_t vbo_misaligned_mask_invalid;
   uint32_t vbo_bound_mask;

   struct radv_shader_part *emitted_ps_epilog;

   /* Per-vertex VRS state. */
   uint32_t last_vrs_rates;
   int32_t last_force_vrs_rates_offset;

   /* Whether to suspend streamout for internal driver operations. */
   bool suspend_streamout;

   /* Whether this command buffer uses performance counters. */
   bool uses_perf_counters;

   struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param;

   /* Tessellation info when patch control points is dynamic. */
   unsigned tess_num_patches;
   unsigned tess_lds_size;

   unsigned spi_shader_col_format;
   unsigned cb_shader_mask;

   struct radv_multisample_state ms;

   /* Custom blend mode for internal operations. */
   unsigned custom_blend_mode;
   unsigned db_render_control;

   unsigned rast_prim;

   uint32_t vtx_base_sgpr;
   uint8_t vtx_emit_num;
   bool uses_drawid;
   bool uses_baseinstance;

   bool uses_out_of_order_rast;
   bool uses_vrs;
   bool uses_vrs_attachment;
   bool uses_vrs_coarse_shading;
   bool uses_dynamic_patch_control_points;
   bool uses_fbfetch_output;
};

struct radv_enc_state {
   uint32_t task_size_offset;
   uint32_t total_task_size;
   unsigned shifter;
   unsigned bits_in_shifter;
   uint32_t num_zeros;
   uint32_t byte_index;
   unsigned bits_output;
   unsigned bits_size;
   bool emulation_prevention;
   bool is_even_frame;
   unsigned task_id;
   uint32_t copy_start_offset;
};

struct radv_cmd_buffer_upload {
   uint8_t *map;
   unsigned offset;
   uint64_t size;
   struct radeon_winsys_bo *upload_bo;
   struct list_head list;
};

struct radv_cmd_buffer {
   struct vk_command_buffer vk;

   struct radv_tracked_regs tracked_regs;

   VkCommandBufferUsageFlags usage_flags;
   struct radeon_cmdbuf *cs;
   struct radv_cmd_state state;
   struct radv_buffer *vertex_binding_buffers[MAX_VBS];
   struct radv_vertex_binding vertex_bindings[MAX_VBS];
   uint32_t used_vertex_bindings;
   struct radv_streamout_binding streamout_bindings[MAX_SO_BUFFERS];
   enum radv_queue_family qf;

   uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
   VkShaderStageFlags push_constant_stages;
   struct radv_descriptor_set_header meta_push_descriptors;

   struct radv_descriptor_state descriptors[MAX_BIND_POINTS];

   struct radv_push_constant_state push_constant_state[MAX_BIND_POINTS];

   uint64_t descriptor_buffers[MAX_SETS];

   struct radv_cmd_buffer_upload upload;

   uint32_t scratch_size_per_wave_needed;
   uint32_t scratch_waves_wanted;
   uint32_t compute_scratch_size_per_wave_needed;
   uint32_t compute_scratch_waves_wanted;
   uint32_t esgs_ring_size_needed;
   uint32_t gsvs_ring_size_needed;
   bool tess_rings_needed;
   bool task_rings_needed;
   bool mesh_scratch_ring_needed;
   bool gds_needed;    /* for GFX10 streamout and NGG GS queries */
   bool gds_oa_needed; /* for GFX10 streamout */
   bool sample_positions_needed;
   bool has_indirect_pipeline_binds;

   uint64_t gfx9_fence_va;
   uint32_t gfx9_fence_idx;
   uint64_t gfx9_eop_bug_va;

   struct set vs_prologs;
   struct set ps_epilogs;

   /**
    * Gang state.
    * Used when the command buffer needs work done on a different queue
    * (e.g. when a graphics command buffer needs compute work).
    * Currently only one follower is possible per command buffer.
    */
   struct {
      /** Follower command stream. */
      struct radeon_cmdbuf *cs;

      /** Flush bits for the follower cmdbuf. */
      enum radv_cmd_flush_bits flush_bits;

      /**
       * For synchronization between the follower and leader.
       * The values of these semaphores are incremented whenever we
       * encounter a barrier that affects the follower.
       *
       * DWORD 0: Leader to follower semaphore.
       *          The leader writes the value and the follower waits.
       * DWORD 1: Follower to leader semaphore.
       *          The follower writes the value, and the leader waits.
       */
      struct {
         uint64_t va;                     /* Virtual address of the semaphore. */
         uint32_t leader_value;           /* Current value of the leader. */
         uint32_t emitted_leader_value;   /* Last value emitted by the leader. */
         uint32_t follower_value;         /* Current value of the follower. */
         uint32_t emitted_follower_value; /* Last value emitted by the follower. */
      } sem;
   } gang;

   /**
    * Whether a query pool has been reset and we have to flush caches.
    */
   bool pending_reset_query;

   /**
    * Bitmask of pending active query flushes.
    */
   enum radv_cmd_flush_bits active_query_flush_bits;

   struct {
      struct radv_video_session *vid;
      struct radv_video_session_params *params;
      struct rvcn_sq_var sq;
      struct rvcn_decode_buffer_s *decode_buffer;
      struct radv_enc_state enc;
      uint64_t feedback_query_va;
   } video;

   struct {
      /* Temporary space for some transfer queue copy command workarounds. */
      struct radeon_winsys_bo *copy_temp;
   } transfer;

   uint64_t shader_upload_seq;

   uint32_t sqtt_cb_id;

   struct set *accel_struct_buffers;
   struct util_dynarray ray_history;
};

VK_DEFINE_HANDLE_CASTS(radv_cmd_buffer, vk.base, VkCommandBuffer, VK_OBJECT_TYPE_COMMAND_BUFFER)

static inline struct radv_device *
radv_cmd_buffer_device(const struct radv_cmd_buffer *cmd_buffer)
{
   return (struct radv_device *)cmd_buffer->vk.base.device;
}

ALWAYS_INLINE static bool
radv_is_streamout_enabled(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_streamout_state *so = &cmd_buffer->state.streamout;

   /* Streamout must be enabled for the PRIMITIVES_GENERATED query to work. */
   return (so->streamout_enabled || cmd_buffer->state.active_prims_gen_queries) && !cmd_buffer->state.suspend_streamout;
}

static inline unsigned
vk_to_bind_point(VkPipelineBindPoint bind_point)
{
   return bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR ? 2 : bind_point;
}

static inline struct radv_descriptor_state *
radv_get_descriptors_state(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
{
   return &cmd_buffer->descriptors[vk_to_bind_point(bind_point)];
}

static inline const struct radv_push_constant_state *
radv_get_push_constants_state(const struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
{
   return &cmd_buffer->push_constant_state[vk_to_bind_point(bind_point)];
}

static inline bool
radv_cmdbuf_has_stage(const struct radv_cmd_buffer *cmd_buffer, gl_shader_stage stage)
{
   return !!(cmd_buffer->state.active_stages & mesa_to_vk_shader_stage(stage));
}

static inline uint32_t
radv_get_num_pipeline_stat_queries(struct radv_cmd_buffer *cmd_buffer)
{
   /* SAMPLE_STREAMOUTSTATS also requires PIPELINESTAT_START to be enabled. */
   return cmd_buffer->state.active_pipeline_queries + cmd_buffer->state.active_prims_gen_queries +
          cmd_buffer->state.active_prims_xfb_queries;
}

static inline void
radv_emit_shader_pointer_head(struct radeon_cmdbuf *cs, unsigned sh_offset, unsigned pointer_count,
                              bool use_32bit_pointers)
{
   radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * (use_32bit_pointers ? 1 : 2), 0));
   radeon_emit(cs, (sh_offset - SI_SH_REG_OFFSET) >> 2);
}

static inline void
radv_emit_shader_pointer_body(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va,
                              bool use_32bit_pointers)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   radeon_emit(cs, va);

   if (use_32bit_pointers) {
      assert(va == 0 || (va >> 32) == pdev->info.address32_hi);
   } else {
      radeon_emit(cs, va >> 32);
   }
}

static inline void
radv_emit_shader_pointer(const struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t sh_offset, uint64_t va,
                         bool global)
{
   bool use_32bit_pointers = !global;

   radv_emit_shader_pointer_head(cs, sh_offset, 1, use_32bit_pointers);
   radv_emit_shader_pointer_body(device, cs, va, use_32bit_pointers);
}
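
/*
 * Illustrative sketch, not part of the driver API: the head/body split allows
 * several consecutive user SGPR pointers to be emitted with a single
 * SET_SH_REG packet; radv_emit_shader_pointer() is the single-pointer
 * convenience wrapper. The sh_offset/va0/va1 names below are hypothetical:
 *
 *    radv_emit_shader_pointer_head(cs, sh_offset, 2, true);
 *    radv_emit_shader_pointer_body(device, cs, va0, true);
 *    radv_emit_shader_pointer_body(device, cs, va1, true);
 */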

bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer);

void radv_cmd_buffer_reset_rendering(struct radv_cmd_buffer *cmd_buffer);

bool radv_cmd_buffer_upload_alloc_aligned(struct radv_cmd_buffer *cmd_buffer, unsigned size, unsigned alignment,
                                          unsigned *out_offset, void **ptr);

bool radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size, unsigned *out_offset, void **ptr);

bool radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size, const void *data,
                                 unsigned *out_offset);
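
/*
 * Illustrative sketch, not part of the driver API: the upload helpers stage
 * small transient data (constants, descriptors, ...) in the command buffer's
 * upload BO and return the byte offset into it. The size/data names below are
 * hypothetical:
 *
 *    unsigned offset;
 *    void *ptr;
 *    if (radv_cmd_buffer_upload_alloc(cmd_buffer, size, &offset, &ptr)) {
 *       memcpy(ptr, data, size); // data now lives at 'offset' in cmd_buffer->upload.upload_bo
 *    }
 */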

void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer);

void radv_cmd_buffer_annotate(struct radv_cmd_buffer *cmd_buffer, const char *annotation);

void radv_gang_cache_flush(struct radv_cmd_buffer *cmd_buffer);

bool radv_gang_init(struct radv_cmd_buffer *cmd_buffer);

void radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point,
                             struct radv_descriptor_set *set, unsigned idx);

void radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
                                   VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects);

void radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                              const VkImageSubresourceRange *range, bool value);

void radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                              const VkImageSubresourceRange *range, bool value);

void radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
                                      int cb_idx, uint32_t color_values[2]);

unsigned radv_instance_rate_prolog_index(unsigned num_attributes, uint32_t instance_rate_inputs);

enum radv_cmd_flush_bits radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 src_stages,
                                               VkAccessFlags2 src_flags, const struct radv_image *image);

enum radv_cmd_flush_bits radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 dst_stages,
                                               VkAccessFlags2 dst_flags, const struct radv_image *image);

struct radv_resolve_barrier {
   VkPipelineStageFlags2 src_stage_mask;
   VkPipelineStageFlags2 dst_stage_mask;
   VkAccessFlags2 src_access_mask;
   VkAccessFlags2 dst_access_mask;
};

void radv_emit_resolve_barrier(struct radv_cmd_buffer *cmd_buffer, const struct radv_resolve_barrier *barrier);

void radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint pipelineBindPoint,
                                   VkPipelineLayout _layout, uint32_t set, uint32_t descriptorWriteCount,
                                   const VkWriteDescriptorSet *pDescriptorWrites);

struct radv_dispatch_info {
   /**
    * Determine the layout of the grid (in block units) to be used.
    */
   uint32_t blocks[3];

   /**
    * A starting offset for the grid. If unaligned is set, the offset
    * must still be aligned.
    */
   uint32_t offsets[3];

   /**
    * Whether it's an unaligned compute dispatch.
    */
   bool unaligned;

   /**
    * Whether waves must be launched in order.
    */
   bool ordered;

   /**
    * Indirect compute parameters resource.
    */
   struct radeon_winsys_bo *indirect;
   uint64_t va;
};

void radv_compute_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_info *info);
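
/*
 * Illustrative sketch, not part of the driver API: a direct dispatch can be
 * expressed by filling radv_dispatch_info with the grid size in blocks and
 * leaving the indirect fields zeroed (the group counts below are hypothetical):
 *
 *    struct radv_dispatch_info info = {
 *       .blocks = {group_count_x, group_count_y, group_count_z},
 *    };
 *    radv_compute_dispatch(cmd_buffer, &info);
 */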

/*
 * Takes x,y,z as exact numbers of invocations, instead of blocks.
 *
 * Limitations: Can't call normal dispatch functions without binding or rebinding
 * the compute pipeline.
 */
void radv_unaligned_dispatch(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, uint32_t z);

void radv_indirect_dispatch(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo, uint64_t va);

uint32_t radv_init_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                         const VkImageSubresourceRange *range);

uint32_t radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                       const VkImageSubresourceRange *range, uint32_t value);

void radv_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);

void radv_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible, unsigned pred_op,
                                     uint64_t va);

void radv_begin_conditional_rendering(struct radv_cmd_buffer *cmd_buffer, uint64_t va, bool draw_visible);

void radv_end_conditional_rendering(struct radv_cmd_buffer *cmd_buffer);

uint64_t radv_descriptor_get_va(const struct radv_descriptor_state *descriptors_state, unsigned set_idx);

struct radv_vbo_info {
   uint64_t va;

   uint32_t binding;
   uint32_t stride;
   uint32_t size;

   uint32_t attrib_offset;
   uint32_t attrib_index_offset;
   uint32_t attrib_format_size;

   uint32_t non_trivial_format;
};

void radv_get_vbo_info(const struct radv_cmd_buffer *cmd_buffer, uint32_t vbo_idx, struct radv_vbo_info *vbo_info);

void radv_emit_compute_shader(const struct radv_physical_device *pdev, struct radeon_cmdbuf *cs,
                              const struct radv_shader *shader);

void radv_upload_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer,
                                          struct radv_descriptor_state *descriptors_state);

#endif /* RADV_CMD_BUFFER_H */