1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef ANV_PRIVATE_H
25 #define ANV_PRIVATE_H
26
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <stdbool.h>
30 #include <pthread.h>
31 #include <assert.h>
32 #include <stdint.h>
33 #include "drm-uapi/drm_fourcc.h"
34
35 #ifdef HAVE_VALGRIND
36 #include <valgrind.h>
37 #include <memcheck.h>
38 #define VG(x) x
39 #else
40 #define VG(x) ((void)0)
41 #endif
42
43 #include "common/intel_aux_map.h"
44 #include "common/intel_bind_timeline.h"
45 #include "common/intel_engine.h"
46 #include "common/intel_gem.h"
47 #include "common/intel_l3_config.h"
48 #include "common/intel_measure.h"
49 #include "common/intel_mem.h"
50 #include "common/intel_sample_positions.h"
51 #include "decoder/intel_decoder.h"
52 #include "dev/intel_device_info.h"
53 #include "blorp/blorp.h"
54 #include "compiler/brw_compiler.h"
55 #include "compiler/brw_kernel.h"
56 #include "compiler/brw_rt.h"
57 #include "ds/intel_driver_ds.h"
58 #include "util/bitset.h"
59 #include "util/bitscan.h"
60 #include "util/detect_os.h"
61 #include "util/macros.h"
62 #include "util/hash_table.h"
63 #include "util/list.h"
64 #include "util/perf/u_trace.h"
65 #include "util/set.h"
66 #include "util/sparse_array.h"
67 #include "util/u_atomic.h"
68 #if DETECT_OS_ANDROID
69 #include "util/u_gralloc/u_gralloc.h"
70 #endif
71 #include "util/u_vector.h"
72 #include "util/u_math.h"
73 #include "util/vma.h"
74 #include "util/xmlconfig.h"
75 #include "vk_acceleration_structure.h"
76 #include "vk_alloc.h"
77 #include "vk_buffer.h"
78 #include "vk_buffer_view.h"
79 #include "vk_command_buffer.h"
80 #include "vk_command_pool.h"
81 #include "vk_debug_report.h"
82 #include "vk_descriptor_update_template.h"
83 #include "vk_device.h"
84 #include "vk_device_memory.h"
85 #include "vk_drm_syncobj.h"
86 #include "vk_enum_defines.h"
87 #include "vk_format.h"
88 #include "vk_framebuffer.h"
89 #include "vk_graphics_state.h"
90 #include "vk_image.h"
91 #include "vk_instance.h"
92 #include "vk_pipeline_cache.h"
93 #include "vk_physical_device.h"
94 #include "vk_sampler.h"
95 #include "vk_shader_module.h"
96 #include "vk_sync.h"
97 #include "vk_sync_timeline.h"
98 #include "vk_texcompress_astc.h"
99 #include "vk_util.h"
100 #include "vk_query_pool.h"
101 #include "vk_queue.h"
102 #include "vk_log.h"
103 #include "vk_ycbcr_conversion.h"
104 #include "vk_video.h"
105
106 #ifdef __cplusplus
107 extern "C" {
108 #endif
109
110 /* Pre-declarations needed for WSI entrypoints */
111 struct wl_surface;
112 struct wl_display;
113 typedef struct xcb_connection_t xcb_connection_t;
114 typedef uint32_t xcb_visualid_t;
115 typedef uint32_t xcb_window_t;
116
117 struct anv_batch;
118 struct anv_buffer;
119 struct anv_buffer_view;
120 struct anv_image_view;
121 struct anv_instance;
122
123 struct intel_aux_map_context;
124 struct intel_perf_config;
125 struct intel_perf_counter_pass;
126 struct intel_perf_query_result;
127
128 #include <vulkan/vulkan.h>
129 #include <vulkan/vk_icd.h>
130
131 #include "anv_android.h"
132 #include "anv_entrypoints.h"
133 #include "anv_kmd_backend.h"
134 #include "anv_rmv.h"
135 #include "isl/isl.h"
136
137 #include "dev/intel_debug.h"
138 #undef MESA_LOG_TAG
139 #define MESA_LOG_TAG "MESA-INTEL"
140 #include "util/log.h"
141 #include "wsi_common.h"
142
143 /* The "RAW" clocks on Linux are called "FAST" on FreeBSD */
144 #if !defined(CLOCK_MONOTONIC_RAW) && defined(CLOCK_MONOTONIC_FAST)
145 #define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST
146 #endif
147
148 #define NSEC_PER_SEC 1000000000ull
149
150 #define BINDING_TABLE_POOL_BLOCK_SIZE (65536)
151
152 /* 3DSTATE_VERTEX_BUFFER supports 33 VBs, we use 2 for base & drawid SGVs */
153 #define MAX_VBS (33 - 2)
154
155 /* 3DSTATE_VERTEX_ELEMENTS supports up to 34 VEs, but our backend compiler
156 * only supports the push model of VS inputs, and we only have 128 GRFs,
157 * minus the g0 and g1 payload, which gives us a maximum of 31 VEs. Plus,
158 * we use two of them for SGVs.
159 */
160 #define MAX_VES (31 - 2)
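/* For reference (an illustrative derivation, assuming the SIMD8 push model):
 * each 32-bit component of a vertex element occupies one GRF (8 lanes x 4
 * bytes), so a vec4 VE needs 4 GRFs and (128 - 2 payload GRFs) / 4 rounds
 * down to the 31 VEs mentioned above.
 */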
161
162 #define MAX_XFB_BUFFERS 4
163 #define MAX_XFB_STREAMS 4
164 #define MAX_SETS 8
165 #define MAX_RTS 8
166 #define MAX_VIEWPORTS 16
167 #define MAX_SCISSORS 16
168 #define MAX_PUSH_CONSTANTS_SIZE 128
169 #define MAX_DYNAMIC_BUFFERS 16
170 #define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
171 #define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096
172 #define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32
173 #define MAX_EMBEDDED_SAMPLERS 2048
174 #define MAX_CUSTOM_BORDER_COLORS 4096
175 /* We need 16 for UBO block reads to work and 32 for push UBOs. However, we
176 * use 64 here to avoid cache issues. This could most likely bring it back to
177 * 32 if we had different virtual addresses for the different views on a given
178 * GEM object.
179 */
180 #define ANV_UBO_ALIGNMENT 64
181 #define ANV_SSBO_ALIGNMENT 4
182 #define ANV_SSBO_BOUNDS_CHECK_ALIGNMENT 4
183 #define MAX_VIEWS_FOR_PRIMITIVE_REPLICATION 16
184 #define MAX_SAMPLE_LOCATIONS 16
185
186 /* RENDER_SURFACE_STATE is a bit smaller (48b) but since it is aligned to 64
187 * and we can't put anything else there we use 64b.
188 */
189 #define ANV_SURFACE_STATE_SIZE (64)
190
191 /* From the Skylake PRM Vol. 7 "Binding Table Surface State Model":
192 *
193 * "The surface state model is used when a Binding Table Index (specified
194 * in the message descriptor) of less than 240 is specified. In this model,
195 * the Binding Table Index is used to index into the binding table, and the
196 * binding table entry contains a pointer to the SURFACE_STATE."
197 *
198 * Binding table values above 240 are used for various things in the hardware
199 * such as stateless, stateless with incoherent cache, SLM, and bindless.
200 */
201 #define MAX_BINDING_TABLE_SIZE 240
202
203 #define ANV_SVGS_VB_INDEX MAX_VBS
204 #define ANV_DRAWID_VB_INDEX (MAX_VBS + 1)
205
206 /* We reserve this MI ALU register for the purpose of handling predication.
207 * Other code which uses the MI ALU should leave it alone.
208 */
209 #define ANV_PREDICATE_RESULT_REG 0x2678 /* MI_ALU_REG15 */
210
211 /* We reserve this MI ALU register to pass around an offset computed from
212 * VkPerformanceQuerySubmitInfoKHR::counterPassIndex VK_KHR_performance_query.
213 * Other code which uses the MI ALU should leave it alone.
214 */
215 #define ANV_PERF_QUERY_OFFSET_REG 0x2670 /* MI_ALU_REG14 */
216
217 /* We reserve this MI ALU register to hold the last programmed bindless
218 * surface state base address so that we can predicate STATE_BASE_ADDRESS
219 * emissions if the address doesn't change.
220 */
221 #define ANV_BINDLESS_SURFACE_BASE_ADDR_REG 0x2668 /* MI_ALU_REG13 */
222
223 #define ANV_GRAPHICS_SHADER_STAGE_COUNT (MESA_SHADER_MESH + 1)
224
229 #define ANV_SAMPLER_STATE_SIZE (32)
230
231 /* For gfx12 we set the streamout buffers using 4 separate commands
232 * (3DSTATE_SO_BUFFER_INDEX_*) instead of 3DSTATE_SO_BUFFER. However the layout
233 * of the 3DSTATE_SO_BUFFER_INDEX_* commands is identical to that of
234 * 3DSTATE_SO_BUFFER apart from the SOBufferIndex field, so for now we use the
235 * 3DSTATE_SO_BUFFER command, but change the 3DCommandSubOpcode.
236 * SO_BUFFER_INDEX_0_CMD is actually the 3DCommandSubOpcode for
237 * 3DSTATE_SO_BUFFER_INDEX_0.
238 */
239 #define SO_BUFFER_INDEX_0_CMD 0x60
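/* As a rough sketch of the override described above (assuming the
 * GENX()/anv_batch_emit packing helpers used elsewhere in the driver):
 *
 *    anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SO_BUFFER), sob) {
 *       sob._3DCommandSubOpcode = SO_BUFFER_INDEX_0_CMD + buffer_index;
 *       ...
 *    }
 */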
240 #define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
241
242 /* The TR-TT L1 page table entries may contain these values instead of actual
243 * pointers to indicate the regions are either NULL or invalid. We program
244 * these values to TR-TT registers, so we could change them, but it's super
245 * convenient to have the NULL value be 0 because everything is
246 * zero-initialized when allocated.
247 *
248 * Since we reserve these values for NULL/INVALID, then we can't use them as
249 * destinations for TR-TT address translation. Both values are shifted by 16
250 * bits, which results in graphics addresses 0 and 64k. In Anv, the first vma
251 * starts at 2MB, so we already don't use 0 and 64k for anything, so there's
252 * nothing really to reserve. We could instead just reserve random 64kb
253 * ranges from any of the non-TR-TT vmas and use their addresses.
254 */
255 #define ANV_TRTT_L1_NULL_TILE_VAL 0
256 #define ANV_TRTT_L1_INVALID_TILE_VAL 1
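/* For illustration: with the 16-bit shift described above, the two reserved
 * values translate to graphics addresses
 *
 *    ANV_TRTT_L1_NULL_TILE_VAL    << 16 == 0x00000  (0)
 *    ANV_TRTT_L1_INVALID_TILE_VAL << 16 == 0x10000  (64k)
 *
 * both of which sit below the 2MB start of the first vma and are therefore
 * never used as real translation targets.
 */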
257
258 static inline uint32_t
259 align_down_npot_u32(uint32_t v, uint32_t a)
260 {
261 return v - (v % a);
262 }
263
264 /** Alignment must be a power of 2. */
265 static inline bool
266 anv_is_aligned(uintmax_t n, uintmax_t a)
267 {
268 assert(a == (a & -a));
269 return (n & (a - 1)) == 0;
270 }
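/* The assert above uses the usual two's complement trick: for a power of two,
 * a & -a isolates the lowest set bit, which is a itself. For example, a = 8
 * (0b1000) gives 8 & -8 == 8, while a = 12 (0b1100) gives 12 & -12 == 4 and
 * trips the assert.
 */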
271
272 static inline union isl_color_value
273 vk_to_isl_color(VkClearColorValue color)
274 {
275 return (union isl_color_value) {
276 .u32 = {
277 color.uint32[0],
278 color.uint32[1],
279 color.uint32[2],
280 color.uint32[3],
281 },
282 };
283 }
284
285 static inline union isl_color_value
286 vk_to_isl_color_with_format(VkClearColorValue color, enum isl_format format)
287 {
288 const struct isl_format_layout *fmtl = isl_format_get_layout(format);
289 union isl_color_value isl_color = { .u32 = {0, } };
290
291 #define COPY_COLOR_CHANNEL(c, i) \
292 if (fmtl->channels.c.bits) \
293 isl_color.u32[i] = color.uint32[i]
294
295 COPY_COLOR_CHANNEL(r, 0);
296 COPY_COLOR_CHANNEL(g, 1);
297 COPY_COLOR_CHANNEL(b, 2);
298 COPY_COLOR_CHANNEL(a, 3);
299
300 #undef COPY_COLOR_CHANNEL
301
302 return isl_color;
303 }
304
305 void __anv_perf_warn(struct anv_device *device,
306 const struct vk_object_base *object,
307 const char *file, int line, const char *format, ...)
308 anv_printflike(5, 6);
309
310 /**
311 * Print a FINISHME message, including its source location.
312 */
313 #define anv_finishme(format, ...) \
314 do { \
315 static bool reported = false; \
316 if (!reported) { \
317 mesa_logw("%s:%d: FINISHME: " format, __FILE__, __LINE__, \
318 ##__VA_ARGS__); \
319 reported = true; \
320 } \
321 } while (0)
322
323 /**
324 * Print a perf warning message. Set INTEL_DEBUG=perf to see these.
325 */
326 #define anv_perf_warn(objects_macro, format, ...) \
327 do { \
328 static bool reported = false; \
329 if (!reported && INTEL_DEBUG(DEBUG_PERF)) { \
330 __vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT, \
331 VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, \
332 objects_macro, __FILE__, __LINE__, \
333 format, ## __VA_ARGS__); \
334 reported = true; \
335 } \
336 } while (0)
337
338 /* A non-fatal assert. Useful for debugging. */
339 #if MESA_DEBUG
340 #define anv_assert(x) ({ \
341 if (unlikely(!(x))) \
342 mesa_loge("%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \
343 })
344 #else
345 #define anv_assert(x)
346 #endif
347
348 enum anv_bo_alloc_flags {
349 /** Specifies that the BO must have a 32-bit address
350 *
351 * This is the opposite of EXEC_OBJECT_SUPPORTS_48B_ADDRESS.
352 */
353 ANV_BO_ALLOC_32BIT_ADDRESS = (1 << 0),
354
355 /** Specifies that the BO may be shared externally */
356 ANV_BO_ALLOC_EXTERNAL = (1 << 1),
357
358 /** Specifies that the BO should be mapped */
359 ANV_BO_ALLOC_MAPPED = (1 << 2),
360
361 /** Specifies that the BO should be coherent.
362 *
363 * Note: On platforms with LLC, where HOST_CACHED + HOST_COHERENT is free,
364 * the BO can get upgraded to HOST_CACHED_COHERENT.
365 */
366 ANV_BO_ALLOC_HOST_COHERENT = (1 << 3),
367
368 /** Specifies that the BO should be captured in error states */
369 ANV_BO_ALLOC_CAPTURE = (1 << 4),
370
371 /** Specifies that the BO will have an address assigned by the caller
372 *
373 * Such BOs do not exist in any VMA heap.
374 */
375 ANV_BO_ALLOC_FIXED_ADDRESS = (1 << 5),
376
377 /** Enables implicit synchronization on the BO
378 *
379 * This is the opposite of EXEC_OBJECT_ASYNC.
380 */
381 ANV_BO_ALLOC_IMPLICIT_SYNC = (1 << 6),
382
383 /** Enables implicit synchronization on the BO
384 *
385 * This is equivalent to EXEC_OBJECT_WRITE.
386 */
387 ANV_BO_ALLOC_IMPLICIT_WRITE = (1 << 7),
388
389 /** Has an address which is visible to the client */
390 ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS = (1 << 8),
391
392 /** Align the BO's virtual address to match AUX-TT requirements */
393 ANV_BO_ALLOC_AUX_TT_ALIGNED = (1 << 9),
394
395 /** This buffer is allocated from local memory and should be cpu visible */
396 ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE = (1 << 10),
397
398 /** For non device local allocations */
399 ANV_BO_ALLOC_NO_LOCAL_MEM = (1 << 11),
400
401 /** This buffer will be scanned out to a display */
402 ANV_BO_ALLOC_SCANOUT = (1 << 12),
403
404 /** For descriptor pools */
405 ANV_BO_ALLOC_DESCRIPTOR_POOL = (1 << 13),
406
407 /** For buffers that will be bound using TR-TT.
408 *
409 * Not for buffers used as the TR-TT page tables.
410 */
411 ANV_BO_ALLOC_TRTT = (1 << 14),
412
413 /** Protected buffer */
414 ANV_BO_ALLOC_PROTECTED = (1 << 15),
415
416 /** Specifies that the BO should be cached and incoherent. */
417 ANV_BO_ALLOC_HOST_CACHED = (1 << 16),
418
419 /** For buffers addressable from the dynamic state heap */
420 ANV_BO_ALLOC_DYNAMIC_VISIBLE_POOL = (1 << 17),
421
422 /** Specifies that the BO is imported.
423 *
424 * Imported BOs must also be marked as ANV_BO_ALLOC_EXTERNAL
425 */
426 ANV_BO_ALLOC_IMPORTED = (1 << 18),
427
428 /** Specify whether this BO is internal to the driver */
429 ANV_BO_ALLOC_INTERNAL = (1 << 19),
430
431 /** Allocate with CCS AUX requirements
432 *
433 * This pads the BO to include CCS data mappable through the AUX-TT and
434 * aligned to the AUX-TT requirements.
435 */
436 ANV_BO_ALLOC_AUX_CCS = (1 << 20),
437
438 /** Compressed buffer, only supported in Xe2+ */
439 ANV_BO_ALLOC_COMPRESSED = (1 << 21),
440 };
441
442 /** Specifies that the BO should be cached and coherent. */
443 #define ANV_BO_ALLOC_HOST_CACHED_COHERENT (ANV_BO_ALLOC_HOST_COHERENT | \
444 ANV_BO_ALLOC_HOST_CACHED)
445
446
447 struct anv_bo {
448 const char *name;
449
450 /* The VMA heap in anv_device from which this BO takes its offset.
451 *
452 * This can only be NULL when has_fixed_address is true.
453 */
454 struct util_vma_heap *vma_heap;
455
456 /* All userptr bos in Xe KMD have gem_handle set to workaround_bo->gem_handle */
457 uint32_t gem_handle;
458
459 uint32_t refcount;
460
461 /* Index into the current validation list. This is used by the
462 * validation list building algorithm to track which buffers are already
463 * in the validation list so that we can ensure uniqueness.
464 */
465 uint32_t exec_obj_index;
466
467 /* Index for use with util_sparse_array_free_list */
468 uint32_t free_index;
469
470 /* Last known offset. This value is provided by the kernel when we
471 * execbuf and is used as the presumed offset for the next bunch of
472 * relocations, in canonical address format.
473 */
474 uint64_t offset;
475
476 /** Size of the buffer */
477 uint64_t size;
478
479 /** Offset at which the CCS data is stored */
480 uint64_t ccs_offset;
481
482 /* Map for internally mapped BOs.
483 *
484 * If ANV_BO_ALLOC_MAPPED is set in flags, this is the map for the whole
485 * BO.
486 */
487 void *map;
488
489 /* The actual size of the BO allocated by the KMD, basically:
490 * align(size, mem_alignment)
491 */
492 uint64_t actual_size;
493
494 /** Flags to pass to the kernel through drm_i915_exec_object2::flags */
495 uint32_t flags;
496
497 enum anv_bo_alloc_flags alloc_flags;
498
499 /** True if this BO wraps a host pointer */
500 bool from_host_ptr:1;
501
502 /** True if this BO is mapped in the GTT (only used for RMV) */
503 bool gtt_mapped:1;
504 };
505
506 static inline bool
507 anv_bo_is_external(const struct anv_bo *bo)
508 {
509 return bo->alloc_flags & ANV_BO_ALLOC_EXTERNAL;
510 }
511
512 static inline bool
513 anv_bo_is_vram_only(const struct anv_bo *bo)
514 {
515 return !(bo->alloc_flags & (ANV_BO_ALLOC_NO_LOCAL_MEM |
516 ANV_BO_ALLOC_MAPPED |
517 ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE |
518 ANV_BO_ALLOC_IMPORTED));
519 }
520
521 static inline struct anv_bo *
522 anv_bo_ref(struct anv_bo *bo)
523 {
524 p_atomic_inc(&bo->refcount);
525 return bo;
526 }
527
528 enum intel_device_info_mmap_mode
529 anv_bo_get_mmap_mode(struct anv_device *device, struct anv_bo *bo);
530
531 static inline bool
532 anv_bo_needs_host_cache_flush(enum anv_bo_alloc_flags alloc_flags)
533 {
534 return (alloc_flags & (ANV_BO_ALLOC_HOST_CACHED | ANV_BO_ALLOC_HOST_COHERENT)) ==
535 ANV_BO_ALLOC_HOST_CACHED;
536 }
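/* Illustration of the check above: a flush is only needed when a BO is
 * host-cached but not host-coherent.
 *
 *    ANV_BO_ALLOC_HOST_CACHED alone    -> true  (CPU caches must be flushed/invalidated)
 *    ANV_BO_ALLOC_HOST_CACHED_COHERENT -> false (coherency is handled for us)
 *    ANV_BO_ALLOC_HOST_COHERENT alone  -> false (nothing cached to flush)
 */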
537
538 struct anv_address {
539 struct anv_bo *bo;
540 int64_t offset;
541 };
542
543 #define ANV_NULL_ADDRESS ((struct anv_address) { NULL, 0 })
544
545 static inline struct anv_address
546 anv_address_from_u64(uint64_t addr_u64)
547 {
548 assert(addr_u64 == intel_canonical_address(addr_u64));
549 return (struct anv_address) {
550 .bo = NULL,
551 .offset = addr_u64,
552 };
553 }
554
555 static inline bool
556 anv_address_is_null(struct anv_address addr)
557 {
558 return addr.bo == NULL && addr.offset == 0;
559 }
560
561 static inline uint64_t
562 anv_address_physical(struct anv_address addr)
563 {
564 uint64_t address = (addr.bo ? addr.bo->offset : 0ull) + addr.offset;
565 return intel_canonical_address(address);
566 }
567
568 static inline struct u_trace_address
569 anv_address_utrace(struct anv_address addr)
570 {
571 return (struct u_trace_address) {
572 .bo = addr.bo,
573 .offset = addr.offset,
574 };
575 }
576
577 static inline struct anv_address
578 anv_address_add(struct anv_address addr, uint64_t offset)
579 {
580 addr.offset += offset;
581 return addr;
582 }
583
584 static inline struct anv_address
585 anv_address_add_aligned(struct anv_address addr, uint64_t offset, uint32_t alignment)
586 {
587 addr.offset = align(addr.offset + offset, alignment);
588 return addr;
589 }
590
591 static inline void *
592 anv_address_map(struct anv_address addr)
593 {
594 if (addr.bo == NULL)
595 return NULL;
596
597 if (addr.bo->map == NULL)
598 return NULL;
599
600 return addr.bo->map + addr.offset;
601 }
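/* A small usage sketch for the address helpers above (values illustrative):
 *
 *    struct anv_address addr = { .bo = bo, .offset = 64 };
 *    uint64_t gpu_va = anv_address_physical(addr);  // bo->offset + 64, canonical form
 *    void *cpu = anv_address_map(addr);             // bo->map + 64, or NULL if not mapped
 *    addr = anv_address_add(addr, 16);              // same BO, offset 80
 */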
602
603 /* Represent a virtual address range */
604 struct anv_va_range {
605 uint64_t addr;
606 uint64_t size;
607 };
608
609 /* Represents a lock-free linked list of "free" things. This is used by
610 * both the block pool and the state pools. Unfortunately, in order to
611 * solve the ABA problem, we can't use a single uint32_t head.
612 */
613 union anv_free_list {
614 struct {
615 uint32_t offset;
616
617 /* A simple count that is incremented every time the head changes. */
618 uint32_t count;
619 };
620 /* Make sure it's aligned to 64 bits. This will make atomic operations
621 * faster on 32 bit platforms.
622 */
623 alignas(8) uint64_t u64;
624 };
625
626 #define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { UINT32_MAX, 0 } })
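/* The count field above is what defeats the ABA problem: consumers
 * compare-exchange the whole 64-bit {offset, count} pair, so if another
 * thread pops and then pushes the same offset back in between, the bumped
 * count still makes the stale exchange fail. Conceptually (a simplified
 * sketch, not the actual implementation; next_of() is hypothetical):
 *
 *    union anv_free_list current, updated;
 *    do {
 *       current.u64 = list->u64;                // snapshot offset + count
 *       if (current.offset == UINT32_MAX)
 *          return NULL;                         // list is empty
 *       updated.offset = next_of(current.offset);
 *       updated.count = current.count + 1;
 *    } while (!__sync_bool_compare_and_swap(&list->u64, current.u64, updated.u64));
 */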
627
628 struct anv_block_state {
629 union {
630 struct {
631 uint32_t next;
632 uint32_t end;
633 };
634 /* Make sure it's aligned to 64 bits. This will make atomic operations
635 * faster on 32 bit platforms.
636 */
637 alignas(8) uint64_t u64;
638 };
639 };
640
641 #define anv_block_pool_foreach_bo(bo, pool) \
642 for (struct anv_bo **_pp_bo = (pool)->bos, *bo; \
643 _pp_bo != &(pool)->bos[(pool)->nbos] && (bo = *_pp_bo, true); \
644 _pp_bo++)
645
646 #define ANV_MAX_BLOCK_POOL_BOS 20
647
648 struct anv_block_pool {
649 const char *name;
650
651 struct anv_device *device;
652
653 struct anv_bo *bos[ANV_MAX_BLOCK_POOL_BOS];
654 struct anv_bo *bo;
655 uint32_t nbos;
656
657 /* Maximum size of the pool */
658 uint64_t max_size;
659
660 /* Current size of the pool */
661 uint64_t size;
662
663 /* The canonical address where the start of the pool is pinned. The various bos that
664 * are created as the pool grows will have addresses in the range
665 * [start_address, start_address + BLOCK_POOL_MEMFD_SIZE).
666 */
667 uint64_t start_address;
668
669 /* The offset from the start of the bo to the "center" of the block
670 * pool. Pointers to allocated blocks are given by
671 * bo.map + center_bo_offset + offsets.
672 */
673 uint32_t center_bo_offset;
674
675 struct anv_block_state state;
676
677 enum anv_bo_alloc_flags bo_alloc_flags;
678 };
679
680 /* Block pools are backed by a fixed-size 1GB memfd */
681 #define BLOCK_POOL_MEMFD_SIZE (1ul << 30)
682
683 /* The center of the block pool is also the middle of the memfd. This may
684 * change in the future if we decide differently for some reason.
685 */
686 #define BLOCK_POOL_MEMFD_CENTER (BLOCK_POOL_MEMFD_SIZE / 2)
687
688 static inline uint32_t
689 anv_block_pool_size(struct anv_block_pool *pool)
690 {
691 return pool->state.end;
692 }
693
694 struct anv_state {
695 int64_t offset;
696 uint32_t alloc_size;
697 uint32_t idx;
698 void *map;
699 };
700
701 #define ANV_STATE_NULL ((struct anv_state) { .alloc_size = 0 })
702
703 struct anv_fixed_size_state_pool {
704 union anv_free_list free_list;
705 struct anv_block_state block;
706 };
707
708 #define ANV_MIN_STATE_SIZE_LOG2 6
709 #define ANV_MAX_STATE_SIZE_LOG2 24
710
711 #define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1)
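/* With the values above there are 24 - 6 + 1 = 19 buckets covering
 * power-of-two sizes from 64 bytes (1 << 6) up to 16MB (1 << 24). As an
 * illustration of the mapping (assuming sizes are rounded up to the next
 * power of two), a 100-byte state lands in the 128-byte bucket, i.e. bucket
 * index log2(128) - ANV_MIN_STATE_SIZE_LOG2 = 1.
 */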
712
713 struct anv_free_entry {
714 uint32_t next;
715 struct anv_state state;
716 };
717
718 struct anv_state_table {
719 struct anv_device *device;
720 int fd;
721 struct anv_free_entry *map;
722 uint32_t size;
723 uint64_t max_size;
724 struct anv_block_state state;
725 struct u_vector cleanups;
726 };
727
728 struct anv_state_pool {
729 struct anv_block_pool block_pool;
730
731 /* Offset into the relevant state base address where the state pool starts
732 * allocating memory.
733 */
734 int64_t start_offset;
735
736 struct anv_state_table table;
737
738 /* The size of blocks which will be allocated from the block pool */
739 uint32_t block_size;
740
741 struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS];
742 };
743
744 struct anv_state_reserved_pool {
745 struct anv_state_pool *pool;
746 union anv_free_list reserved_blocks;
747 uint32_t count;
748 };
749
750 struct anv_state_reserved_array_pool {
751 struct anv_state_pool *pool;
752 simple_mtx_t mutex;
753 /* Bitfield of usable elements */
754 BITSET_WORD *states;
755 /* Backing store */
756 struct anv_state state;
757 /* Number of elements */
758 uint32_t count;
759 /* Stride between each element */
760 uint32_t stride;
761 /* Size of each element */
762 uint32_t size;
763 };
764
765 struct anv_state_stream {
766 struct anv_state_pool *state_pool;
767
768 /* The size of blocks to allocate from the state pool */
769 uint32_t block_size;
770
771 /* Current block we're allocating from */
772 struct anv_state block;
773
774 /* Offset into the current block at which to allocate the next state */
775 uint32_t next;
776
777 /* Sum of all the blocks in all_blocks */
778 uint32_t total_size;
779
780 /* List of all blocks allocated from this pool */
781 struct util_dynarray all_blocks;
782 };
783
784 /* The block_pool functions are exported for testing only. The block pool should
785 * only be used via a state pool (see below).
786 */
787 VkResult anv_block_pool_init(struct anv_block_pool *pool,
788 struct anv_device *device,
789 const char *name,
790 uint64_t start_address,
791 uint32_t initial_size,
792 uint32_t max_size);
793 void anv_block_pool_finish(struct anv_block_pool *pool);
794 VkResult anv_block_pool_alloc(struct anv_block_pool *pool,
795 uint32_t block_size,
796 int64_t *offset,
797 uint32_t *padding);
798 void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset, uint32_t
799 size);
800
801 struct anv_state_pool_params {
802 const char *name;
803 uint64_t base_address;
804 int64_t start_offset;
805 uint32_t block_size;
806 uint32_t max_size;
807 };
808
809 VkResult anv_state_pool_init(struct anv_state_pool *pool,
810 struct anv_device *device,
811 const struct anv_state_pool_params *params);
812 void anv_state_pool_finish(struct anv_state_pool *pool);
813 struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool,
814 uint32_t state_size, uint32_t alignment);
815 void anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state);
816
817 static inline struct anv_address
818 anv_state_pool_state_address(struct anv_state_pool *pool, struct anv_state state)
819 {
820 return (struct anv_address) {
821 .bo = pool->block_pool.bo,
822 .offset = state.offset - pool->start_offset,
823 };
824 }
825
826 static inline struct anv_state
827 anv_state_pool_emit_data(struct anv_state_pool *pool,
828 size_t size, size_t align,
829 const void *p)
830 {
831 struct anv_state state;
832
833 state = anv_state_pool_alloc(pool, size, align);
834 memcpy(state.map, p, size);
835
836 return state;
837 }
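/* Typical usage sketch (pool choice and data are illustrative): allocate a
 * chunk of pool memory and fill it with CPU data in one call.
 *
 *    const float coords[4] = { 0.0f, 0.5f, 1.0f, 0.5f };
 *    struct anv_state s =
 *       anv_state_pool_emit_data(&device->dynamic_state_pool,
 *                                sizeof(coords), 64, coords);
 *    // s.offset / anv_state_pool_state_address() can then be programmed
 *    // into whatever state pointer needs it.
 */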
838
839 void anv_state_stream_init(struct anv_state_stream *stream,
840 struct anv_state_pool *state_pool,
841 uint32_t block_size);
842 void anv_state_stream_finish(struct anv_state_stream *stream);
843 struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream,
844 uint32_t size, uint32_t alignment);
845
846 void anv_state_reserved_pool_init(struct anv_state_reserved_pool *pool,
847 struct anv_state_pool *parent,
848 uint32_t count, uint32_t size,
849 uint32_t alignment);
850 void anv_state_reserved_pool_finish(struct anv_state_reserved_pool *pool);
851 struct anv_state anv_state_reserved_pool_alloc(struct anv_state_reserved_pool *pool);
852 void anv_state_reserved_pool_free(struct anv_state_reserved_pool *pool,
853 struct anv_state state);
854
855 VkResult anv_state_reserved_array_pool_init(struct anv_state_reserved_array_pool *pool,
856 struct anv_state_pool *parent,
857 uint32_t count, uint32_t size,
858 uint32_t alignment);
859 void anv_state_reserved_array_pool_finish(struct anv_state_reserved_array_pool *pool);
860 struct anv_state anv_state_reserved_array_pool_alloc(struct anv_state_reserved_array_pool *pool,
861 bool alloc_back);
862 struct anv_state anv_state_reserved_array_pool_alloc_index(struct anv_state_reserved_array_pool *pool,
863 unsigned idx);
864 uint32_t anv_state_reserved_array_pool_state_index(struct anv_state_reserved_array_pool *pool,
865 struct anv_state state);
866 void anv_state_reserved_array_pool_free(struct anv_state_reserved_array_pool *pool,
867 struct anv_state state);
868
869 VkResult anv_state_table_init(struct anv_state_table *table,
870 struct anv_device *device,
871 uint32_t initial_entries);
872 void anv_state_table_finish(struct anv_state_table *table);
873 VkResult anv_state_table_add(struct anv_state_table *table, uint32_t *idx,
874 uint32_t count);
875 void anv_free_list_push(union anv_free_list *list,
876 struct anv_state_table *table,
877 uint32_t idx, uint32_t count);
878 struct anv_state* anv_free_list_pop(union anv_free_list *list,
879 struct anv_state_table *table);
880
881
882 static inline struct anv_state *
883 anv_state_table_get(struct anv_state_table *table, uint32_t idx)
884 {
885 return &table->map[idx].state;
886 }
887 /**
888 * Implements a pool of re-usable BOs. The interface is identical to that
889 * of block_pool except that each block is its own BO.
890 */
891 struct anv_bo_pool {
892 const char *name;
893
894 struct anv_device *device;
895
896 enum anv_bo_alloc_flags bo_alloc_flags;
897
898 struct util_sparse_array_free_list free_list[16];
899 };
900
901 void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device,
902 const char *name, enum anv_bo_alloc_flags alloc_flags);
903 void anv_bo_pool_finish(struct anv_bo_pool *pool);
904 VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size,
905 struct anv_bo **bo_out);
906 void anv_bo_pool_free(struct anv_bo_pool *pool, struct anv_bo *bo);
907
908 struct anv_scratch_pool {
909 enum anv_bo_alloc_flags alloc_flags;
910 /* Indexed by Per-Thread Scratch Space number (the hardware value) and stage */
911 struct anv_bo *bos[16][MESA_SHADER_STAGES];
912 uint32_t surfs[16];
913 struct anv_state surf_states[16];
914 };
915
916 void anv_scratch_pool_init(struct anv_device *device,
917 struct anv_scratch_pool *pool,
918 bool protected);
919 void anv_scratch_pool_finish(struct anv_device *device,
920 struct anv_scratch_pool *pool);
921 struct anv_bo *anv_scratch_pool_alloc(struct anv_device *device,
922 struct anv_scratch_pool *pool,
923 gl_shader_stage stage,
924 unsigned per_thread_scratch);
925 uint32_t anv_scratch_pool_get_surf(struct anv_device *device,
926 struct anv_scratch_pool *pool,
927 unsigned per_thread_scratch);
928
929 /* Note that on Gfx12HP we pass a scratch space surface state offset
930 * shifted by 2 relative to the value specified on the BSpec, since
931 * that allows the compiler to save a shift instruction while
932 * constructing the extended descriptor for SS addressing. That
933 * worked because we limit the scratch surface state pool to 8 MB and
934 * because we relied on the legacy (ExBSO=0) encoding of the extended
935 * descriptor in order to save the shift, which is no longer supported
936 * for the UGM shared function on Xe2 platforms, so we no longer
937 * attempt to do that trick.
938 */
939 #define ANV_SCRATCH_SPACE_SHIFT(ver) ((ver) >= 20 ? 6 : 4)
940
941 /** Implements a BO cache that ensures a 1-1 mapping of GEM BOs to anv_bos */
942 struct anv_bo_cache {
943 struct util_sparse_array bo_map;
944 pthread_mutex_t mutex;
945 };
946
947 VkResult anv_bo_cache_init(struct anv_bo_cache *cache,
948 struct anv_device *device);
949 void anv_bo_cache_finish(struct anv_bo_cache *cache);
950
951 struct anv_queue_family {
952 /* Standard bits passed on to the client */
953 VkQueueFlags queueFlags;
954 uint32_t queueCount;
955
956 enum intel_engine_class engine_class;
957 bool supports_perf;
958 };
959
960 #define ANV_MAX_QUEUE_FAMILIES 5
961
962 struct anv_memory_type {
963 /* Standard bits passed on to the client */
964 VkMemoryPropertyFlags propertyFlags;
965 uint32_t heapIndex;
966 /* Whether this is the dynamic visible memory type */
967 bool dynamic_visible;
968 bool compressed;
969 };
970
971 struct anv_memory_heap {
972 /* Standard bits passed on to the client */
973 VkDeviceSize size;
974 VkMemoryHeapFlags flags;
975
976 /** Driver-internal book-keeping.
977 *
978 * Align it to 64 bits to make atomic operations faster on 32 bit platforms.
979 */
980 alignas(8) VkDeviceSize used;
981
982 bool is_local_mem;
983 };
984
985 struct anv_memregion {
986 const struct intel_memory_class_instance *region;
987 uint64_t size;
988 uint64_t available;
989 };
990
991 enum anv_timestamp_capture_type {
992 ANV_TIMESTAMP_CAPTURE_TOP_OF_PIPE,
993 ANV_TIMESTAMP_CAPTURE_END_OF_PIPE,
994 ANV_TIMESTAMP_CAPTURE_AT_CS_STALL,
995 ANV_TIMESTAMP_REWRITE_COMPUTE_WALKER,
996 ANV_TIMESTAMP_REWRITE_INDIRECT_DISPATCH,
997 };
998
999 struct anv_physical_device {
1000 struct vk_physical_device vk;
1001
1002 /* Link in anv_instance::physical_devices */
1003 struct list_head link;
1004
1005 struct anv_instance * instance;
1006 char path[20];
1007 struct intel_device_info info;
1008
1009 bool video_decode_enabled;
1010 bool video_encode_enabled;
1011
1012 struct brw_compiler * compiler;
1013 struct isl_device isl_dev;
1014 struct intel_perf_config * perf;
1015 /*
1016 * Number of commands required to implement a performance query begin +
1017 * end.
1018 */
1019 uint32_t n_perf_query_commands;
1020 bool has_exec_async;
1021 bool has_exec_capture;
1022 VkQueueGlobalPriorityKHR max_context_priority;
1023 uint64_t gtt_size;
1024
1025 bool always_use_bindless;
1026 bool use_call_secondary;
1027
1028 /** True if we can use timeline semaphores through execbuf */
1029 bool has_exec_timeline;
1030
1031 /** True if we can read the GPU timestamp register
1032 *
1033 * When running in a virtual context, the timestamp register is unreadable
1034 * on Gfx12+.
1035 */
1036 bool has_reg_timestamp;
1037
1038 /** True if we can create protected contexts. */
1039 bool has_protected_contexts;
1040
1041 /** Whether KMD has the ability to create VM objects */
1042 bool has_vm_control;
1043
1044 /** True if we have the means to do sparse binding (e.g., a kernel driver
1045 * with a vm_bind ioctl).
1046 */
1047 enum anv_sparse_type {
1048 ANV_SPARSE_TYPE_NOT_SUPPORTED = 0,
1049 ANV_SPARSE_TYPE_VM_BIND,
1050 ANV_SPARSE_TYPE_TRTT,
1051 ANV_SPARSE_TYPE_FAKE,
1052 } sparse_type;
1053
1054 /** True if HW supports ASTC LDR */
1055 bool has_astc_ldr;
1056 /** True if denorms in void extents should be flushed to zero */
1057 bool flush_astc_ldr_void_extent_denorms;
1058 /** True if ASTC LDR is supported via emulation */
1059 bool emu_astc_ldr;
1060 /* true if FCV optimization should be disabled. */
1061 bool disable_fcv;
1062 /** True if the device uses the extended bindless surface offset format (ExBSO) */
1063 bool uses_ex_bso;
1064
1065 bool always_flush_cache;
1066
1067 /** True if application memory is allocated with extra AUX memory
1068 *
1069 * Applications quite often pool image allocations together in a single
1070 * VkDeviceMemory object. On platforms like MTL, the alignment of images
1071 * with compression mapped through the AUX translation tables is large :
1072 * 1MB. This can create a lot of wasted space in the application memory
1073 * objects.
1074 *
1075 * To workaround this problem, we allocate CCS data at the end of
1076 * VkDeviceMemory objects. This would not work well for TGL-like platforms
1077 * because the AUX translation tables also contain the format of the
1078 * images, but on MTL the HW ignores those values. So we can share the AUX
1079 * TT entries between different images without problem.
1080 *
1081 * This should be only true for platforms with AUX TT.
1082 */
1083 bool alloc_aux_tt_mem;
1084
1085 /**
1086 * True if the descriptors buffers are holding one of the following :
1087 * - anv_sampled_image_descriptor
1088 * - anv_storage_image_descriptor
1089 * - anv_address_range_descriptor
1090 *
1091 * Accessing the descriptors in a bindless fashion from the shader
1092 * requires an indirection in the shader, first fetch one of the structure
1093 * listed above from the descriptor buffer, then emit the send message to
1094 * the fixed function (sampler, dataport, etc...) with the handle fetched
1095 * above.
1096 *
1097 * We need to do things this way prior to DG2 because the bindless surface
1098 * state space is limited to 64MB and some applications will allocate more
1099 * than what HW can support. On DG2+ we get 4Gb of bindless surface state
1100 * and so we can directly reference RENDER_SURFACE_STATE/SAMPLER_STATE
1101 * structures instead.
1102 */
1103 bool indirect_descriptors;
1104
1105 bool uses_relocs;
1106
1107 /** Can the platform support cooperative matrices and is it enabled? */
1108 bool has_cooperative_matrix;
1109
1110 struct {
1111 uint32_t family_count;
1112 struct anv_queue_family families[ANV_MAX_QUEUE_FAMILIES];
1113 } queue;
1114
1115 struct {
1116 uint32_t type_count;
1117 struct anv_memory_type types[VK_MAX_MEMORY_TYPES];
1118 uint32_t heap_count;
1119 struct anv_memory_heap heaps[VK_MAX_MEMORY_HEAPS];
1120 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
1121 bool need_flush;
1122 #endif
1123 /** Mask of memory types of normal allocations */
1124 uint32_t default_buffer_mem_types;
1125 /** Mask of memory types of data indexable from the dynamic heap */
1126 uint32_t dynamic_visible_mem_types;
1127 /** Mask of memory types of protected buffers/images */
1128 uint32_t protected_mem_types;
1129 /** Mask of memory types of compressed buffers/images */
1130 uint32_t compressed_mem_types;
1131 } memory;
1132
1133 struct {
1134 /**
1135 * General state pool
1136 */
1137 struct anv_va_range general_state_pool;
1138 /**
1139 * Low 32bit heap
1140 */
1141 struct anv_va_range low_heap;
1142 /**
1143 * Binding table pool
1144 */
1145 struct anv_va_range binding_table_pool;
1146 /**
1147 * Internal surface states for blorp & push descriptors.
1148 */
1149 struct anv_va_range internal_surface_state_pool;
1150 /**
1151 * Scratch surfaces (overlaps with internal_surface_state_pool).
1152 */
1153 struct anv_va_range scratch_surface_state_pool;
1154 /**
1155 * Bindless surface states (indirectly referred to by indirect
1156 * descriptors or for direct descriptors)
1157 */
1158 struct anv_va_range bindless_surface_state_pool;
1159 /**
1160 * Dynamic state pool
1161 */
1162 struct anv_va_range dynamic_state_pool;
1163 /**
1164 * Buffer pool that can be indexed from the dynamic state heap
1165 */
1166 struct anv_va_range dynamic_visible_pool;
1167 /**
1168 * Indirect descriptor pool
1169 */
1170 struct anv_va_range indirect_descriptor_pool;
1171 /**
1172 * Indirect push descriptor pool
1173 */
1174 struct anv_va_range indirect_push_descriptor_pool;
1175 /**
1176 * Instruction state pool
1177 */
1178 struct anv_va_range instruction_state_pool;
1179 /**
1180 * Push descriptor with descriptor buffers
1181 */
1182 struct anv_va_range push_descriptor_buffer_pool;
1183 /**
1184 * AUX-TT
1185 */
1186 struct anv_va_range aux_tt_pool;
1187 /**
1188 * Client heap
1189 */
1190 struct anv_va_range high_heap;
1191 struct anv_va_range trtt;
1192 } va;
1193
1194 /* Either we have a single vram region and it's all mappable, or we have
1195 * both mappable & non-mappable parts. System memory is always available.
1196 */
1197 struct anv_memregion vram_mappable;
1198 struct anv_memregion vram_non_mappable;
1199 struct anv_memregion sys;
1200 uint8_t driver_build_sha1[20];
1201 uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
1202 uint8_t driver_uuid[VK_UUID_SIZE];
1203 uint8_t device_uuid[VK_UUID_SIZE];
1204 uint8_t rt_uuid[VK_UUID_SIZE];
1205
1206 /* Maximum amount of scratch space used by all the GRL kernels */
1207 uint32_t max_grl_scratch_size;
1208
1209 struct vk_sync_type sync_syncobj_type;
1210 struct vk_sync_timeline_type sync_timeline_type;
1211 const struct vk_sync_type * sync_types[4];
1212
1213 struct wsi_device wsi_device;
1214 int local_fd;
1215 bool has_local;
1216 int64_t local_major;
1217 int64_t local_minor;
1218 int master_fd;
1219 bool has_master;
1220 int64_t master_major;
1221 int64_t master_minor;
1222 struct intel_query_engine_info * engine_info;
1223
1224 void (*cmd_emit_timestamp)(struct anv_batch *, struct anv_device *, struct anv_address,
1225 enum anv_timestamp_capture_type, void *);
1226 void (*cmd_capture_data)(struct anv_batch *, struct anv_device *,
1227 struct anv_address, struct anv_address,
1228 uint32_t);
1229 struct intel_measure_device measure_device;
1230
1231 /* Value of PIPELINE_SELECT::PipelineSelection == GPGPU */
1232 uint32_t gpgpu_pipeline_value;
1233
1234 /** A pre-packed VERTEX_ELEMENT_STATE feeding 0s to the VS stage
1235 *
1236 * For use when a pipeline has no VS input
1237 */
1238 uint32_t empty_vs_input[2];
1239 };
1240
1241 VkResult anv_physical_device_try_create(struct vk_instance *vk_instance,
1242 struct _drmDevice *drm_device,
1243 struct vk_physical_device **out);
1244
1245 void anv_physical_device_destroy(struct vk_physical_device *vk_device);
1246
1247 static inline uint32_t
1248 anv_physical_device_bindless_heap_size(const struct anv_physical_device *device,
1249 bool descriptor_buffer)
1250 {
1251 /* Pre-Gfx12.5, the HW bindless surface heap is only 64MB. After that it's 4GB,
1252 * but we have some workarounds that require 2 heaps to overlap, so the
1253 * size is dictated by our VA allocation.
1254 */
1255 return device->uses_ex_bso ?
1256 (descriptor_buffer ?
1257 device->va.dynamic_visible_pool.size :
1258 device->va.bindless_surface_state_pool.size) :
1259 64 * 1024 * 1024 /* 64 MiB */;
1260 }
1261
1262 static inline bool
1263 anv_physical_device_has_vram(const struct anv_physical_device *device)
1264 {
1265 return device->vram_mappable.size > 0;
1266 }
1267
1268 struct anv_instance {
1269 struct vk_instance vk;
1270
1271 struct driOptionCache dri_options;
1272 struct driOptionCache available_dri_options;
1273
1274 int mesh_conv_prim_attrs_to_vert_attrs;
1275 bool enable_tbimr;
1276 bool external_memory_implicit_sync;
1277 bool force_guc_low_latency;
1278
1279 /**
1280 * Workarounds for game bugs.
1281 */
1282 uint8_t assume_full_subgroups;
1283 bool assume_full_subgroups_with_barrier;
1284 bool limit_trig_input_range;
1285 bool sample_mask_out_opengl_behaviour;
1286 bool force_filter_addr_rounding;
1287 bool fp64_workaround_enabled;
1288 float lower_depth_range_rate;
1289 unsigned generated_indirect_threshold;
1290 unsigned generated_indirect_ring_threshold;
1291 unsigned query_clear_with_blorp_threshold;
1292 unsigned query_copy_with_shader_threshold;
1293 unsigned force_vk_vendor;
1294 bool has_fake_sparse;
1295 bool disable_fcv;
1296 bool disable_xe2_ccs;
1297 bool compression_control_enabled;
1298 bool anv_fake_nonlocal_memory;
1299
1300 /* HW workarounds */
1301 bool no_16bit;
1302 bool intel_enable_wa_14018912822;
1303
1304 /**
1305 * Ray tracing configuration.
1306 */
1307 unsigned stack_ids;
1308 };
1309
1310 VkResult anv_init_wsi(struct anv_physical_device *physical_device);
1311 void anv_finish_wsi(struct anv_physical_device *physical_device);
1312
1313 struct anv_queue {
1314 struct vk_queue vk;
1315
1316 struct anv_device * device;
1317
1318 const struct anv_queue_family * family;
1319
1320 struct intel_batch_decode_ctx * decoder;
1321
1322 union {
1323 uint32_t exec_flags; /* i915 */
1324 uint32_t context_id; /* i915 */
1325 uint32_t exec_queue_id; /* Xe */
1326 };
1327
1328 /** Context/Engine id which executes companion RCS command buffer */
1329 uint32_t companion_rcs_id;
1330
1331 /** Synchronization object for debug purposes (DEBUG_SYNC) */
1332 struct vk_sync *sync;
1333
1334 /** Companion synchronization object
1335 *
1336 * Vulkan command buffers can be destroyed as soon as their lifecycle has moved
1337 * from the Pending state to the Invalid/Executable state. This transition
1338 * happens when the VkFence/VkSemaphore associated with the completion of
1339 * the command buffer work is signaled.
1340 *
1341 * When we're using a companion command buffer to execute part of another
1342 * command buffer, we need to tie the 2 work submissions together to ensure
1343 * when the associated VkFence/VkSemaphore is signaled, both command
1344 * buffers are actually unused by the HW. To do this, we run an empty batch
1345 * buffer that we use to signal after both submissions :
1346 *
1347 * CCS --> main ---> empty_batch (with wait on companion) --> signal
1348 * RCS --> companion -|
1349 *
1350 * When the companion batch completes, it signals companion_sync and allows
1351 * empty_batch to execute. Since empty_batch is running on the main engine,
1352 * we're guaranteed that upon completion both main & companion command
1353 * buffers are not used by HW anymore.
1354 */
1355 struct vk_sync *companion_sync;
1356
1357 struct intel_ds_queue ds;
1358
1359 struct anv_async_submit *init_submit;
1360 struct anv_async_submit *init_companion_submit;
1361 };
1362
1363 struct nir_xfb_info;
1364 struct anv_pipeline_bind_map;
1365 struct anv_pipeline_sets_layout;
1366 struct anv_push_descriptor_info;
1367 enum anv_dynamic_push_bits;
1368
1369 void anv_device_init_embedded_samplers(struct anv_device *device);
1370 void anv_device_finish_embedded_samplers(struct anv_device *device);
1371
1372 extern const struct vk_pipeline_cache_object_ops *const anv_cache_import_ops[2];
1373
1374 struct anv_shader_bin *
1375 anv_device_search_for_kernel(struct anv_device *device,
1376 struct vk_pipeline_cache *cache,
1377 const void *key_data, uint32_t key_size,
1378 bool *user_cache_bit);
1379
1380 struct anv_shader_upload_params;
1381
1382 struct anv_shader_bin *
1383 anv_device_upload_kernel(struct anv_device *device,
1384 struct vk_pipeline_cache *cache,
1385 const struct anv_shader_upload_params *params);
1386
1387 struct nir_shader;
1388 struct nir_shader_compiler_options;
1389
1390 struct nir_shader *
1391 anv_device_search_for_nir(struct anv_device *device,
1392 struct vk_pipeline_cache *cache,
1393 const struct nir_shader_compiler_options *nir_options,
1394 unsigned char sha1_key[20],
1395 void *mem_ctx);
1396
1397 void
1398 anv_device_upload_nir(struct anv_device *device,
1399 struct vk_pipeline_cache *cache,
1400 const struct nir_shader *nir,
1401 unsigned char sha1_key[20]);
1402
1403 void
1404 anv_load_fp64_shader(struct anv_device *device);
1405
1406 /**
1407 * This enum tracks the various HW instructions that hold graphics state
1408 * needing to be reprogrammed. Some instructions are grouped together as they
1409 * pretty much need to be emitted together (like 3DSTATE_URB_*).
1410 *
1411 * Not all bits apply to all platforms. We build a dirty state based on
1412 * enabled extensions & generation on anv_device.
1413 */
1414 enum anv_gfx_state_bits {
1415 /* Pipeline states */
1416 ANV_GFX_STATE_URB, /* All legacy stages, including mesh */
1417 ANV_GFX_STATE_VF_STATISTICS,
1418 ANV_GFX_STATE_VF_SGVS,
1419 ANV_GFX_STATE_VF_SGVS_2,
1420 ANV_GFX_STATE_VF_SGVS_VI, /* 3DSTATE_VERTEX_ELEMENTS for sgvs elements */
1421 ANV_GFX_STATE_VF_SGVS_INSTANCING, /* 3DSTATE_VF_INSTANCING for sgvs elements */
1422 ANV_GFX_STATE_PRIMITIVE_REPLICATION,
1423 ANV_GFX_STATE_SBE,
1424 ANV_GFX_STATE_SBE_SWIZ,
1425 ANV_GFX_STATE_SO_DECL_LIST,
1426 ANV_GFX_STATE_VS,
1427 ANV_GFX_STATE_HS,
1428 ANV_GFX_STATE_DS,
1429 ANV_GFX_STATE_GS,
1430 ANV_GFX_STATE_PS,
1431 ANV_GFX_STATE_SBE_MESH,
1432 ANV_GFX_STATE_CLIP_MESH,
1433 ANV_GFX_STATE_MESH_CONTROL,
1434 ANV_GFX_STATE_MESH_SHADER,
1435 ANV_GFX_STATE_MESH_DISTRIB,
1436 ANV_GFX_STATE_TASK_CONTROL,
1437 ANV_GFX_STATE_TASK_SHADER,
1438 ANV_GFX_STATE_TASK_REDISTRIB,
1439 /* Dynamic states */
1440 ANV_GFX_STATE_BLEND_STATE, /* Just the dynamic state structure */
1441 ANV_GFX_STATE_BLEND_STATE_PTR, /* The pointer to the dynamic state */
1442 ANV_GFX_STATE_CLIP,
1443 ANV_GFX_STATE_CC_STATE,
1444 ANV_GFX_STATE_CC_STATE_PTR,
1445 ANV_GFX_STATE_CPS,
1446 ANV_GFX_STATE_DEPTH_BOUNDS,
1447 ANV_GFX_STATE_INDEX_BUFFER,
1448 ANV_GFX_STATE_LINE_STIPPLE,
1449 ANV_GFX_STATE_MULTISAMPLE,
1450 ANV_GFX_STATE_PS_BLEND,
1451 ANV_GFX_STATE_RASTER,
1452 ANV_GFX_STATE_SAMPLE_MASK,
1453 ANV_GFX_STATE_SAMPLE_PATTERN,
1454 ANV_GFX_STATE_SCISSOR,
1455 ANV_GFX_STATE_SF,
1456 ANV_GFX_STATE_STREAMOUT,
1457 ANV_GFX_STATE_TE,
1458 ANV_GFX_STATE_VERTEX_INPUT,
1459 ANV_GFX_STATE_VF,
1460 ANV_GFX_STATE_VF_TOPOLOGY,
1461 ANV_GFX_STATE_VFG,
1462 ANV_GFX_STATE_VIEWPORT_CC,
1463 ANV_GFX_STATE_VIEWPORT_CC_PTR,
1464 ANV_GFX_STATE_VIEWPORT_SF_CLIP,
1465 ANV_GFX_STATE_WM,
1466 ANV_GFX_STATE_WM_DEPTH_STENCIL,
1467 ANV_GFX_STATE_PS_EXTRA,
1468 ANV_GFX_STATE_PMA_FIX, /* Fake state to implement workaround */
1469 ANV_GFX_STATE_WA_18019816803, /* Fake state to implement workaround */
1470 ANV_GFX_STATE_WA_14018283232, /* Fake state to implement workaround */
1471 ANV_GFX_STATE_TBIMR_TILE_PASS_INFO,
1472
1473 ANV_GFX_STATE_MAX,
1474 };
1475
1476 const char *anv_gfx_state_bit_to_str(enum anv_gfx_state_bits state);
1477
1478 /* This structure tracks the values to program in HW instructions for
1479 * corresponding to dynamic states of the Vulkan API. Only fields that need to
1480 * be reemitted outside of the VkPipeline object are tracked here.
1481 */
1482 struct anv_gfx_dynamic_state {
1483 /* 3DSTATE_BLEND_STATE_POINTERS */
1484 struct {
1485 bool AlphaToCoverageEnable;
1486 bool AlphaToOneEnable;
1487 bool IndependentAlphaBlendEnable;
1488 bool ColorDitherEnable;
1489 struct {
1490 bool WriteDisableAlpha;
1491 bool WriteDisableRed;
1492 bool WriteDisableGreen;
1493 bool WriteDisableBlue;
1494
1495 uint32_t LogicOpFunction;
1496 bool LogicOpEnable;
1497
1498 bool ColorBufferBlendEnable;
1499 uint32_t ColorClampRange;
1500 bool PreBlendColorClampEnable;
1501 bool PostBlendColorClampEnable;
1502 uint32_t SourceBlendFactor;
1503 uint32_t DestinationBlendFactor;
1504 uint32_t ColorBlendFunction;
1505 uint32_t SourceAlphaBlendFactor;
1506 uint32_t DestinationAlphaBlendFactor;
1507 uint32_t AlphaBlendFunction;
1508 } rts[MAX_RTS];
1509
1510 struct anv_state state;
1511 } blend;
1512
1513 /* 3DSTATE_CC_STATE_POINTERS */
1514 struct {
1515 float BlendConstantColorRed;
1516 float BlendConstantColorGreen;
1517 float BlendConstantColorBlue;
1518 float BlendConstantColorAlpha;
1519
1520 struct anv_state state;
1521 } cc;
1522
1523 /* 3DSTATE_CLIP */
1524 struct {
1525 uint32_t APIMode;
1526 uint32_t ViewportXYClipTestEnable;
1527 uint32_t MaximumVPIndex;
1528 uint32_t TriangleStripListProvokingVertexSelect;
1529 uint32_t LineStripListProvokingVertexSelect;
1530 uint32_t TriangleFanProvokingVertexSelect;
1531 } clip;
1532
1533 /* 3DSTATE_CPS/3DSTATE_CPS_POINTERS */
1534 struct {
1535 /* Gfx11 */
1536 uint32_t CoarsePixelShadingMode;
1537 float MinCPSizeX;
1538 float MinCPSizeY;
1539 /* Gfx12+ */
1540 uint32_t CoarsePixelShadingStateArrayPointer;
1541 } cps;
1542
1543 /* 3DSTATE_DEPTH_BOUNDS */
1544 struct {
1545 bool DepthBoundsTestEnable;
1546 float DepthBoundsTestMinValue;
1547 float DepthBoundsTestMaxValue;
1548 } db;
1549
1550 /* 3DSTATE_GS */
1551 struct {
1552 uint32_t ReorderMode;
1553 } gs;
1554
1555 /* 3DSTATE_LINE_STIPPLE */
1556 struct {
1557 uint32_t LineStipplePattern;
1558 float LineStippleInverseRepeatCount;
1559 uint32_t LineStippleRepeatCount;
1560 } ls;
1561
1562 /* 3DSTATE_MULTISAMPLE */
1563 struct {
1564 uint32_t NumberofMultisamples;
1565 } ms;
1566
1567 /* 3DSTATE_PS */
1568 struct {
1569 uint32_t PositionXYOffsetSelect;
1570
1571 uint32_t KernelStartPointer0;
1572 uint32_t KernelStartPointer1;
1573 uint32_t KernelStartPointer2;
1574
1575 uint32_t DispatchGRFStartRegisterForConstantSetupData0;
1576 uint32_t DispatchGRFStartRegisterForConstantSetupData1;
1577 uint32_t DispatchGRFStartRegisterForConstantSetupData2;
1578
1579 /* Pre-Gfx20 only */
1580 bool _8PixelDispatchEnable;
1581 bool _16PixelDispatchEnable;
1582 bool _32PixelDispatchEnable;
1583
1584 /* Gfx20+ only */
1585 bool Kernel0Enable;
1586 bool Kernel1Enable;
1587 uint32_t Kernel0SIMDWidth;
1588 uint32_t Kernel1SIMDWidth;
1589 uint32_t Kernel0PolyPackingPolicy;
1590 } ps;
1591
1592 /* 3DSTATE_PS_EXTRA */
1593 struct {
1594 bool PixelShaderHasUAV;
1595 bool PixelShaderIsPerSample;
1596 bool PixelShaderKillsPixel;
1597 bool PixelShaderIsPerCoarsePixel;
1598 bool EnablePSDependencyOnCPsizeChange;
1599 } ps_extra;
1600
1601 /* 3DSTATE_PS_BLEND */
1602 struct {
1603 bool HasWriteableRT;
1604 bool ColorBufferBlendEnable;
1605 uint32_t SourceAlphaBlendFactor;
1606 uint32_t DestinationAlphaBlendFactor;
1607 uint32_t SourceBlendFactor;
1608 uint32_t DestinationBlendFactor;
1609 bool AlphaTestEnable;
1610 bool IndependentAlphaBlendEnable;
1611 bool AlphaToCoverageEnable;
1612 } ps_blend;
1613
1614 /* 3DSTATE_RASTER */
1615 struct {
1616 uint32_t APIMode;
1617 bool DXMultisampleRasterizationEnable;
1618 bool AntialiasingEnable;
1619 uint32_t CullMode;
1620 uint32_t FrontWinding;
1621 bool GlobalDepthOffsetEnableSolid;
1622 bool GlobalDepthOffsetEnableWireframe;
1623 bool GlobalDepthOffsetEnablePoint;
1624 float GlobalDepthOffsetConstant;
1625 float GlobalDepthOffsetScale;
1626 float GlobalDepthOffsetClamp;
1627 uint32_t FrontFaceFillMode;
1628 uint32_t BackFaceFillMode;
1629 bool ViewportZFarClipTestEnable;
1630 bool ViewportZNearClipTestEnable;
1631 bool ConservativeRasterizationEnable;
1632 } raster;
1633
1634 /* 3DSTATE_SCISSOR_STATE_POINTERS */
1635 struct {
1636 uint32_t count;
1637 struct {
1638 uint32_t ScissorRectangleYMin;
1639 uint32_t ScissorRectangleXMin;
1640 uint32_t ScissorRectangleYMax;
1641 uint32_t ScissorRectangleXMax;
1642 } elem[MAX_SCISSORS];
1643 } scissor;
1644
1645 /* 3DSTATE_SF */
1646 struct {
1647 float LineWidth;
1648 uint32_t TriangleStripListProvokingVertexSelect;
1649 uint32_t LineStripListProvokingVertexSelect;
1650 uint32_t TriangleFanProvokingVertexSelect;
1651 bool LegacyGlobalDepthBiasEnable;
1652 } sf;
1653
1654 /* 3DSTATE_STREAMOUT */
1655 struct {
1656 bool RenderingDisable;
1657 uint32_t RenderStreamSelect;
1658 uint32_t ReorderMode;
1659 uint32_t ForceRendering;
1660 } so;
1661
1662 /* 3DSTATE_SAMPLE_MASK */
1663 struct {
1664 uint32_t SampleMask;
1665 } sm;
1666
1667 /* 3DSTATE_TE */
1668 struct {
1669 uint32_t OutputTopology;
1670 } te;
1671
1672 /* 3DSTATE_VF */
1673 struct {
1674 bool IndexedDrawCutIndexEnable;
1675 uint32_t CutIndex;
1676 } vf;
1677
1678 /* 3DSTATE_VFG */
1679 struct {
1680 uint32_t DistributionMode;
1681 bool ListCutIndexEnable;
1682 } vfg;
1683
1684 /* 3DSTATE_VF_TOPOLOGY */
1685 struct {
1686 uint32_t PrimitiveTopologyType;
1687 } vft;
1688
1689 /* 3DSTATE_VIEWPORT_STATE_POINTERS_CC */
1690 struct {
1691 uint32_t count;
1692 struct {
1693 float MinimumDepth;
1694 float MaximumDepth;
1695 } elem[MAX_VIEWPORTS];
1696
1697 struct anv_state state;
1698 } vp_cc;
1699
1700 /* 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP */
1701 struct {
1702 uint32_t count;
1703 struct {
1704 float ViewportMatrixElementm00;
1705 float ViewportMatrixElementm11;
1706 float ViewportMatrixElementm22;
1707 float ViewportMatrixElementm30;
1708 float ViewportMatrixElementm31;
1709 float ViewportMatrixElementm32;
1710 float XMinClipGuardband;
1711 float XMaxClipGuardband;
1712 float YMinClipGuardband;
1713 float YMaxClipGuardband;
1714 float XMinViewPort;
1715 float XMaxViewPort;
1716 float YMinViewPort;
1717 float YMaxViewPort;
1718 } elem[MAX_VIEWPORTS];
1719 } vp_sf_clip;
1720
1721 /* 3DSTATE_WM */
1722 struct {
1723 bool LineStippleEnable;
1724 uint32_t BarycentricInterpolationMode;
1725 } wm;
1726
1727 /* 3DSTATE_WM_DEPTH_STENCIL */
1728 struct {
1729 bool DoubleSidedStencilEnable;
1730 uint32_t StencilTestMask;
1731 uint32_t StencilWriteMask;
1732 uint32_t BackfaceStencilTestMask;
1733 uint32_t BackfaceStencilWriteMask;
1734 uint32_t StencilReferenceValue;
1735 uint32_t BackfaceStencilReferenceValue;
1736 bool DepthTestEnable;
1737 bool DepthBufferWriteEnable;
1738 uint32_t DepthTestFunction;
1739 bool StencilTestEnable;
1740 bool StencilBufferWriteEnable;
1741 uint32_t StencilFailOp;
1742 uint32_t StencilPassDepthPassOp;
1743 uint32_t StencilPassDepthFailOp;
1744 uint32_t StencilTestFunction;
1745 uint32_t BackfaceStencilFailOp;
1746 uint32_t BackfaceStencilPassDepthPassOp;
1747 uint32_t BackfaceStencilPassDepthFailOp;
1748 uint32_t BackfaceStencilTestFunction;
1749 } ds;
1750
1751 /* 3DSTATE_TBIMR_TILE_PASS_INFO */
1752 struct {
1753 unsigned TileRectangleHeight;
1754 unsigned TileRectangleWidth;
1755 unsigned VerticalTileCount;
1756 unsigned HorizontalTileCount;
1757 unsigned TBIMRBatchSize;
1758 unsigned TileBoxCheck;
1759 } tbimr;
1760 bool use_tbimr;
1761
1762 bool pma_fix;
1763
1764 /**
1765 * DEPTH and STENCIL attachment write state for Wa_18019816803.
1766 */
1767 bool ds_write_state;
1768
1769 /**
1770 * Toggle tracking for Wa_14018283232.
1771 */
1772 bool wa_14018283232_toggle;
1773
1774 BITSET_DECLARE(dirty, ANV_GFX_STATE_MAX);
1775 };
1776
1777 enum anv_internal_kernel_name {
1778 ANV_INTERNAL_KERNEL_GENERATED_DRAWS,
1779 ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE,
1780 ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_FRAGMENT,
1781 ANV_INTERNAL_KERNEL_MEMCPY_COMPUTE,
1782
1783 ANV_INTERNAL_KERNEL_COUNT,
1784 };
1785
1786 enum anv_rt_bvh_build_method {
1787 ANV_BVH_BUILD_METHOD_TRIVIAL,
1788 ANV_BVH_BUILD_METHOD_NEW_SAH,
1789 };
1790
1791 struct anv_device_astc_emu {
1792 struct vk_texcompress_astc_state *texcompress;
1793
1794 /* for flush_astc_ldr_void_extent_denorms */
1795 simple_mtx_t mutex;
1796 VkDescriptorSetLayout ds_layout;
1797 VkPipelineLayout pipeline_layout;
1798 VkPipeline pipeline;
1799 };
1800
1801 struct anv_device {
1802 struct vk_device vk;
1803
1804 struct anv_physical_device * physical;
1805 const struct intel_device_info * info;
1806 const struct anv_kmd_backend * kmd_backend;
1807 struct isl_device isl_dev;
1808 union {
1809 uint32_t context_id; /* i915 */
1810 uint32_t vm_id; /* Xe */
1811 };
1812 int fd;
1813
1814 pthread_mutex_t vma_mutex;
1815 struct util_vma_heap vma_lo;
1816 struct util_vma_heap vma_hi;
1817 struct util_vma_heap vma_desc;
1818 struct util_vma_heap vma_dynamic_visible;
1819 struct util_vma_heap vma_trtt;
1820
1821 /** List of all anv_device_memory objects */
1822 struct list_head memory_objects;
1823
1824 /** List of anv_image objects with a private binding for implicit CCS */
1825 struct list_head image_private_objects;
1826
1827 /** Memory pool for batch buffers */
1828 struct anv_bo_pool batch_bo_pool;
1829 /** Memory pool for utrace timestamp buffers */
1830 struct anv_bo_pool utrace_bo_pool;
1831 /**
1832 * Size of the timestamp captured for utrace.
1833 */
1834 uint32_t utrace_timestamp_size;
1835 /** Memory pool for BVH build buffers */
1836 struct anv_bo_pool bvh_bo_pool;
1837
1838 struct anv_bo_cache bo_cache;
1839
1840 struct anv_state_pool general_state_pool;
1841 struct anv_state_pool aux_tt_pool;
1842 struct anv_state_pool dynamic_state_pool;
1843 struct anv_state_pool instruction_state_pool;
1844 struct anv_state_pool binding_table_pool;
1845 struct anv_state_pool scratch_surface_state_pool;
1846 struct anv_state_pool internal_surface_state_pool;
1847 struct anv_state_pool bindless_surface_state_pool;
1848 struct anv_state_pool indirect_push_descriptor_pool;
1849 struct anv_state_pool push_descriptor_buffer_pool;
1850
1851 struct anv_state_reserved_array_pool custom_border_colors;
1852
1853 /** BO used for various workarounds
1854 *
1855 * There are a number of workarounds on our hardware which require writing
1856 * data somewhere and it doesn't really matter where. For that, we use
1857 * this BO and just write to the first dword or so.
1858 *
1859 * We also need to be able to handle NULL buffers bound as pushed UBOs.
1860 * For that, we use the high bytes (>= 1024) of the workaround BO.
1861 */
1862 struct anv_bo * workaround_bo;
1863 struct anv_address workaround_address;
1864
1865 struct anv_bo * dummy_aux_bo;
1866
1867 /**
1868 * Workarounds for game bugs.
1869 */
1870 struct {
1871 struct set * doom64_images;
1872 } workarounds;
1873
1874 struct anv_bo * trivial_batch_bo;
1875 struct anv_state null_surface_state;
1876
1877 /**
1878 * NULL surface state copy stored in host memory for use as a fast
1879 * memcpy() source.
1880 */
1881 char host_null_surface_state[ANV_SURFACE_STATE_SIZE];
1882
1883 struct vk_pipeline_cache * internal_cache;
1884
1885 struct {
1886 struct blorp_context context;
1887 struct anv_state dynamic_states[BLORP_DYNAMIC_STATE_COUNT];
1888 } blorp;
1889
1890 struct anv_state border_colors;
1891
1892 struct anv_state slice_hash;
1893
1894 /** An array of CPS_STATE structures grouped by MAX_VIEWPORTS elements
1895 *
1896 * We need to emit CPS_STATE structures for each viewport accessible by a
1897 * pipeline. So rather than write many identical CPS_STATE structures
1898 * dynamically, we can enumerate all possible combinations and then just
1899 * emit a 3DSTATE_CPS_POINTERS instruction with the right offset into this
1900 * array.
1901 */
1902 struct anv_state cps_states;
1903
1904 uint32_t queue_count;
1905 struct anv_queue * queues;
1906
1907 struct anv_scratch_pool scratch_pool;
1908 struct anv_scratch_pool protected_scratch_pool;
1909 struct anv_bo *rt_scratch_bos[16];
1910 struct anv_bo *btd_fifo_bo;
1911 struct anv_address rt_uuid_addr;
1912
1913 bool robust_buffer_access;
1914
1915 uint32_t protected_session_id;
1916
1917 /** Shadow ray query BO
1918 *
1919 * The ray_query_bo only holds the current ray being traced. When using
1920 * more than 1 ray query per thread, we cannot fit all the queries in
1921 * there, so we need another buffer to hold query data that is not
1922 * currently being used by the HW for tracing, similar to a scratch space.
1923 *
1924 * The size of the shadow buffer depends on the number of queries per
1925 * shader.
1926 */
1927 struct anv_bo *ray_query_shadow_bos[16];
1928 /** Ray query buffer used to communicate with the HW unit.
1929 */
1930 struct anv_bo *ray_query_bo;
1931
1932 struct anv_shader_bin *rt_trampoline;
1933 struct anv_shader_bin *rt_trivial_return;
1934
1935 enum anv_rt_bvh_build_method bvh_build_method;
1936
1937 /** Draw generation shader
1938 *
1939 * Generates direct draw calls out of indirect parameters. Used to
1940 * work around slowness with indirect draw calls.
1941 */
1942 struct anv_shader_bin *internal_kernels[ANV_INTERNAL_KERNEL_COUNT];
1943 const struct intel_l3_config *internal_kernels_l3_config;
1944
1945 pthread_mutex_t mutex;
1946 pthread_cond_t queue_submit;
1947
1948 struct intel_batch_decode_ctx decoder[ANV_MAX_QUEUE_FAMILIES];
1949 /*
1950 * When decoding an anv_cmd_buffer, we might need to search for BOs through
1951 * the cmd_buffer's list.
1952 */
1953 struct anv_cmd_buffer *cmd_buffer_being_decoded;
1954
1955 int perf_fd; /* -1 if not opened */
1956 struct anv_queue *perf_queue;
1957
1958 struct intel_aux_map_context *aux_map_ctx;
1959
1960 const struct intel_l3_config *l3_config;
1961
1962 struct intel_debug_block_frame *debug_frame_desc;
1963
1964 struct intel_ds_device ds;
1965
1966 nir_shader *fp64_nir;
1967
1968 uint32_t draw_call_count;
1969 struct anv_state breakpoint;
1970 #if DETECT_OS_ANDROID
1971 struct u_gralloc *u_gralloc;
1972 #endif
1973
1974 /** Precompute all dirty graphics bits
1975 *
1976 * Depending on platforms, some of the dirty bits don't apply (for example
1977 * 3DSTATE_PRIMITIVE_REPLICATION is only Gfx12.0+). Disabling some
1978 * extensions like Mesh shaders also allows us to avoid emitting any
1979 * mesh/task related instructions (we only initialize them once at device
1980 * initialization).
1981 */
1982 BITSET_DECLARE(gfx_dirty_state, ANV_GFX_STATE_MAX);
1983
1984 /*
1985 * Command pool for companion RCS command buffer.
1986 */
1987 VkCommandPool companion_rcs_cmd_pool;
1988
1989 struct anv_trtt {
1990 simple_mtx_t mutex;
1991
1992 /* Sometimes we need to run batches from places where we don't have a
1993 * queue coming from the API, so we use this.
1994 */
1995 struct anv_queue *queue;
1996
1997 /* There's only one L3 table, so if l3_addr is zero that means we
1998 * didn't initialize the TR-TT context yet (i.e., we're not using TR-TT
1999 * yet in this context).
2000 */
2001 uint64_t l3_addr;
2002
2003 /* We don't want to access the page tables from the CPU, so just
2004 * maintain a mirror that we can use.
2005 */
2006 uint64_t *l3_mirror;
2007 uint64_t *l2_mirror;
2008
2009 /* We keep a dynamic list of page table bos, and each bo can store
2010 * multiple page tables.
2011 */
2012 struct anv_bo **page_table_bos;
2013 int num_page_table_bos;
2014 int page_table_bos_capacity;
2015
2016 /* These are used to keep track of space available for more page tables
2017 * within a bo.
2018 */
2019 struct anv_bo *cur_page_table_bo;
2020 uint64_t next_page_table_bo_offset;
2021
2022 struct vk_sync *timeline;
2023 uint64_t timeline_val;
2024
2025 /* List of struct anv_trtt_submission that are in flight and can be
2026 * freed once their vk_sync gets signaled.
2027 */
2028 struct list_head in_flight_batches;
2029 } trtt;
2030
2031 /* Number of sparse resources that currently exist. This is used for a
2032 * workaround that makes every memoryBarrier flush more things than it
2033 * should. Some workloads create and then immediately destroy sparse
2034 * resources when they start, so just counting if a sparse resource was
2035 * ever created is not enough.
2036 */
2037 uint32_t num_sparse_resources;
2038
2039 struct anv_device_astc_emu astc_emu;
2040
2041 struct intel_bind_timeline bind_timeline; /* Xe only */
2042
2043 struct {
2044 simple_mtx_t mutex;
2045 struct hash_table *map;
2046 } embedded_samplers;
2047
2048 struct {
2049 /**
2050 * Mutex for the printfs array
2051 */
2052 simple_mtx_t mutex;
2053 /**
2054 * Buffer in which the shader printfs are stored
2055 */
2056 struct anv_bo *bo;
2057 /**
2058 * Array of pointers to u_printf_info
2059 */
2060 struct util_dynarray prints;
2061 } printf;
2062 };
2063
2064 static inline uint32_t
2065 anv_get_first_render_queue_index(struct anv_physical_device *pdevice)
2066 {
2067 assert(pdevice != NULL);
2068
2069 for (uint32_t i = 0; i < pdevice->queue.family_count; i++) {
2070 if (pdevice->queue.families[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) {
2071 return i;
2072 }
2073 }
2074
2075 unreachable("Graphics capable queue family not found");
2076 }
2077
2078 static inline struct anv_state
2079 anv_binding_table_pool_alloc(struct anv_device *device)
2080 {
2081 return anv_state_pool_alloc(&device->binding_table_pool,
2082 device->binding_table_pool.block_size, 0);
2083 }
2084
2085 static inline void
2086 anv_binding_table_pool_free(struct anv_device *device, struct anv_state state)
2087 {
2088 anv_state_pool_free(&device->binding_table_pool, state);
2089 }
2090
2091 static inline struct anv_state
2092 anv_null_surface_state_for_binding_table(struct anv_device *device)
2093 {
2094 struct anv_state state = device->null_surface_state;
2095 if (device->physical->indirect_descriptors) {
2096 state.offset += device->physical->va.bindless_surface_state_pool.addr -
2097 device->physical->va.internal_surface_state_pool.addr;
2098 }
2099 return state;
2100 }
2101
2102 static inline struct anv_state
2103 anv_bindless_state_for_binding_table(struct anv_device *device,
2104 struct anv_state state)
2105 {
2106 state.offset += device->physical->va.bindless_surface_state_pool.addr -
2107 device->physical->va.internal_surface_state_pool.addr;
2108 return state;
2109 }
2110
2111 static inline struct anv_state
2112 anv_device_maybe_alloc_surface_state(struct anv_device *device,
2113 struct anv_state_stream *surface_state_stream)
2114 {
2115 if (device->physical->indirect_descriptors) {
2116 if (surface_state_stream)
2117 return anv_state_stream_alloc(surface_state_stream, 64, 64);
2118 return anv_state_pool_alloc(&device->bindless_surface_state_pool, 64, 64);
2119 } else {
2120 return ANV_STATE_NULL;
2121 }
2122 }
2123
2124 static inline uint32_t
2125 anv_mocs(const struct anv_device *device,
2126 const struct anv_bo *bo,
2127 isl_surf_usage_flags_t usage)
2128 {
2129 return isl_mocs(&device->isl_dev, usage, bo && anv_bo_is_external(bo));
2130 }
2131
2132 static inline uint32_t
2133 anv_mocs_for_address(const struct anv_device *device,
2134 const struct anv_address *addr)
2135 {
2136 return anv_mocs(device, addr->bo, 0);
2137 }
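/* Usage sketch (illustrative only, not a definitive pattern): when filling a
 * packet or surface state that takes a MOCS field, callers generally pass the
 * BO backing the memory and an isl usage flag, e.g.
 *
 *    uint32_t mocs = anv_mocs(device, bo, ISL_SURF_USAGE_RENDER_TARGET_BIT);
 *
 * or, when only an anv_address is at hand,
 *
 *    uint32_t mocs = anv_mocs_for_address(device, &addr);
 *
 * with a usage of 0 when no more specific isl usage flag applies.
 */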
2138
2139 void anv_device_init_blorp(struct anv_device *device);
2140 void anv_device_finish_blorp(struct anv_device *device);
2141
2142 VkResult anv_device_alloc_bo(struct anv_device *device,
2143 const char *name, uint64_t size,
2144 enum anv_bo_alloc_flags alloc_flags,
2145 uint64_t explicit_address,
2146 struct anv_bo **bo);
2147 VkResult anv_device_map_bo(struct anv_device *device,
2148 struct anv_bo *bo,
2149 uint64_t offset,
2150 size_t size,
2151 void *placed_addr,
2152 void **map_out);
2153 VkResult anv_device_unmap_bo(struct anv_device *device,
2154 struct anv_bo *bo,
2155 void *map, size_t map_size,
2156 bool replace);
2157 VkResult anv_device_import_bo_from_host_ptr(struct anv_device *device,
2158 void *host_ptr, uint32_t size,
2159 enum anv_bo_alloc_flags alloc_flags,
2160 uint64_t client_address,
2161 struct anv_bo **bo_out);
2162 VkResult anv_device_import_bo(struct anv_device *device, int fd,
2163 enum anv_bo_alloc_flags alloc_flags,
2164 uint64_t client_address,
2165 struct anv_bo **bo);
2166 VkResult anv_device_export_bo(struct anv_device *device,
2167 struct anv_bo *bo, int *fd_out);
2168 VkResult anv_device_get_bo_tiling(struct anv_device *device,
2169 struct anv_bo *bo,
2170 enum isl_tiling *tiling_out);
2171 VkResult anv_device_set_bo_tiling(struct anv_device *device,
2172 struct anv_bo *bo,
2173 uint32_t row_pitch_B,
2174 enum isl_tiling tiling);
2175 void anv_device_release_bo(struct anv_device *device,
2176 struct anv_bo *bo);
2177
2178 static inline void anv_device_set_physical(struct anv_device *device,
2179 struct anv_physical_device *physical_device)
2180 {
2181 device->physical = physical_device;
2182 device->info = &physical_device->info;
2183 device->isl_dev = physical_device->isl_dev;
2184 }
2185
2186 static inline struct anv_bo *
2187 anv_device_lookup_bo(struct anv_device *device, uint32_t gem_handle)
2188 {
2189 return util_sparse_array_get(&device->bo_cache.bo_map, gem_handle);
2190 }
2191
2192 VkResult anv_device_wait(struct anv_device *device, struct anv_bo *bo,
2193 int64_t timeout);
2194
2195 VkResult anv_device_print_init(struct anv_device *device);
2196 void anv_device_print_fini(struct anv_device *device);
2197 void anv_device_print_shader_prints(struct anv_device *device);
2198
2199 VkResult anv_queue_init(struct anv_device *device, struct anv_queue *queue,
2200 const VkDeviceQueueCreateInfo *pCreateInfo,
2201 uint32_t index_in_family);
2202 void anv_queue_finish(struct anv_queue *queue);
2203
2204 VkResult anv_queue_submit(struct vk_queue *queue,
2205 struct vk_queue_submit *submit);
2206
2207 void anv_queue_trace(struct anv_queue *queue, const char *label,
2208 bool frame, bool begin);
2209
2210 static inline VkResult
2211 anv_queue_post_submit(struct anv_queue *queue, VkResult submit_result)
2212 {
2213 if (submit_result != VK_SUCCESS)
2214 return submit_result;
2215
2216 VkResult result = VK_SUCCESS;
2217 if (queue->sync) {
2218 result = vk_sync_wait(&queue->device->vk, queue->sync, 0,
2219 VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
2220 if (result != VK_SUCCESS)
2221 result = vk_queue_set_lost(&queue->vk, "sync wait failed");
2222 }
2223
2224 if (INTEL_DEBUG(DEBUG_SHADER_PRINT))
2225 anv_device_print_shader_prints(queue->device);
2226
2227 return result;
2228 }
2229
2230 int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns);
2231 int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle,
2232 uint32_t stride, uint32_t tiling);
2233 int anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle);
2234 int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle);
2235 uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd);
2236 int anv_gem_set_context_param(int fd, uint32_t context, uint32_t param,
2237 uint64_t value);
2238 VkResult
2239 anv_gem_import_bo_alloc_flags_to_bo_flags(struct anv_device *device,
2240 struct anv_bo *bo,
2241 enum anv_bo_alloc_flags alloc_flags,
2242 uint32_t *bo_flags);
2243 const struct intel_device_info_pat_entry *
2244 anv_device_get_pat_entry(struct anv_device *device,
2245 enum anv_bo_alloc_flags alloc_flags);
2246
2247 uint64_t anv_vma_alloc(struct anv_device *device,
2248 uint64_t size, uint64_t align,
2249 enum anv_bo_alloc_flags alloc_flags,
2250 uint64_t client_address,
2251 struct util_vma_heap **out_vma_heap);
2252 void anv_vma_free(struct anv_device *device,
2253 struct util_vma_heap *vma_heap,
2254 uint64_t address, uint64_t size);
2255
2256 struct anv_reloc_list {
2257 bool uses_relocs;
2258 uint32_t dep_words;
2259 BITSET_WORD * deps;
2260 const VkAllocationCallbacks *alloc;
2261 };
2262
2263 VkResult anv_reloc_list_init(struct anv_reloc_list *list,
2264 const VkAllocationCallbacks *alloc,
2265 bool uses_relocs);
2266 void anv_reloc_list_finish(struct anv_reloc_list *list);
2267
2268 VkResult
2269 anv_reloc_list_add_bo_impl(struct anv_reloc_list *list, struct anv_bo *target_bo);
2270
2271 static inline VkResult
2272 anv_reloc_list_add_bo(struct anv_reloc_list *list, struct anv_bo *target_bo)
2273 {
2274 return list->uses_relocs ? anv_reloc_list_add_bo_impl(list, target_bo) : VK_SUCCESS;
2275 }
2276
2277 VkResult anv_reloc_list_append(struct anv_reloc_list *list,
2278 struct anv_reloc_list *other);
2279
2280 struct anv_batch_bo {
2281 /* Link in the anv_cmd_buffer.owned_batch_bos list */
2282 struct list_head link;
2283
2284 struct anv_bo * bo;
2285
2286 /* Bytes actually consumed in this batch BO */
2287 uint32_t length;
2288
2289 /* When this batch BO is used as part of a primary batch buffer, this
2290 * tracks whether it is chained to another primary batch buffer.
2291 *
2292 * If this is the case, the relocation list's last entry points to the
2293 * location of the MI_BATCH_BUFFER_START chaining to the next batch.
2294 */
2295 bool chained;
2296
2297 struct anv_reloc_list relocs;
2298 };
2299
2300 struct anv_batch {
2301 const VkAllocationCallbacks * alloc;
2302
2303 /**
2304 * Sum of all the anv_batch_bo sizes allocated for this command buffer.
2305 * Used to increase allocation size for long command buffers.
2306 */
2307 size_t allocated_batch_size;
2308
2309 struct anv_address start_addr;
2310
2311 void * start;
2312 void * end;
2313 void * next;
2314
2315 struct anv_reloc_list * relocs;
2316
2317 /* This callback is called (with the associated user data) in the event
2318 * that the batch runs out of space.
2319 */
2320 VkResult (*extend_cb)(struct anv_batch *, uint32_t, void *);
2321 void * user_data;
2322
2323 /**
2324 * Current error status of the command buffer. Used to track inconsistent
2325 * or incomplete command buffer states that are the consequence of run-time
2326 * errors such as out of memory scenarios. We want to track this in the
2327 * batch because the command buffer object is not visible to some parts
2328 * of the driver.
2329 */
2330 VkResult status;
2331
2332 enum intel_engine_class engine_class;
2333
2334 /**
2335 * Write fencing status for mi_builder.
2336 */
2337 bool write_fence_status;
2338
2339 /**
2340 * Number of 3DPRIMITIVEs emitted for WA 16014538804
2341 */
2342 uint8_t num_3d_primitives_emitted;
2343
2344 struct u_trace * trace;
2345 const char * pc_reasons[4];
2346 uint32_t pc_reasons_count;
2347
2348 };
2349
2350 void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords);
2351 VkResult anv_batch_emit_ensure_space(struct anv_batch *batch, uint32_t size);
2352 void anv_batch_advance(struct anv_batch *batch, uint32_t size);
2353 void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other);
2354 struct anv_address anv_batch_address(struct anv_batch *batch, void *batch_location);
2355
2356 static inline struct anv_address
2357 anv_batch_current_address(struct anv_batch *batch)
2358 {
2359 return anv_batch_address(batch, batch->next);
2360 }
2361
2362 static inline void
2363 anv_batch_set_storage(struct anv_batch *batch, struct anv_address addr,
2364 void *map, size_t size)
2365 {
2366 batch->start_addr = addr;
2367 batch->next = batch->start = map;
2368 batch->end = map + size;
2369 }
2370
2371 static inline VkResult
2372 anv_batch_set_error(struct anv_batch *batch, VkResult error)
2373 {
2374 assert(error != VK_SUCCESS);
2375 if (batch->status == VK_SUCCESS)
2376 batch->status = error;
2377 return batch->status;
2378 }
2379
2380 static inline bool
2381 anv_batch_has_error(struct anv_batch *batch)
2382 {
2383 return batch->status != VK_SUCCESS;
2384 }
2385
2386 static inline uint64_t
2387 _anv_combine_address(struct anv_batch *batch, void *location,
2388 const struct anv_address address, uint32_t delta)
2389 {
2390 if (address.bo == NULL)
2391 return address.offset + delta;
2392
2393 if (batch)
2394 anv_reloc_list_add_bo(batch->relocs, address.bo);
2395
2396 return anv_address_physical(anv_address_add(address, delta));
2397 }
2398
2399 #define __gen_address_type struct anv_address
2400 #define __gen_user_data struct anv_batch
2401 #define __gen_combine_address _anv_combine_address
2402
2403 /* Wrapper macros needed to work around preprocessor argument issues. In
2404 * particular, arguments don't get pre-evaluated if they are concatenated.
2405 * This means that, if you pass GENX(3DSTATE_PS) into the emit macro, the
2406 * GENX macro won't get evaluated if the emit macro contains "cmd ## foo".
2407 * We can work around this easily enough with these helpers.
2408 */
2409 #define __anv_cmd_length(cmd) cmd ## _length
2410 #define __anv_cmd_length_bias(cmd) cmd ## _length_bias
2411 #define __anv_cmd_header(cmd) cmd ## _header
2412 #define __anv_cmd_pack(cmd) cmd ## _pack
2413 #define __anv_reg_num(reg) reg ## _num
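/* A concrete sketch of the expansion (assuming a Gfx9 build where GENX(X)
 * resolves to GFX9_##X): because the packet name reaches the paste through
 * __anv_cmd_length(cmd) instead of being pasted directly, the argument
 * GENX(3DSTATE_PS) is macro-expanded to GFX9_3DSTATE_PS first and only then
 * concatenated with _length, yielding GFX9_3DSTATE_PS_length. Pasting
 * "cmd ## _length" directly in the emit macros would suppress that expansion
 * and the concatenation would not form a valid token.
 */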
2414
2415 #define anv_pack_struct(dst, struc, ...) do { \
2416 struct struc __template = { \
2417 __VA_ARGS__ \
2418 }; \
2419 __anv_cmd_pack(struc)(NULL, dst, &__template); \
2420 VG(VALGRIND_CHECK_MEM_IS_DEFINED(dst, __anv_cmd_length(struc) * 4)); \
2421 } while (0)
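/* Usage sketch (illustrative; the packet and field names are only examples
 * and assume the genxml headers are in scope):
 *
 *    uint32_t dwords[GENX(SAMPLER_STATE_length)];
 *    anv_pack_struct(dwords, GENX(SAMPLER_STATE),
 *                    .MinModeFilter = MAPFILTER_LINEAR,
 *                    .MagModeFilter = MAPFILTER_LINEAR);
 *
 * packs the designated-initializer fields into a caller-provided dword
 * buffer without touching a batch.
 */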
2422
2423 #define anv_batch_emitn(batch, n, cmd, ...) ({ \
2424 void *__dst = anv_batch_emit_dwords(batch, n); \
2425 if (__dst) { \
2426 struct cmd __template = { \
2427 __anv_cmd_header(cmd), \
2428 .DWordLength = n - __anv_cmd_length_bias(cmd), \
2429 __VA_ARGS__ \
2430 }; \
2431 __anv_cmd_pack(cmd)(batch, __dst, &__template); \
2432 } \
2433 __dst; \
2434 })
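/* Usage sketch (illustrative; the packet and dword math are only examples):
 * emit a variable-length packet and get a pointer to its dwords so the
 * caller can fill the payload itself:
 *
 *    uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
 *                                   GENX(3DSTATE_VERTEX_BUFFERS));
 *    if (!dw)
 *       return; // out of space; callers typically bail out here
 *    // dw[1..num_dwords-1] is then filled with the per-buffer state
 */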
2435
2436 #define anv_batch_emit_merge(batch, cmd, pipeline, state, name) \
2437 for (struct cmd name = { 0 }, \
2438 *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd)); \
2439 __builtin_expect(_dst != NULL, 1); \
2440 ({ uint32_t _partial[__anv_cmd_length(cmd)]; \
2441 assert((pipeline)->state.len == __anv_cmd_length(cmd)); \
2442 __anv_cmd_pack(cmd)(batch, _partial, &name); \
2443 for (uint32_t i = 0; i < __anv_cmd_length(cmd); i++) { \
2444 assert((_partial[i] & \
2445 (pipeline)->batch_data[ \
2446 (pipeline)->state.offset + i]) == 0); \
2447 ((uint32_t *)_dst)[i] = _partial[i] | \
2448 (pipeline)->batch_data[(pipeline)->state.offset + i]; \
2449 } \
2450 VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \
2451 _dst = NULL; \
2452 }))
2453
2454 #define anv_batch_emit_merge_protected(batch, cmd, pipeline, state, \
2455 name, protected) \
2456 for (struct cmd name = { 0 }, \
2457 *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd)); \
2458 __builtin_expect(_dst != NULL, 1); \
2459 ({ struct anv_gfx_state_ptr *_cmd_state = protected ? \
2460 &(pipeline)->state##_protected : \
2461 &(pipeline)->state; \
2462 uint32_t _partial[__anv_cmd_length(cmd)]; \
2463 assert(_cmd_state->len == __anv_cmd_length(cmd)); \
2464 __anv_cmd_pack(cmd)(batch, _partial, &name); \
2465 for (uint32_t i = 0; i < __anv_cmd_length(cmd); i++) { \
2466 assert((_partial[i] & \
2467 (pipeline)->batch_data[ \
2468 (pipeline)->state.offset + i]) == 0); \
2469 ((uint32_t *)_dst)[i] = _partial[i] | \
2470 (pipeline)->batch_data[_cmd_state->offset + i]; \
2471 } \
2472 VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \
2473 _dst = NULL; \
2474 }))
2475
2476 #define anv_batch_emit(batch, cmd, name) \
2477 for (struct cmd name = { __anv_cmd_header(cmd) }, \
2478 *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd)); \
2479 __builtin_expect(_dst != NULL, 1); \
2480 ({ __anv_cmd_pack(cmd)(batch, _dst, &name); \
2481 VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \
2482 _dst = NULL; \
2483 }))
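/* Usage sketch (illustrative; PIPE_CONTROL and its field assume the genxml
 * headers are in scope): the macro opens a scope in which the named packet
 * struct is filled, and packs it into the batch when the scope closes:
 *
 *    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
 *       pc.CommandStreamerStallEnable = true;
 *    }
 *
 * If anv_batch_emit_dwords() returns NULL, the body never runs, so callers
 * only need to check anv_batch_has_error() where they care about failure.
 */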
2484
2485 #define anv_batch_write_reg(batch, reg, name) \
2486 for (struct reg name = {}, *_cont = (struct reg *)1; _cont != NULL; \
2487 ({ \
2488 uint32_t _dw[__anv_cmd_length(reg)]; \
2489 __anv_cmd_pack(reg)(NULL, _dw, &name); \
2490 for (unsigned i = 0; i < __anv_cmd_length(reg); i++) { \
2491 anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { \
2492 lri.RegisterOffset = __anv_reg_num(reg); \
2493 lri.DataDWord = _dw[i]; \
2494 } \
2495 } \
2496 _cont = NULL; \
2497 }))
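/* Usage sketch (illustrative; the register and field names are examples and
 * assume the genxml headers are in scope): write a whole register through
 * MI_LOAD_REGISTER_IMM by filling its struct form:
 *
 *    anv_batch_write_reg(batch, GENX(CS_DEBUG_MODE2), cdm2) {
 *       cdm2.CONSTANT_BUFFERAddressOffsetDisable = true;
 *    }
 */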
2498
2499 /* #define __gen_get_batch_dwords anv_batch_emit_dwords */
2500 /* #define __gen_get_batch_address anv_batch_address */
2501 /* #define __gen_address_value anv_address_physical */
2502 /* #define __gen_address_offset anv_address_add */
2503
2504 /* Base structure used to track a submission that needs some cleanup operations
2505 * upon completion. Should be embedded into a larger structure.
2506 */
2507 struct anv_async_submit {
2508 struct anv_queue *queue;
2509
2510 struct anv_bo_pool *bo_pool;
2511
2512 bool use_companion_rcs;
2513
2514 bool owns_sync;
2515 struct vk_sync_signal signal;
2516
2517 struct anv_reloc_list relocs;
2518 struct anv_batch batch;
2519 struct util_dynarray batch_bos;
2520 };
2521
2522 VkResult
2523 anv_async_submit_init(struct anv_async_submit *submit,
2524 struct anv_queue *queue,
2525 struct anv_bo_pool *bo_pool,
2526 bool use_companion_rcs,
2527 bool create_signal_sync);
2528
2529 void
2530 anv_async_submit_fini(struct anv_async_submit *submit);
2531
2532 VkResult
2533 anv_async_submit_create(struct anv_queue *queue,
2534 struct anv_bo_pool *bo_pool,
2535 bool use_companion_rcs,
2536 bool create_signal_sync,
2537 struct anv_async_submit **out_submit);
2538
2539 void
2540 anv_async_submit_destroy(struct anv_async_submit *submit);
2541
2542 bool
2543 anv_async_submit_done(struct anv_async_submit *submit);
2544
2545 bool
2546 anv_async_submit_wait(struct anv_async_submit *submit);
2547
2548 struct anv_sparse_submission {
2549 struct anv_queue *queue;
2550
2551 struct anv_vm_bind *binds;
2552 int binds_len;
2553 int binds_capacity;
2554
2555 uint32_t wait_count;
2556 uint32_t signal_count;
2557
2558 struct vk_sync_wait *waits;
2559 struct vk_sync_signal *signals;
2560 };
2561
2562 struct anv_trtt_bind {
2563 uint64_t pte_addr;
2564 uint64_t entry_addr;
2565 };
2566
2567 struct anv_trtt_submission {
2568 struct anv_async_submit base;
2569
2570 struct anv_sparse_submission *sparse;
2571
2572 struct list_head link;
2573 };
2574
2575 struct anv_device_memory {
2576 struct vk_device_memory vk;
2577
2578 struct list_head link;
2579
2580 struct anv_bo * bo;
2581 const struct anv_memory_type * type;
2582
2583 void * map;
2584 size_t map_size;
2585
2586 /* The map, from the user's PoV, is map + map_delta */
2587 uint64_t map_delta;
2588 };
2589
2590 /**
2591 * Header for Vertex URB Entry (VUE)
2592 */
2593 struct anv_vue_header {
2594 uint32_t Reserved;
2595 uint32_t RTAIndex; /* RenderTargetArrayIndex */
2596 uint32_t ViewportIndex;
2597 float PointWidth;
2598 };
2599
2600 /** Struct representing a sampled image descriptor
2601 *
2602 * This descriptor layout is used for sampled images, bare samplers, and
2603 * combined image/sampler descriptors.
2604 */
2605 struct anv_sampled_image_descriptor {
2606 /** Bindless image handle
2607 *
2608 * This is expected to already be shifted such that the 20-bit
2609 * SURFACE_STATE table index is in the top 20 bits.
2610 */
2611 uint32_t image;
2612
2613 /** Bindless sampler handle
2614 *
2615 * This is assumed to be a 32B-aligned SAMPLER_STATE pointer relative
2616 * to the dynamic state base address.
2617 */
2618 uint32_t sampler;
2619 };
2620
2621 /** Struct representing a storage image descriptor */
2622 struct anv_storage_image_descriptor {
2623 /** Bindless image handles
2624 *
2625 * These are expected to already be shifted such that the 20-bit
2626 * SURFACE_STATE table index is in the top 20 bits.
2627 */
2628 uint32_t vanilla;
2629
2630 /** Image depth
2631 *
2632 * By default the HW RESINFO message allows us to query the depth of an image:
2633 *
2634 * From the Kaby Lake docs for the RESINFO message:
2635 *
2636 * "Surface Type | ... | Blue
2637 * --------------+-----+----------------
2638 * SURFTYPE_3D | ... | (Depth+1)»LOD"
2639 *
2640 * With VK_EXT_sliced_view_of_3d, we have to support a slice of a 3D image,
2641 * meaning at a depth offset with a new depth value potentially reduced
2642 * from the original image. Unfortunately if we change the Depth value of
2643 * the image, we then run into issues with Yf/Ys tilings where the HW fetches
2644 * data at incorrect locations.
2645 *
2646 * To solve this, we put the slice depth in the descriptor and recompose
2647 * the vec3 (width, height, depth) using this field for z and xy using the
2648 * RESINFO result.
2649 */
2650 uint32_t image_depth;
2651 };
2652
2653 /** Struct representing an address/range descriptor
2654 *
2655 * The fields of this struct correspond directly to the data layout of
2656 * nir_address_format_64bit_bounded_global addresses. The last field is the
2657 * offset in the NIR address; it must be zero so that, when you load the
2658 * descriptor, you get a pointer to the start of the range.
2659 */
2660 struct anv_address_range_descriptor {
2661 uint64_t address;
2662 uint32_t range;
2663 uint32_t zero;
2664 };
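/* Filling sketch (illustrative; desc_map and bind_range are hypothetical
 * locals standing in for the descriptor write path): since the layout
 * mirrors nir_address_format_64bit_bounded_global, writing the descriptor
 * is just a matter of storing the physical address and the bound range:
 *
 *    struct anv_address_range_descriptor desc = {
 *       .address = anv_address_physical(buffer_address),
 *       .range   = bind_range,
 *       .zero    = 0,
 *    };
 *    memcpy(desc_map, &desc, sizeof(desc));
 */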
2665
2666 enum anv_descriptor_data {
2667 /** The descriptor contains a BTI reference to a surface state */
2668 ANV_DESCRIPTOR_BTI_SURFACE_STATE = BITFIELD_BIT(0),
2669 /** The descriptor contains a BTI reference to a sampler state */
2670 ANV_DESCRIPTOR_BTI_SAMPLER_STATE = BITFIELD_BIT(1),
2671 /** The descriptor contains an actual buffer view */
2672 ANV_DESCRIPTOR_BUFFER_VIEW = BITFIELD_BIT(2),
2673 /** The descriptor contains inline uniform data */
2674 ANV_DESCRIPTOR_INLINE_UNIFORM = BITFIELD_BIT(3),
2675 /** anv_address_range_descriptor with a buffer address and range */
2676 ANV_DESCRIPTOR_INDIRECT_ADDRESS_RANGE = BITFIELD_BIT(4),
2677 /** Bindless surface handle (through anv_sampled_image_descriptor) */
2678 ANV_DESCRIPTOR_INDIRECT_SAMPLED_IMAGE = BITFIELD_BIT(5),
2679 /** Storage image handles (through anv_storage_image_descriptor) */
2680 ANV_DESCRIPTOR_INDIRECT_STORAGE_IMAGE = BITFIELD_BIT(6),
2681 /** The descriptor contains a single RENDER_SURFACE_STATE */
2682 ANV_DESCRIPTOR_SURFACE = BITFIELD_BIT(7),
2683 /** The descriptor contains a SAMPLER_STATE */
2684 ANV_DESCRIPTOR_SAMPLER = BITFIELD_BIT(8),
2685 /** A tuple of RENDER_SURFACE_STATE & SAMPLER_STATE */
2686 ANV_DESCRIPTOR_SURFACE_SAMPLER = BITFIELD_BIT(9),
2687 };
2688
2689 struct anv_descriptor_set_binding_layout {
2690 /* The type of the descriptors in this binding */
2691 VkDescriptorType type;
2692
2693 /* Flags provided when this binding was created */
2694 VkDescriptorBindingFlags flags;
2695
2696 /* Bitfield representing the type of data this descriptor contains */
2697 enum anv_descriptor_data data;
2698
2699 /* Maximum number of YCbCr texture/sampler planes */
2700 uint8_t max_plane_count;
2701
2702 /* Number of array elements in this binding (or size in bytes for inline
2703 * uniform data)
2704 */
2705 uint32_t array_size;
2706
2707 /* Index into the flattened descriptor set */
2708 uint32_t descriptor_index;
2709
2710 /* Index into the dynamic state array for a dynamic buffer, relative to the
2711 * set.
2712 */
2713 int16_t dynamic_offset_index;
2714
2715 /* Computed surface size from data (for one plane) */
2716 uint16_t descriptor_data_surface_size;
2717
2718 /* Computed sampler size from data (for one plane) */
2719 uint16_t descriptor_data_sampler_size;
2720
2721 /* Index into the descriptor set buffer views */
2722 int32_t buffer_view_index;
2723
2724 /* Offset into the descriptor buffer where the surface descriptor lives */
2725 uint32_t descriptor_surface_offset;
2726
2727 /* Offset into the descriptor buffer where the sampler descriptor lives */
2728 uint16_t descriptor_sampler_offset;
2729
2730 /* Precomputed surface stride (with a multiplane descriptor, the descriptor
2731 * includes all the planes)
2732 */
2733 uint16_t descriptor_surface_stride;
2734
2735 /* Precomputed sampler stride (with a multiplane descriptor, the descriptor
2736 * includes all the planes)
2737 */
2738 uint16_t descriptor_sampler_stride;
2739
2740 /* Immutable samplers (or NULL if no immutable samplers) */
2741 struct anv_sampler **immutable_samplers;
2742 };
2743
2744 enum anv_descriptor_set_layout_type {
2745 ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_UNKNOWN,
2746 ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT,
2747 ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT,
2748 ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER,
2749 };
2750
2751 struct anv_descriptor_set_layout {
2752 struct vk_object_base base;
2753
2754 VkDescriptorSetLayoutCreateFlags flags;
2755
2756 /* Type of descriptor set layout */
2757 enum anv_descriptor_set_layout_type type;
2758
2759 /* Descriptor set layouts can be destroyed at almost any time */
2760 uint32_t ref_cnt;
2761
2762 /* Number of bindings in this descriptor set */
2763 uint32_t binding_count;
2764
2765 /* Total number of descriptors */
2766 uint32_t descriptor_count;
2767
2768 /* Shader stages affected by this descriptor set */
2769 uint16_t shader_stages;
2770
2771 /* Number of buffer views in this descriptor set */
2772 uint32_t buffer_view_count;
2773
2774 /* Number of dynamic offsets used by this descriptor set */
2775 uint16_t dynamic_offset_count;
2776
2777 /* For each dynamic buffer, which VkShaderStageFlagBits stages are using
2778 * this buffer
2779 */
2780 VkShaderStageFlags dynamic_offset_stages[MAX_DYNAMIC_BUFFERS];
2781
2782 /* Size of the descriptor buffer dedicated to surface states for this
2783 * descriptor set
2784 */
2785 uint32_t descriptor_buffer_surface_size;
2786
2787 /* Size of the descriptor buffer dedicated to sampler states for this
2788 * descriptor set
2789 */
2790 uint32_t descriptor_buffer_sampler_size;
2791
2792 /* Number of embedded samplers */
2793 uint32_t embedded_sampler_count;
2794
2795 /* Bindings in this descriptor set */
2796 struct anv_descriptor_set_binding_layout binding[0];
2797 };
2798
2799 bool anv_descriptor_supports_bindless(const struct anv_physical_device *pdevice,
2800 const struct anv_descriptor_set_layout *set,
2801 const struct anv_descriptor_set_binding_layout *binding);
2802
2803 bool anv_descriptor_requires_bindless(const struct anv_physical_device *pdevice,
2804 const struct anv_descriptor_set_layout *set,
2805 const struct anv_descriptor_set_binding_layout *binding);
2806
2807 void anv_descriptor_set_layout_destroy(struct anv_device *device,
2808 struct anv_descriptor_set_layout *layout);
2809
2810 void anv_descriptor_set_layout_print(const struct anv_descriptor_set_layout *layout);
2811
2812 static inline struct anv_descriptor_set_layout *
2813 anv_descriptor_set_layout_ref(struct anv_descriptor_set_layout *layout)
2814 {
2815 assert(layout && layout->ref_cnt >= 1);
2816 p_atomic_inc(&layout->ref_cnt);
2817
2818 return layout;
2819 }
2820
2821 static inline void
2822 anv_descriptor_set_layout_unref(struct anv_device *device,
2823 struct anv_descriptor_set_layout *layout)
2824 {
2825 assert(layout && layout->ref_cnt >= 1);
2826 if (p_atomic_dec_zero(&layout->ref_cnt))
2827 anv_descriptor_set_layout_destroy(device, layout);
2828 }
2829
2830 struct anv_descriptor {
2831 VkDescriptorType type;
2832
2833 union {
2834 struct {
2835 VkImageLayout layout;
2836 struct anv_image_view *image_view;
2837 struct anv_sampler *sampler;
2838 };
2839
2840 struct {
2841 struct anv_buffer_view *set_buffer_view;
2842 struct anv_buffer *buffer;
2843 uint64_t offset;
2844 uint64_t range;
2845 uint64_t bind_range;
2846 };
2847
2848 struct anv_buffer_view *buffer_view;
2849
2850 struct vk_acceleration_structure *accel_struct;
2851 };
2852 };
2853
2854 struct anv_descriptor_set {
2855 struct vk_object_base base;
2856
2857 struct anv_descriptor_pool *pool;
2858 struct anv_descriptor_set_layout *layout;
2859
2860 /* Amount of space occupied in the pool by this descriptor set. It can
2861 * be larger than the size of the descriptor set.
2862 */
2863 uint32_t size;
2864
2865 /* Is this descriptor set a push descriptor */
2866 bool is_push;
2867
2868 /* Bitfield of descriptors for which we need to generate surface states.
2869 * Only valid for push descriptors
2870 */
2871 uint32_t generate_surface_states;
2872
2873 /* State relative to anv_descriptor_pool::surface_bo */
2874 struct anv_state desc_surface_mem;
2875 /* State relative to anv_descriptor_pool::sampler_bo */
2876 struct anv_state desc_sampler_mem;
2877 /* Surface state for the descriptor buffer */
2878 struct anv_state desc_surface_state;
2879
2880 /* Descriptor set address pointing to desc_surface_mem (we don't need one
2881 * for samplers because they're never accessed other than by the HW through
2882 * the shader sampler handle).
2883 */
2884 struct anv_address desc_surface_addr;
2885
2886 struct anv_address desc_sampler_addr;
2887
2888 /* Descriptor offset from the
2889 * device->va.internal_surface_state_pool.addr
2890 *
2891 * It just needs to be added to the binding table offset to be put into the
2892 * HW BTI entry.
2893 */
2894 uint32_t desc_offset;
2895
2896 uint32_t buffer_view_count;
2897 struct anv_buffer_view *buffer_views;
2898
2899 /* Link to descriptor pool's desc_sets list. */
2900 struct list_head pool_link;
2901
2902 uint32_t descriptor_count;
2903 struct anv_descriptor descriptors[0];
2904 };
2905
2906 static inline bool
2907 anv_descriptor_set_is_push(struct anv_descriptor_set *set)
2908 {
2909 return set->pool == NULL;
2910 }
2911
2912 struct anv_surface_state_data {
2913 uint8_t data[ANV_SURFACE_STATE_SIZE];
2914 };
2915
2916 struct anv_buffer_state {
2917 /** Surface state allocated from the bindless heap
2918 *
2919 * Only valid if anv_physical_device::indirect_descriptors is true
2920 */
2921 struct anv_state state;
2922
2923 /** Surface state after genxml packing
2924 *
2925 * Only valid if anv_physical_device::indirect_descriptors is false
2926 */
2927 struct anv_surface_state_data state_data;
2928 };
2929
2930 struct anv_buffer_view {
2931 struct vk_buffer_view vk;
2932
2933 struct anv_address address;
2934
2935 struct anv_buffer_state general;
2936 struct anv_buffer_state storage;
2937 };
2938
2939 struct anv_push_descriptor_set {
2940 struct anv_descriptor_set set;
2941
2942 /* Put this field right behind anv_descriptor_set so it fills up the
2943 * descriptors[0] field. */
2944 struct anv_descriptor descriptors[MAX_PUSH_DESCRIPTORS];
2945
2946 /** True if the descriptor set buffer has been referenced by a draw or
2947 * dispatch command.
2948 */
2949 bool set_used_on_gpu;
2950
2951 struct anv_buffer_view buffer_views[MAX_PUSH_DESCRIPTORS];
2952 };
2953
2954 static inline struct anv_address
2955 anv_descriptor_set_address(struct anv_descriptor_set *set)
2956 {
2957 if (anv_descriptor_set_is_push(set)) {
2958 /* We have to flag the push descriptor set as used on the GPU
2959 * so that the next time we push descriptors, we grab new memory.
2960 */
2961 struct anv_push_descriptor_set *push_set =
2962 (struct anv_push_descriptor_set *)set;
2963 push_set->set_used_on_gpu = true;
2964 }
2965
2966 return set->desc_surface_addr;
2967 }
2968
2969 struct anv_descriptor_pool_heap {
2970 /* BO allocated to back the pool (unused for host pools) */
2971 struct anv_bo *bo;
2972
2973 /* Host memory allocated to back a host pool */
2974 void *host_mem;
2975
2976 /* Heap tracking allocations in bo/host_mem */
2977 struct util_vma_heap heap;
2978
2979 /* Size of the heap */
2980 uint32_t size;
2981 };
2982
2983 struct anv_descriptor_pool {
2984 struct vk_object_base base;
2985
2986 struct anv_descriptor_pool_heap surfaces;
2987 struct anv_descriptor_pool_heap samplers;
2988
2989 struct anv_state_stream surface_state_stream;
2990 void *surface_state_free_list;
2991
2992 /** List of anv_descriptor_set. */
2993 struct list_head desc_sets;
2994
2995 /** Heap over host_mem */
2996 struct util_vma_heap host_heap;
2997
2998 /** Allocated size of host_mem */
2999 uint32_t host_mem_size;
3000
3001 /**
3002 * VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_EXT. If set, then
3003 * surface_state_stream is unused.
3004 */
3005 bool host_only;
3006
3007 char host_mem[0];
3008 };
3009
3010 bool
3011 anv_push_descriptor_set_init(struct anv_cmd_buffer *cmd_buffer,
3012 struct anv_push_descriptor_set *push_set,
3013 struct anv_descriptor_set_layout *layout);
3014
3015 void
3016 anv_push_descriptor_set_finish(struct anv_push_descriptor_set *push_set);
3017
3018 void
3019 anv_descriptor_set_write_image_view(struct anv_device *device,
3020 struct anv_descriptor_set *set,
3021 const VkDescriptorImageInfo * const info,
3022 VkDescriptorType type,
3023 uint32_t binding,
3024 uint32_t element);
3025
3026 void
3027 anv_descriptor_set_write_buffer_view(struct anv_device *device,
3028 struct anv_descriptor_set *set,
3029 VkDescriptorType type,
3030 struct anv_buffer_view *buffer_view,
3031 uint32_t binding,
3032 uint32_t element);
3033
3034 void
3035 anv_descriptor_set_write_buffer(struct anv_device *device,
3036 struct anv_descriptor_set *set,
3037 VkDescriptorType type,
3038 struct anv_buffer *buffer,
3039 uint32_t binding,
3040 uint32_t element,
3041 VkDeviceSize offset,
3042 VkDeviceSize range);
3043
3044 void
3045 anv_descriptor_write_surface_state(struct anv_device *device,
3046 struct anv_descriptor *desc,
3047 struct anv_state surface_state);
3048
3049 void
3050 anv_descriptor_set_write_acceleration_structure(struct anv_device *device,
3051 struct anv_descriptor_set *set,
3052 struct vk_acceleration_structure *accel,
3053 uint32_t binding,
3054 uint32_t element);
3055
3056 void
3057 anv_descriptor_set_write_inline_uniform_data(struct anv_device *device,
3058 struct anv_descriptor_set *set,
3059 uint32_t binding,
3060 const void *data,
3061 size_t offset,
3062 size_t size);
3063
3064 void
3065 anv_descriptor_set_write(struct anv_device *device,
3066 struct anv_descriptor_set *set_override,
3067 uint32_t write_count,
3068 const VkWriteDescriptorSet *writes);
3069
3070 void
3071 anv_descriptor_set_write_template(struct anv_device *device,
3072 struct anv_descriptor_set *set,
3073 const struct vk_descriptor_update_template *template,
3074 const void *data);
3075
3076 #define ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER (UINT8_MAX - 5)
3077 #define ANV_DESCRIPTOR_SET_NULL (UINT8_MAX - 4)
3078 #define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS (UINT8_MAX - 3)
3079 #define ANV_DESCRIPTOR_SET_DESCRIPTORS (UINT8_MAX - 2)
3080 #define ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS (UINT8_MAX - 1)
3081 #define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX
3082
3083 struct anv_pipeline_binding {
3084 /** Index in the descriptor set
3085 *
3086 * This is a flattened index; the descriptor set layout is already taken
3087 * into account.
3088 */
3089 uint32_t index;
3090
3091 /** Binding in the descriptor set. Not valid for any of the
3092 * ANV_DESCRIPTOR_SET_*
3093 */
3094 uint32_t binding;
3095
3096 /** Offset in the descriptor buffer
3097 *
3098 * Relative to anv_descriptor_set::desc_addr. This is useful for
3099 * ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT, to generate the binding
3100 * table entry.
3101 */
3102 uint32_t set_offset;
3103
3104 /** The descriptor set this surface corresponds to.
3105 *
3106 * The special ANV_DESCRIPTOR_SET_* values above indicate that this
3107 * binding is not a normal descriptor set but something else.
3108 */
3109 uint8_t set;
3110
3111 union {
3112 /** Plane in the binding index for images */
3113 uint8_t plane;
3114
3115 /** Input attachment index (relative to the subpass) */
3116 uint8_t input_attachment_index;
3117
3118 /** Dynamic offset index
3119 *
3120 * For dynamic UBOs and SSBOs, relative to set.
3121 */
3122 uint8_t dynamic_offset_index;
3123 };
3124 };
3125
3126 struct anv_embedded_sampler_key {
3127 /** No need to track binding elements for embedded samplers as:
3128 *
3129 * VUID-VkDescriptorSetLayoutBinding-flags-08006:
3130 *
3131 * "If VkDescriptorSetLayoutCreateInfo:flags contains
3132 * VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT,
3133 * descriptorCount must be less than or equal to 1"
3134 *
3135 * The following struct can be safely hashed as it doesn't include any
3136 * address/offset.
3137 */
3138 uint32_t sampler[4];
3139 uint32_t color[4];
3140 };
3141
3142 struct anv_pipeline_embedded_sampler_binding {
3143 /** The descriptor set this sampler belongs to */
3144 uint8_t set;
3145
3146 /** The binding in the set this sampler belongs to */
3147 uint32_t binding;
3148
3149 /** The data configuring the sampler */
3150 struct anv_embedded_sampler_key key;
3151 };
3152
3153 struct anv_push_range {
3154 /** Index in the descriptor set */
3155 uint32_t index;
3156
3157 /** Descriptor set index */
3158 uint8_t set;
3159
3160 /** Dynamic offset index (for dynamic UBOs), relative to set. */
3161 uint8_t dynamic_offset_index;
3162
3163 /** Start offset in units of 32B */
3164 uint8_t start;
3165
3166 /** Range in units of 32B */
3167 uint8_t length;
3168 };
3169
3170 struct anv_pipeline_sets_layout {
3171 struct anv_device *device;
3172
3173 struct {
3174 struct anv_descriptor_set_layout *layout;
3175 uint32_t dynamic_offset_start;
3176 } set[MAX_SETS];
3177
3178 enum anv_descriptor_set_layout_type type;
3179
3180 uint32_t num_sets;
3181 uint32_t num_dynamic_buffers;
3182 int push_descriptor_set_index;
3183
3184 bool independent_sets;
3185
3186 unsigned char sha1[20];
3187 };
3188
3189 void anv_pipeline_sets_layout_init(struct anv_pipeline_sets_layout *layout,
3190 struct anv_device *device,
3191 bool independent_sets);
3192
3193 void anv_pipeline_sets_layout_fini(struct anv_pipeline_sets_layout *layout);
3194
3195 void anv_pipeline_sets_layout_add(struct anv_pipeline_sets_layout *layout,
3196 uint32_t set_idx,
3197 struct anv_descriptor_set_layout *set_layout);
3198
3199 uint32_t
3200 anv_pipeline_sets_layout_embedded_sampler_count(const struct anv_pipeline_sets_layout *layout);
3201
3202 void anv_pipeline_sets_layout_hash(struct anv_pipeline_sets_layout *layout);
3203
3204 void anv_pipeline_sets_layout_print(const struct anv_pipeline_sets_layout *layout);
3205
3206 struct anv_pipeline_layout {
3207 struct vk_object_base base;
3208
3209 struct anv_pipeline_sets_layout sets_layout;
3210 };
3211
3212 const struct anv_descriptor_set_layout *
3213 anv_pipeline_layout_get_push_set(const struct anv_pipeline_sets_layout *layout,
3214 uint8_t *desc_idx);
3215
3216 struct anv_sparse_binding_data {
3217 uint64_t address;
3218 uint64_t size;
3219
3220 /* This is kept only because it's given to us by vma_alloc() and needs to be
3221 * passed back to vma_free(); we have no other particular use for it.
3222 */
3223 struct util_vma_heap *vma_heap;
3224 };
3225
3226 #define ANV_SPARSE_BLOCK_SIZE (64 * 1024)
3227
3228 static inline bool
3229 anv_sparse_binding_is_enabled(struct anv_device *device)
3230 {
3231 return device->vk.enabled_features.sparseBinding;
3232 }
3233
3234 static inline bool
3235 anv_sparse_residency_is_enabled(struct anv_device *device)
3236 {
3237 return device->vk.enabled_features.sparseResidencyBuffer ||
3238 device->vk.enabled_features.sparseResidencyImage2D ||
3239 device->vk.enabled_features.sparseResidencyImage3D ||
3240 device->vk.enabled_features.sparseResidency2Samples ||
3241 device->vk.enabled_features.sparseResidency4Samples ||
3242 device->vk.enabled_features.sparseResidency8Samples ||
3243 device->vk.enabled_features.sparseResidency16Samples ||
3244 device->vk.enabled_features.sparseResidencyAliased;
3245 }
3246
3247 VkResult anv_init_sparse_bindings(struct anv_device *device,
3248 uint64_t size,
3249 struct anv_sparse_binding_data *sparse,
3250 enum anv_bo_alloc_flags alloc_flags,
3251 uint64_t client_address,
3252 struct anv_address *out_address);
3253 void anv_free_sparse_bindings(struct anv_device *device,
3254 struct anv_sparse_binding_data *sparse);
3255 VkResult anv_sparse_bind_buffer(struct anv_device *device,
3256 struct anv_buffer *buffer,
3257 const VkSparseMemoryBind *vk_bind,
3258 struct anv_sparse_submission *submit);
3259 VkResult anv_sparse_bind_image_opaque(struct anv_device *device,
3260 struct anv_image *image,
3261 const VkSparseMemoryBind *vk_bind,
3262 struct anv_sparse_submission *submit);
3263 VkResult anv_sparse_bind_image_memory(struct anv_queue *queue,
3264 struct anv_image *image,
3265 const VkSparseImageMemoryBind *bind,
3266 struct anv_sparse_submission *submit);
3267 VkResult anv_sparse_bind(struct anv_device *device,
3268 struct anv_sparse_submission *sparse_submit);
3269
3270 VkResult anv_sparse_trtt_garbage_collect_batches(struct anv_device *device,
3271 bool wait_completion);
3272
3273 VkSparseImageFormatProperties
3274 anv_sparse_calc_image_format_properties(struct anv_physical_device *pdevice,
3275 VkImageAspectFlags aspect,
3276 VkImageType vk_image_type,
3277 VkSampleCountFlagBits vk_samples,
3278 struct isl_surf *surf);
3279 void anv_sparse_calc_miptail_properties(struct anv_device *device,
3280 struct anv_image *image,
3281 VkImageAspectFlags vk_aspect,
3282 uint32_t *imageMipTailFirstLod,
3283 VkDeviceSize *imageMipTailSize,
3284 VkDeviceSize *imageMipTailOffset,
3285 VkDeviceSize *imageMipTailStride);
3286 VkResult anv_sparse_image_check_support(struct anv_physical_device *pdevice,
3287 VkImageCreateFlags flags,
3288 VkImageTiling tiling,
3289 VkSampleCountFlagBits samples,
3290 VkImageType type,
3291 VkFormat format);
3292
3293 struct anv_buffer {
3294 struct vk_buffer vk;
3295
3296 /* Set when bound */
3297 struct anv_address address;
3298
3299 struct anv_sparse_binding_data sparse_data;
3300 };
3301
3302 static inline bool
3303 anv_buffer_is_protected(const struct anv_buffer *buffer)
3304 {
3305 return buffer->vk.create_flags & VK_BUFFER_CREATE_PROTECTED_BIT;
3306 }
3307
3308 static inline bool
3309 anv_buffer_is_sparse(const struct anv_buffer *buffer)
3310 {
3311 return buffer->vk.create_flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT;
3312 }
3313
3314 enum anv_cmd_dirty_bits {
3315 ANV_CMD_DIRTY_PIPELINE = 1 << 0,
3316 ANV_CMD_DIRTY_INDEX_BUFFER = 1 << 1,
3317 ANV_CMD_DIRTY_RENDER_AREA = 1 << 2,
3318 ANV_CMD_DIRTY_RENDER_TARGETS = 1 << 3,
3319 ANV_CMD_DIRTY_XFB_ENABLE = 1 << 4,
3320 ANV_CMD_DIRTY_RESTART_INDEX = 1 << 5,
3321 ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE = 1 << 6,
3322 ANV_CMD_DIRTY_FS_MSAA_FLAGS = 1 << 7,
3323 ANV_CMD_DIRTY_COARSE_PIXEL_ACTIVE = 1 << 8,
3324 ANV_CMD_DIRTY_INDIRECT_DATA_STRIDE = 1 << 9,
3325 };
3326 typedef enum anv_cmd_dirty_bits anv_cmd_dirty_mask_t;
3327
3328 enum anv_pipe_bits {
3329 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT = (1 << 0),
3330 ANV_PIPE_STALL_AT_SCOREBOARD_BIT = (1 << 1),
3331 ANV_PIPE_STATE_CACHE_INVALIDATE_BIT = (1 << 2),
3332 ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT = (1 << 3),
3333 ANV_PIPE_VF_CACHE_INVALIDATE_BIT = (1 << 4),
3334 ANV_PIPE_DATA_CACHE_FLUSH_BIT = (1 << 5),
3335 ANV_PIPE_TILE_CACHE_FLUSH_BIT = (1 << 6),
3336 ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT = (1 << 10),
3337 ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT = (1 << 11),
3338 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT = (1 << 12),
3339 ANV_PIPE_DEPTH_STALL_BIT = (1 << 13),
3340
3341 /* ANV_PIPE_HDC_PIPELINE_FLUSH_BIT is a precise way to ensure prior data
3342 * cache work has completed. Available on Gfx12+. For earlier Gfx we
3343 * must reinterpret this flush as ANV_PIPE_DATA_CACHE_FLUSH_BIT.
3344 */
3345 ANV_PIPE_HDC_PIPELINE_FLUSH_BIT = (1 << 14),
3346 ANV_PIPE_PSS_STALL_SYNC_BIT = (1 << 15),
3347
3348 /*
3349 * This bit flushes the data-port's Untyped L1 data cache (LSC L1).
3350 */
3351 ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT = (1 << 16),
3352
3353 /* This bit controls the flushing of the engine (Render, Compute) specific
3354 * entries from the compression cache.
3355 */
3356 ANV_PIPE_CCS_CACHE_FLUSH_BIT = (1 << 17),
3357
3358 ANV_PIPE_TLB_INVALIDATE_BIT = (1 << 18),
3359
3360 ANV_PIPE_CS_STALL_BIT = (1 << 20),
3361 ANV_PIPE_END_OF_PIPE_SYNC_BIT = (1 << 21),
3362
3363 /* This bit does not exist directly in PIPE_CONTROL. Instead it means that
3364 * a flush has happened but not a CS stall. The next time we do any sort
3365 * of invalidation we need to insert a CS stall at that time. Otherwise,
3366 * we would have to CS stall on every flush which could be bad.
3367 */
3368 ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT = (1 << 22),
3369
3370 /* This bit does not exist directly in PIPE_CONTROL. It means that Gfx12
3371 * AUX-TT data has changed and we need to invalidate AUX-TT data. This is
3372 * done by writing the AUX-TT register.
3373 */
3374 ANV_PIPE_AUX_TABLE_INVALIDATE_BIT = (1 << 23),
3375
3376 /* This bit does not exist directly in PIPE_CONTROL. It means that a
3377 * PIPE_CONTROL with a post-sync operation will follow. This is used to
3378 * implement a workaround for Gfx9.
3379 */
3380 ANV_PIPE_POST_SYNC_BIT = (1 << 24),
3381 };
3382
3383 /* These bits track the state of buffer writes for queries. They get cleared
3384 * based on PIPE_CONTROL emissions.
3385 */
3386 enum anv_query_bits {
3387 ANV_QUERY_WRITES_RT_FLUSH = (1 << 0),
3388
3389 ANV_QUERY_WRITES_TILE_FLUSH = (1 << 1),
3390
3391 ANV_QUERY_WRITES_CS_STALL = (1 << 2),
3392
3393 ANV_QUERY_WRITES_DATA_FLUSH = (1 << 3),
3394 };
3395
3396 /* It's not clear why DG2 doesn't have issues with L3/CS coherency. But it's
3397 * likely related to performance workaround 14015868140.
3398 *
3399 * For now we enable this only on DG2 and platforms prior to Gfx12, where there
3400 * is no tile cache.
3401 */
3402 #define ANV_DEVINFO_HAS_COHERENT_L3_CS(devinfo) \
3403 (intel_device_info_is_dg2(devinfo))
3404
3405 /* Things we need to flush before accessing query data using the command
3406 * streamer.
3407 *
3408 * Prior to DG2, experiments show that the command streamer is not coherent
3409 * with the tile cache so we need to flush it to make any data visible to CS.
3410 *
3411 * Otherwise we want to flush the RT cache which is where blorp writes, either
3412 * for clearing the query buffer or for clearing the destination buffer in
3413 * vkCopyQueryPoolResults().
3414 */
3415 #define ANV_QUERY_RENDER_TARGET_WRITES_PENDING_BITS(devinfo) \
3416 (((!ANV_DEVINFO_HAS_COHERENT_L3_CS(devinfo) && \
3417 (devinfo)->ver >= 12) ? \
3418 ANV_QUERY_WRITES_TILE_FLUSH : 0) | \
3419 ANV_QUERY_WRITES_RT_FLUSH | \
3420 ANV_QUERY_WRITES_CS_STALL)
3421 #define ANV_QUERY_COMPUTE_WRITES_PENDING_BITS \
3422 (ANV_QUERY_WRITES_DATA_FLUSH | \
3423 ANV_QUERY_WRITES_CS_STALL)
3424
3425 #define ANV_PIPE_QUERY_BITS(pending_query_bits) ( \
3426 ((pending_query_bits & ANV_QUERY_WRITES_RT_FLUSH) ? \
3427 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT : 0) | \
3428 ((pending_query_bits & ANV_QUERY_WRITES_TILE_FLUSH) ? \
3429 ANV_PIPE_TILE_CACHE_FLUSH_BIT : 0) | \
3430 ((pending_query_bits & ANV_QUERY_WRITES_CS_STALL) ? \
3431 ANV_PIPE_CS_STALL_BIT : 0) | \
3432 ((pending_query_bits & ANV_QUERY_WRITES_DATA_FLUSH) ? \
3433 (ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
3434 ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \
3435 ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT) : 0))
3436
3437 #define ANV_PIPE_FLUSH_BITS ( \
3438 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \
3439 ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
3440 ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \
3441 ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT | \
3442 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | \
3443 ANV_PIPE_TILE_CACHE_FLUSH_BIT)
3444
3445 #define ANV_PIPE_STALL_BITS ( \
3446 ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \
3447 ANV_PIPE_DEPTH_STALL_BIT | \
3448 ANV_PIPE_CS_STALL_BIT | \
3449 ANV_PIPE_PSS_STALL_SYNC_BIT)
3450
3451 #define ANV_PIPE_INVALIDATE_BITS ( \
3452 ANV_PIPE_STATE_CACHE_INVALIDATE_BIT | \
3453 ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | \
3454 ANV_PIPE_VF_CACHE_INVALIDATE_BIT | \
3455 ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | \
3456 ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT | \
3457 ANV_PIPE_AUX_TABLE_INVALIDATE_BIT)
3458
3459 /* PIPE_CONTROL bits that should be set only in 3D RCS mode.
3460 * For more details see genX(emit_apply_pipe_flushes).
3461 */
3462 #define ANV_PIPE_GFX_BITS ( \
3463 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | \
3464 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \
3465 ANV_PIPE_TILE_CACHE_FLUSH_BIT | \
3466 ANV_PIPE_DEPTH_STALL_BIT | \
3467 ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \
3468 (GFX_VERx10 >= 125 ? ANV_PIPE_PSS_STALL_SYNC_BIT : 0) | \
3469 ANV_PIPE_VF_CACHE_INVALIDATE_BIT)
3470
3471 /* PIPE_CONTROL bits that should be set only in Media/GPGPU RCS mode.
3472 * For more details see genX(emit_apply_pipe_flushes).
3473 *
3474 * Documentation says that untyped L1 dataport cache flush is controlled by
3475 * HDC pipeline flush in 3D mode according to HDC_CHICKEN0 register:
3476 *
3477 * BSpec 47112: PIPE_CONTROL::HDC Pipeline Flush:
3478 *
3479 * "When the "Pipeline Select" mode in PIPELINE_SELECT command is set to
3480 * "3D", HDC Pipeline Flush can also flush/invalidate the LSC Untyped L1
3481 * cache based on the programming of HDC_Chicken0 register bits 13:11."
3482 *
3483 * "When the 'Pipeline Select' mode is set to 'GPGPU', the LSC Untyped L1
3484 * cache flush is controlled by 'Untyped Data-Port Cache Flush' bit in the
3485 * PIPE_CONTROL command."
3486 *
3487 * As part of Wa_22010960976 & Wa_14013347512, i915 is programming
3488 * HDC_CHICKEN0[11:13] = 0 ("Untyped L1 is flushed, for both 3D Pipecontrol
3489 * Dataport flush, and UAV coherency barrier event"). So there is no need
3490 * to set "Untyped Data-Port Cache" in 3D mode.
3491 *
3492 * On MTL the HDC_CHICKEN0 default values changed to match what was programmed
3493 * by Wa_22010960976 & Wa_14013347512 on DG2, but experiments show that the
3494 * change runs a bit deeper. Even manually writing to the HDC_CHICKEN0
3495 * register to force L1 untyped flush with HDC pipeline flush has no effect on
3496 * MTL.
3497 *
3498 * It seems like the HW change completely disconnected L1 untyped flush from
3499 * HDC pipeline flush with no way to bring that behavior back. So leave the L1
3500  * untyped flush active in 3D mode on all platforms since it doesn't seem to
3501  * cause issues there either.
3502 *
3503 * Maybe we'll have some GPGPU only bits here at some point.
3504 */
3505 #define ANV_PIPE_GPGPU_BITS (0)
3506
3507 enum intel_ds_stall_flag
3508 anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits);
3509
3510 #define VK_IMAGE_ASPECT_PLANES_BITS_ANV ( \
3511 VK_IMAGE_ASPECT_PLANE_0_BIT | \
3512 VK_IMAGE_ASPECT_PLANE_1_BIT | \
3513 VK_IMAGE_ASPECT_PLANE_2_BIT)
3514
3515 #define VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV ( \
3516 VK_IMAGE_ASPECT_COLOR_BIT | \
3517 VK_IMAGE_ASPECT_PLANES_BITS_ANV)
3518
3519 struct anv_vertex_binding {
3520 struct anv_buffer * buffer;
3521 VkDeviceSize offset;
3522 VkDeviceSize size;
3523 };
3524
3525 struct anv_xfb_binding {
3526 struct anv_buffer * buffer;
3527 VkDeviceSize offset;
3528 VkDeviceSize size;
3529 };
3530
3531 struct anv_push_constants {
3532 /** Push constant data provided by the client through vkPushConstants */
3533 uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE];
3534
3535 #define ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK ((uint32_t)ANV_UBO_ALIGNMENT - 1)
3536 #define ANV_DESCRIPTOR_SET_OFFSET_MASK (~(uint32_t)(ANV_UBO_ALIGNMENT - 1))
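/* Illustrative sketch (an assumption based on the two masks above, not a
 * definitive description of the encoding; "push" is a hypothetical pointer to
 * this struct): an entry of desc_surface_offsets below could be split into
 * its dynamic offset index (low bits) and its base offset (high bits):
 *
 *    uint32_t packed  = push->desc_surface_offsets[set];
 *    uint32_t dyn_idx = packed & ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK;
 *    uint32_t offset  = packed & ANV_DESCRIPTOR_SET_OFFSET_MASK;
 */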
3537
3538 /**
3539  * Base offsets for descriptor set surface states.
3540  *
3541  * The offset has a different meaning depending on a number of factors:
3542  *
3543  * - with descriptor sets (direct or indirect), it is relative to
3544  *   pdevice->va.descriptor_pool
3545  *
3546  * - with descriptor buffers on DG2+, it is relative to
3547  *   device->va.descriptor_buffer_pool
3548  *
3549  * - with descriptor buffers prior to DG2, it is relative to the value
3550  *   programmed in STATE_BASE_ADDRESS::BindlessSurfaceStateBaseAddress
3551 */
3552 uint32_t desc_surface_offsets[MAX_SETS];
3553
3554 /**
3555  * Base offsets for the sampler states of descriptor sets.
3556 */
3557 uint32_t desc_sampler_offsets[MAX_SETS];
3558
3559 /** Dynamic offsets for dynamic UBOs and SSBOs */
3560 uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
3561
3562 /** Surface buffer base offset
3563 *
3564 * Only used prior to DG2 with descriptor buffers.
3565 *
3566 * (surfaces_base_offset + desc_offsets[set_index]) is relative to
3567 * device->va.descriptor_buffer_pool and can be used to compute a 64bit
3568 * address to the descriptor buffer (using load_desc_set_address_intel).
3569 */
3570 uint32_t surfaces_base_offset;
3571
3572 /* Robust access pushed registers. */
3573 uint64_t push_reg_mask[MESA_SHADER_STAGES];
3574
3575 /** Ray query globals (RT_DISPATCH_GLOBALS) */
3576 uint64_t ray_query_globals;
3577
3578 union {
3579 struct {
3580 /** Dynamic MSAA value */
3581 uint32_t fs_msaa_flags;
3582
3583 /** Dynamic TCS input vertices */
3584 uint32_t tcs_input_vertices;
3585 } gfx;
3586
3587 struct {
3588 /** Base workgroup ID
3589 *
3590 * Used for vkCmdDispatchBase.
3591 */
3592 uint32_t base_work_group_id[3];
3593
3594 /** Subgroup ID
3595 *
3596 * This is never set by software but is implicitly filled out when
3597 * uploading the push constants for compute shaders.
3598 *
3599 * This *MUST* be the last field of the anv_push_constants structure.
3600 */
3601 uint32_t subgroup_id;
3602 } cs;
3603 };
3604 };
3605
3606 struct anv_surface_state {
3607 /** Surface state allocated from the bindless heap
3608 *
3609 * Can be NULL if unused.
3610 */
3611 struct anv_state state;
3612
3613 /** Surface state after genxml packing
3614 *
3615 * Same data as in state.
3616 */
3617 struct anv_surface_state_data state_data;
3618
3619 /** Address of the surface referred to by this state
3620 *
3621 * This address is relative to the start of the BO.
3622 */
3623 struct anv_address address;
3624 /* Address of the aux surface, if any
3625 *
3626 * This field is ANV_NULL_ADDRESS if and only if no aux surface exists.
3627 *
3628 * With the exception of gfx8, the bottom 12 bits of this address' offset
3629 * include extra aux information.
3630 */
3631 struct anv_address aux_address;
3632 /* Address of the clear color, if any
3633 *
3634 * This address is relative to the start of the BO.
3635 */
3636 struct anv_address clear_address;
3637 };
3638
3639 struct anv_attachment {
3640 VkFormat vk_format;
3641 const struct anv_image_view *iview;
3642 VkImageLayout layout;
3643 enum isl_aux_usage aux_usage;
3644 struct anv_surface_state surface_state;
3645
3646 VkResolveModeFlagBits resolve_mode;
3647 const struct anv_image_view *resolve_iview;
3648 VkImageLayout resolve_layout;
3649 };
3650
3651 /** State tracking for vertex buffer flushes
3652 *
3653 * On Gfx8-9, the VF cache only considers the bottom 32 bits of memory
3654 * addresses. If you happen to have two vertex buffers which get placed
3655 * exactly 4 GiB apart and use them in back-to-back draw calls, you can get
3656 * collisions. In order to solve this problem, we track vertex address ranges
3657 * which are live in the cache and invalidate the cache if one ever exceeds 32
3658 * bits.
3659 */
3660 struct anv_vb_cache_range {
3661 /* Virtual address at which the live vertex buffer cache range starts for
3662 * this vertex buffer index.
3663 */
3664 uint64_t start;
3665
3666    /* Virtual address of the byte after where the vertex buffer cache range ends.
3667 * This is exclusive such that end - start is the size of the range.
3668 */
3669 uint64_t end;
3670 };
3671
3672 static inline void
3673 anv_merge_vb_cache_range(struct anv_vb_cache_range *dirty,
3674 const struct anv_vb_cache_range *bound)
3675 {
3676 if (dirty->start == dirty->end) {
3677 *dirty = *bound;
3678 } else if (bound->start != bound->end) {
3679 dirty->start = MIN2(dirty->start, bound->start);
3680 dirty->end = MAX2(dirty->end, bound->end);
3681 }
3682 }
3683
3684 /* Check whether we need to apply the Gfx8-9 vertex buffer workaround */
3685 static inline bool
3686 anv_gfx8_9_vb_cache_range_needs_workaround(struct anv_vb_cache_range *bound,
3687 struct anv_vb_cache_range *dirty,
3688 struct anv_address vb_address,
3689 uint32_t vb_size)
3690 {
3691 if (vb_size == 0) {
3692 bound->start = 0;
3693 bound->end = 0;
3694 return false;
3695 }
3696
3697 bound->start = intel_48b_address(anv_address_physical(vb_address));
3698 bound->end = bound->start + vb_size;
3699 assert(bound->end > bound->start); /* No overflow */
3700
3701 /* Align everything to a cache line */
3702 bound->start &= ~(64ull - 1ull);
3703 bound->end = align64(bound->end, 64);
3704
3705 anv_merge_vb_cache_range(dirty, bound);
3706
3707 /* If our range is larger than 32 bits, we have to flush */
3708 assert(bound->end - bound->start <= (1ull << 32));
3709 return (dirty->end - dirty->start) > (1ull << 32);
3710 }
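/* A minimal usage sketch (assumed, for illustration only): a caller binding a
 * vertex buffer "vb_address"/"vb_size" at index "idx" could use the helper to
 * decide whether a VF cache invalidation is needed before the next draw:
 *
 *    struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
 *    if (anv_gfx8_9_vb_cache_range_needs_workaround(&gfx->vb_bound_ranges[idx],
 *                                                   &gfx->vb_dirty_ranges[idx],
 *                                                   vb_address, vb_size))
 *       cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
 */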
3711
3712 /**
3713 * State tracking for simple internal shaders
3714 */
3715 struct anv_simple_shader {
3716 /* The device associated with this emission */
3717 struct anv_device *device;
3718 /* The command buffer associated with this emission (can be NULL) */
3719 struct anv_cmd_buffer *cmd_buffer;
3720 /* State stream used for various internal allocations */
3721 struct anv_state_stream *dynamic_state_stream;
3722 struct anv_state_stream *general_state_stream;
3723 /* Where to emit the commands (can be different from cmd_buffer->batch) */
3724 struct anv_batch *batch;
3725 /* Shader to use */
3726 struct anv_shader_bin *kernel;
3727 /* L3 config used by the shader */
3728 const struct intel_l3_config *l3_config;
3729 /* Current URB config */
3730 const struct intel_urb_config *urb_cfg;
3731
3732    /* Managed by the simple shader helper */
3733 struct anv_state bt_state;
3734 };
3735
3736 /** State tracking for particular pipeline bind point
3737 *
3738 * This struct is the base struct for anv_cmd_graphics_state and
3739 * anv_cmd_compute_state. These are used to track state which is bound to a
3740 * particular type of pipeline. Generic state that applies per-stage such as
3741 * binding table offsets and push constants is tracked generically with a
3742 * per-stage array in anv_cmd_state.
3743 */
3744 struct anv_cmd_pipeline_state {
3745 struct anv_descriptor_set *descriptors[MAX_SETS];
3746 struct {
3747 bool bound;
3748 /**
3749 * Buffer index used by this descriptor set.
3750 */
3751 int32_t buffer_index; /* -1 means push descriptor */
3752 /**
3753 * Offset of the descriptor set in the descriptor buffer.
3754 */
3755 uint32_t buffer_offset;
3756 /**
3757 * Final computed address to be emitted in the descriptor set surface
3758 * state.
3759 */
3760 uint64_t address;
3761 /**
3762 * The descriptor set surface state.
3763 */
3764 struct anv_state state;
3765 } descriptor_buffers[MAX_SETS];
3766 struct anv_push_descriptor_set push_descriptor;
3767
3768 struct anv_push_constants push_constants;
3769
3770    /** Tracks whether the push constant data has changed and needs to be reemitted */
3771 bool push_constants_data_dirty;
3772
3773 /* Push constant state allocated when flushing push constants. */
3774 struct anv_state push_constants_state;
3775
3776 /**
3777 * Dynamic buffer offsets.
3778 *
3779    * We have a maximum of MAX_DYNAMIC_BUFFERS per pipeline, but with
3780    * independent sets we cannot know how many are going to be used in total.
3781    * As a result we need to store the maximum possible number per set.
3782    *
3783    * Those values are written into anv_push_constants::dynamic_offsets at
3784    * flush time, when we have the pipeline with the final
3785    * anv_pipeline_sets_layout.
3786 */
3787 struct {
3788 uint32_t offsets[MAX_DYNAMIC_BUFFERS];
3789 } dynamic_offsets[MAX_SETS];
3790
3791 /**
3792 * The current bound pipeline.
3793 */
3794 struct anv_pipeline *pipeline;
3795 };
3796
3797 enum anv_coarse_pixel_state {
3798 ANV_COARSE_PIXEL_STATE_UNKNOWN,
3799 ANV_COARSE_PIXEL_STATE_DISABLED,
3800 ANV_COARSE_PIXEL_STATE_ENABLED,
3801 };
3802
3803 /** State tracking for graphics pipeline
3804 *
3805 * This has anv_cmd_pipeline_state as a base struct to track things which get
3806 * bound to a graphics pipeline. Along with general pipeline bind point state
3807 * which is in the anv_cmd_pipeline_state base struct, it also contains other
3808 * state which is graphics-specific.
3809 */
3810 struct anv_cmd_graphics_state {
3811 struct anv_cmd_pipeline_state base;
3812
3813 VkRenderingFlags rendering_flags;
3814 VkRect2D render_area;
3815 uint32_t layer_count;
3816 uint32_t samples;
3817 uint32_t view_mask;
3818 uint32_t color_att_count;
3819 struct anv_state att_states;
3820 struct anv_attachment color_att[MAX_RTS];
3821 struct anv_attachment depth_att;
3822 struct anv_attachment stencil_att;
3823 struct anv_state null_surface_state;
3824
3825 anv_cmd_dirty_mask_t dirty;
3826 uint32_t vb_dirty;
3827
3828 struct anv_vb_cache_range ib_bound_range;
3829 struct anv_vb_cache_range ib_dirty_range;
3830 struct anv_vb_cache_range vb_bound_ranges[33];
3831 struct anv_vb_cache_range vb_dirty_ranges[33];
3832
3833 uint32_t restart_index;
3834
3835 VkShaderStageFlags push_constant_stages;
3836
3837 uint32_t primitive_topology;
3838 bool used_task_shader;
3839
3840 struct anv_buffer *index_buffer;
3841 uint32_t index_type; /**< 3DSTATE_INDEX_BUFFER.IndexFormat */
3842 uint32_t index_offset;
3843 uint32_t index_size;
3844
3845 uint32_t indirect_data_stride;
3846 bool indirect_data_stride_aligned;
3847
3848 struct vk_vertex_input_state vertex_input;
3849 struct vk_sample_locations_state sample_locations;
3850
3851    /* Dynamic MSAA flags. This value can be different from
3852 * anv_push_constants::gfx::fs_msaa_flags, as the push constant value only
3853 * needs to be updated for fragment shaders dynamically checking the value.
3854 */
3855 enum intel_msaa_flags fs_msaa_flags;
3856
3857 bool object_preemption;
3858 bool has_uint_rt;
3859
3860 /* State tracking for Wa_14018912822. */
3861 bool color_blend_zero;
3862 bool alpha_blend_zero;
3863
3864 /**
3865 * State tracking for Wa_18020335297.
3866 */
3867 bool viewport_set;
3868
3869 /**
3870 * State tracking for Wa_18038825448.
3871 */
3872 enum anv_coarse_pixel_state coarse_pixel_active;
3873
3874 struct intel_urb_config urb_cfg;
3875
3876 uint32_t n_occlusion_queries;
3877
3878 struct anv_gfx_dynamic_state dyn_state;
3879 };
3880
3881 enum anv_depth_reg_mode {
3882 ANV_DEPTH_REG_MODE_UNKNOWN = 0,
3883 ANV_DEPTH_REG_MODE_HW_DEFAULT,
3884 ANV_DEPTH_REG_MODE_D16_1X_MSAA,
3885 };
3886
3887 /** State tracking for compute pipeline
3888 *
3889 * This has anv_cmd_pipeline_state as a base struct to track things which get
3890 * bound to a compute pipeline. Along with general pipeline bind point state
3891 * which is in the anv_cmd_pipeline_state base struct, it also contains other
3892 * state which is compute-specific.
3893 */
3894 struct anv_cmd_compute_state {
3895 struct anv_cmd_pipeline_state base;
3896
3897 bool pipeline_dirty;
3898
3899 struct anv_address num_workgroups;
3900
3901 uint32_t scratch_size;
3902 };
3903
3904 struct anv_cmd_ray_tracing_state {
3905 struct anv_cmd_pipeline_state base;
3906
3907 bool pipeline_dirty;
3908
3909 struct {
3910 struct anv_bo *bo;
3911 struct brw_rt_scratch_layout layout;
3912 } scratch;
3913
3914 struct anv_address build_priv_mem_addr;
3915 size_t build_priv_mem_size;
3916 };
3917
3918 enum anv_cmd_descriptor_buffer_mode {
3919 ANV_CMD_DESCRIPTOR_BUFFER_MODE_UNKNOWN,
3920 ANV_CMD_DESCRIPTOR_BUFFER_MODE_LEGACY,
3921 ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER,
3922 };
3923
3924 /** State required while building cmd buffer */
3925 struct anv_cmd_state {
3926 /* PIPELINE_SELECT.PipelineSelection */
3927 uint32_t current_pipeline;
3928 const struct intel_l3_config * current_l3_config;
3929 uint32_t last_aux_map_state;
3930
3931 struct anv_cmd_graphics_state gfx;
3932 struct anv_cmd_compute_state compute;
3933 struct anv_cmd_ray_tracing_state rt;
3934
3935 enum anv_pipe_bits pending_pipe_bits;
3936
3937 /**
3938 * Whether the last programmed STATE_BASE_ADDRESS references
3939 * anv_device::dynamic_state_pool or anv_device::dynamic_state_pool_db for
3940 * the dynamic state heap.
3941 */
3942 enum anv_cmd_descriptor_buffer_mode current_db_mode;
3943
3944 /**
3945    * Whether the command buffer has pending descriptor buffers bound to it.
3946    * This variable changes before anv_device::current_db_mode.
3947 */
3948 enum anv_cmd_descriptor_buffer_mode pending_db_mode;
3949
3950 struct {
3951 /**
3952       * Tracks operations that may interfere with queries in the destination
3953       * buffer of vkCmdCopyQueryPoolResults; we need those operations to have
3954       * completed before we do the work of vkCmdCopyQueryPoolResults.
3955 */
3956 enum anv_query_bits buffer_write_bits;
3957
3958 /**
3959 * Tracks clear operations of query buffers that can interact with
3960       * vkCmdBeginQuery*, vkCmdWriteTimestamp*,
3961 * vkCmdWriteAccelerationStructuresPropertiesKHR, etc...
3962 *
3963       * We need the clearing of the buffer to have completed before we write
3964       * data with the command streamer or a shader.
3965 */
3966 enum anv_query_bits clear_bits;
3967 } queries;
3968
3969 VkShaderStageFlags descriptors_dirty;
3970 VkShaderStageFlags push_descriptors_dirty;
3971 /** Tracks the 3DSTATE_CONSTANT_* instruction that needs to be reemitted */
3972 VkShaderStageFlags push_constants_dirty;
3973
3974 struct {
3975 uint64_t surfaces_address;
3976 uint64_t samplers_address;
3977 bool dirty;
3978 VkShaderStageFlags offsets_dirty;
3979 uint64_t address[MAX_SETS];
3980 } descriptor_buffers;
3981
3982 struct anv_vertex_binding vertex_bindings[MAX_VBS];
3983 bool xfb_enabled;
3984 struct anv_xfb_binding xfb_bindings[MAX_XFB_BUFFERS];
3985 struct anv_state binding_tables[MESA_VULKAN_SHADER_STAGES];
3986 struct anv_state samplers[MESA_VULKAN_SHADER_STAGES];
3987
3988 unsigned char sampler_sha1s[MESA_VULKAN_SHADER_STAGES][20];
3989 unsigned char surface_sha1s[MESA_VULKAN_SHADER_STAGES][20];
3990 unsigned char push_sha1s[MESA_VULKAN_SHADER_STAGES][20];
3991
3992 /**
3993    * Whether or not the gfx8 PMA fix is enabled. We ensure that it is
3994    * disabled at the top of any command buffer by disabling it in
3995    * EndCommandBuffer and before invoking the secondary in ExecuteCommands.
3996 */
3997 bool pma_fix_enabled;
3998
3999 /**
4000 * Whether or not we know for certain that HiZ is enabled for the current
4001 * subpass. If, for whatever reason, we are unsure as to whether HiZ is
4002 * enabled or not, this will be false.
4003 */
4004 bool hiz_enabled;
4005
4006 /* We ensure the registers for the gfx12 D16 fix are initialized at the
4007 * first non-NULL depth stencil packet emission of every command buffer.
4008 * For secondary command buffer execution, we transfer the state from the
4009 * last command buffer to the primary (if known).
4010 */
4011 enum anv_depth_reg_mode depth_reg_mode;
4012
4013 /* The last auxiliary surface operation (or equivalent operation) provided
4014 * to genX(cmd_buffer_update_color_aux_op).
4015 */
4016 enum isl_aux_op color_aux_op;
4017
4018 /**
4019 * Whether RHWO optimization is enabled (Wa_1508744258).
4020 */
4021 bool rhwo_optimization_enabled;
4022
4023 /**
4024 * Pending state of the RHWO optimization, to be applied at the next
4025 * genX(cmd_buffer_apply_pipe_flushes).
4026 */
4027 bool pending_rhwo_optimization_enabled;
4028
4029 bool conditional_render_enabled;
4030
4031 /**
4032 * Last rendering scale argument provided to
4033 * genX(cmd_buffer_emit_hashing_mode)().
4034 */
4035 unsigned current_hash_scale;
4036
4037 /**
4038 * A buffer used for spill/fill of ray queries.
4039 */
4040 struct anv_bo * ray_query_shadow_bo;
4041
4042 /** Pointer to the last emitted COMPUTE_WALKER.
4043 *
4044 * This is used to edit the instruction post emission to replace the "Post
4045 * Sync" field for utrace timestamp emission.
4046 */
4047 void *last_compute_walker;
4048
4049 /** Pointer to the last emitted EXECUTE_INDIRECT_DISPATCH.
4050 *
4051 * This is used to edit the instruction post emission to replace the "Post
4052 * Sync" field for utrace timestamp emission.
4053 */
4054 void *last_indirect_dispatch;
4055 };
4056
4057 #define ANV_MIN_CMD_BUFFER_BATCH_SIZE 8192
4058 #define ANV_MAX_CMD_BUFFER_BATCH_SIZE (16 * 1024 * 1024)
4059
4060 enum anv_cmd_buffer_exec_mode {
4061 ANV_CMD_BUFFER_EXEC_MODE_PRIMARY,
4062 ANV_CMD_BUFFER_EXEC_MODE_EMIT,
4063 ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT,
4064 ANV_CMD_BUFFER_EXEC_MODE_CHAIN,
4065 ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN,
4066 ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN,
4067 };
4068
4069 struct anv_measure_batch;
4070
4071 struct anv_cmd_buffer {
4072 struct vk_command_buffer vk;
4073
4074 struct anv_device * device;
4075 struct anv_queue_family * queue_family;
4076
4077 /** Batch where the main commands live */
4078 struct anv_batch batch;
4079
4080 /* Pointer to the location in the batch where MI_BATCH_BUFFER_END was
4081 * recorded upon calling vkEndCommandBuffer(). This is useful if we need to
4082    * rewrite the end to chain multiple batches together at vkQueueSubmit().
4083 */
4084 void * batch_end;
4085
4086 /* Fields required for the actual chain of anv_batch_bo's.
4087 *
4088 * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain().
4089 */
4090 struct list_head batch_bos;
4091 enum anv_cmd_buffer_exec_mode exec_mode;
4092
4093 /* A vector of anv_batch_bo pointers for every batch or surface buffer
4094 * referenced by this command buffer
4095 *
4096 * initialized by anv_cmd_buffer_init_batch_bo_chain()
4097 */
4098 struct u_vector seen_bbos;
4099
4100 /* A vector of int32_t's for every block of binding tables.
4101 *
4102 * initialized by anv_cmd_buffer_init_batch_bo_chain()
4103 */
4104 struct u_vector bt_block_states;
4105 struct anv_state bt_next;
4106
4107 struct anv_reloc_list surface_relocs;
4108
4109 /* Serial for tracking buffer completion */
4110 uint32_t serial;
4111
4112 /* Stream objects for storing temporary data */
4113 struct anv_state_stream surface_state_stream;
4114 struct anv_state_stream dynamic_state_stream;
4115 struct anv_state_stream general_state_stream;
4116 struct anv_state_stream indirect_push_descriptor_stream;
4117 struct anv_state_stream push_descriptor_buffer_stream;
4118
4119 VkCommandBufferUsageFlags usage_flags;
4120
4121 struct anv_query_pool *perf_query_pool;
4122
4123 struct anv_cmd_state state;
4124
4125 struct anv_address return_addr;
4126
4127 /* Set by SetPerformanceMarkerINTEL, written into queries by CmdBeginQuery */
4128 uint64_t intel_perf_marker;
4129
4130 struct anv_measure_batch *measure;
4131
4132 /**
4133    * KHR_performance_query requires self-modifying command buffers; this
4134    * array holds the locations of the commands that modify the query begin
4135    * and end instructions storing performance counters. The array length is
4136 * anv_physical_device::n_perf_query_commands.
4137 */
4138 struct mi_address_token *self_mod_locations;
4139
4140 /**
4141 * Index tracking which of the self_mod_locations items have already been
4142 * used.
4143 */
4144 uint32_t perf_reloc_idx;
4145
4146 /**
4147 * Sum of all the anv_batch_bo written sizes for this command buffer
4148 * including any executed secondary command buffer.
4149 */
4150 uint32_t total_batch_size;
4151
4152 struct {
4153 /** Batch generating part of the anv_cmd_buffer::batch */
4154 struct anv_batch batch;
4155
4156 /**
4157 * Location in anv_cmd_buffer::batch at which we left some space to
4158 * insert a MI_BATCH_BUFFER_START into the
4159 * anv_cmd_buffer::generation::batch if needed.
4160 */
4161 struct anv_address jump_addr;
4162
4163 /**
4164 * Location in anv_cmd_buffer::batch at which the generation batch
4165 * should jump back to.
4166 */
4167 struct anv_address return_addr;
4168
4169 /** List of anv_batch_bo used for generation
4170 *
4171       * We have to keep this separate from the anv_cmd_buffer::batch_bos that
4172 * is used for a chaining optimization.
4173 */
4174 struct list_head batch_bos;
4175
4176 /** Ring buffer of generated commands
4177 *
4178 * When generating draws in ring mode, this buffer will hold generated
4179 * 3DPRIMITIVE commands.
4180 */
4181 struct anv_bo *ring_bo;
4182
4183 /**
4184 * State tracking of the generation shader (only used for the non-ring
4185 * mode).
4186 */
4187 struct anv_simple_shader shader_state;
4188 } generation;
4189
4190 /**
4191 * A vector of anv_bo pointers for chunks of memory used by the command
4192 * buffer that are too large to be allocated through dynamic_state_stream.
4193 * This is the case for large enough acceleration structures.
4194 *
4195 * initialized by anv_cmd_buffer_init_batch_bo_chain()
4196 */
4197 struct u_vector dynamic_bos;
4198
4199 /**
4200 * Structure holding tracepoints recorded in the command buffer.
4201 */
4202 struct u_trace trace;
4203
4204 struct {
4205 struct anv_video_session *vid;
4206 struct anv_video_session_params *params;
4207 } video;
4208
4209 /**
4210    * Companion RCS command buffer to support MSAA operations on the compute
4211 * queue.
4212 */
4213 struct anv_cmd_buffer *companion_rcs_cmd_buffer;
4214
4215 /**
4216    * Whether this command buffer is a companion command buffer of a compute one.
4217 */
4218 bool is_companion_rcs_cmd_buffer;
4219
4220 };
4221
4222 extern const struct vk_command_buffer_ops anv_cmd_buffer_ops;
4223
4224 /* Determine whether we can chain a given cmd_buffer to another one. We need
4225  * to make sure that we can edit the end of the batch to point to the next one,
4226 * which requires the command buffer to not be used simultaneously.
4227 *
4228 * We could in theory also implement chaining with companion command buffers,
4229  * but let's spare ourselves some pain and misery. This optimization has no
4230 * benefit on the brand new Xe kernel driver.
4231 */
4232 static inline bool
4233 anv_cmd_buffer_is_chainable(struct anv_cmd_buffer *cmd_buffer)
4234 {
4235 return !(cmd_buffer->usage_flags &
4236 VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT) &&
4237 !(cmd_buffer->is_companion_rcs_cmd_buffer);
4238 }
4239
4240 static inline bool
4241 anv_cmd_buffer_is_render_queue(const struct anv_cmd_buffer *cmd_buffer)
4242 {
4243 struct anv_queue_family *queue_family = cmd_buffer->queue_family;
4244 return (queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0;
4245 }
4246
4247 static inline bool
4248 anv_cmd_buffer_is_video_queue(const struct anv_cmd_buffer *cmd_buffer)
4249 {
4250 struct anv_queue_family *queue_family = cmd_buffer->queue_family;
4251 return ((queue_family->queueFlags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) |
4252 (queue_family->queueFlags & VK_QUEUE_VIDEO_ENCODE_BIT_KHR)) != 0;
4253 }
4254
4255 static inline bool
4256 anv_cmd_buffer_is_compute_queue(const struct anv_cmd_buffer *cmd_buffer)
4257 {
4258 struct anv_queue_family *queue_family = cmd_buffer->queue_family;
4259 return queue_family->engine_class == INTEL_ENGINE_CLASS_COMPUTE;
4260 }
4261
4262 static inline bool
4263 anv_cmd_buffer_is_blitter_queue(const struct anv_cmd_buffer *cmd_buffer)
4264 {
4265 struct anv_queue_family *queue_family = cmd_buffer->queue_family;
4266 return queue_family->engine_class == INTEL_ENGINE_CLASS_COPY;
4267 }
4268
4269 static inline bool
4270 anv_cmd_buffer_is_render_or_compute_queue(const struct anv_cmd_buffer *cmd_buffer)
4271 {
4272 return anv_cmd_buffer_is_render_queue(cmd_buffer) ||
4273 anv_cmd_buffer_is_compute_queue(cmd_buffer);
4274 }
4275
4276 static inline struct anv_address
4277 anv_cmd_buffer_dynamic_state_address(struct anv_cmd_buffer *cmd_buffer,
4278 struct anv_state state)
4279 {
4280 return anv_state_pool_state_address(
4281 &cmd_buffer->device->dynamic_state_pool, state);
4282 }
4283
4284 static inline uint64_t
4285 anv_cmd_buffer_descriptor_buffer_address(struct anv_cmd_buffer *cmd_buffer,
4286 int32_t buffer_index)
4287 {
4288 if (buffer_index == -1)
4289 return cmd_buffer->device->physical->va.push_descriptor_buffer_pool.addr;
4290
4291 return cmd_buffer->state.descriptor_buffers.address[buffer_index];
4292 }
4293
4294 VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
4295 void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
4296 void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
4297 void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer);
4298 void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
4299 struct anv_cmd_buffer *secondary);
4300 void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer);
4301 VkResult anv_cmd_buffer_execbuf(struct anv_queue *queue,
4302 struct anv_cmd_buffer *cmd_buffer,
4303 const VkSemaphore *in_semaphores,
4304 const uint64_t *in_wait_values,
4305 uint32_t num_in_semaphores,
4306 const VkSemaphore *out_semaphores,
4307 const uint64_t *out_signal_values,
4308 uint32_t num_out_semaphores,
4309 VkFence fence,
4310 int perf_query_pass);
4311
4312 void anv_cmd_buffer_reset(struct vk_command_buffer *vk_cmd_buffer,
4313 UNUSED VkCommandBufferResetFlags flags);
4314
4315 struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
4316 const void *data, uint32_t size, uint32_t alignment);
4317 struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
4318 uint32_t *a, uint32_t *b,
4319 uint32_t dwords, uint32_t alignment);
4320
4321 struct anv_address
4322 anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer);
4323 struct anv_state
4324 anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
4325 uint32_t entries, uint32_t *state_offset);
4326 struct anv_state
4327 anv_cmd_buffer_alloc_surface_states(struct anv_cmd_buffer *cmd_buffer,
4328 uint32_t count);
4329 struct anv_state
4330 anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer,
4331 uint32_t size, uint32_t alignment);
4332 struct anv_state
4333 anv_cmd_buffer_alloc_general_state(struct anv_cmd_buffer *cmd_buffer,
4334 uint32_t size, uint32_t alignment);
4335 static inline struct anv_state
4336 anv_cmd_buffer_alloc_temporary_state(struct anv_cmd_buffer *cmd_buffer,
4337 uint32_t size, uint32_t alignment)
4338 {
4339 struct anv_state state =
4340 anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream,
4341 size, alignment);
4342 if (state.map == NULL)
4343 anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_DEVICE_MEMORY);
4344 return state;
4345 }
4346 static inline struct anv_address
4347 anv_cmd_buffer_temporary_state_address(struct anv_cmd_buffer *cmd_buffer,
4348 struct anv_state state)
4349 {
4350 return anv_state_pool_state_address(
4351 &cmd_buffer->device->dynamic_state_pool, state);
4352 }
4353
4354 void
4355 anv_cmd_buffer_chain_command_buffers(struct anv_cmd_buffer **cmd_buffers,
4356 uint32_t num_cmd_buffers);
4357 void
4358 anv_cmd_buffer_exec_batch_debug(struct anv_queue *queue,
4359 uint32_t cmd_buffer_count,
4360 struct anv_cmd_buffer **cmd_buffers,
4361 struct anv_query_pool *perf_query_pool,
4362 uint32_t perf_query_pass);
4363 void
4364 anv_cmd_buffer_clflush(struct anv_cmd_buffer **cmd_buffers,
4365 uint32_t num_cmd_buffers);
4366
4367 void
4368 anv_cmd_buffer_update_pending_query_bits(struct anv_cmd_buffer *cmd_buffer,
4369 enum anv_pipe_bits flushed_bits);
4370
4371 /**
4372  * An allocation tied to a command buffer.
4373 *
4374 * Don't use anv_cmd_alloc::address::map to write memory from userspace, use
4375 * anv_cmd_alloc::map instead.
4376 */
4377 struct anv_cmd_alloc {
4378 struct anv_address address;
4379 void *map;
4380 size_t size;
4381 };
4382
4383 #define ANV_EMPTY_ALLOC ((struct anv_cmd_alloc) { .map = NULL, .size = 0 })
4384
4385 static inline bool
4386 anv_cmd_alloc_is_empty(struct anv_cmd_alloc alloc)
4387 {
4388 return alloc.size == 0;
4389 }
4390
4391 struct anv_cmd_alloc
4392 anv_cmd_buffer_alloc_space(struct anv_cmd_buffer *cmd_buffer,
4393 size_t size, uint32_t alignment,
4394 bool private);
4395
4396 VkResult
4397 anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer);
4398
4399 void anv_cmd_buffer_emit_bt_pool_base_address(struct anv_cmd_buffer *cmd_buffer);
4400
4401 struct anv_state
4402 anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer);
4403 struct anv_state
4404 anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer);
4405
4406 VkResult
4407 anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
4408 uint32_t num_entries,
4409 uint32_t *state_offset,
4410 struct anv_state *bt_state);
4411
4412 void anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer);
4413
4414 static inline unsigned
4415 anv_cmd_buffer_get_view_count(struct anv_cmd_buffer *cmd_buffer)
4416 {
4417 struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
4418 return MAX2(1, util_bitcount(gfx->view_mask));
4419 }
4420
4421 /* Save/restore cmd buffer states for meta operations */
4422 enum anv_cmd_saved_state_flags {
4423 ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE = BITFIELD_BIT(0),
4424 ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_0 = BITFIELD_BIT(1),
4425 ANV_CMD_SAVED_STATE_PUSH_CONSTANTS = BITFIELD_BIT(2),
4426 };
4427
4428 struct anv_cmd_saved_state {
4429 uint32_t flags;
4430
4431 struct anv_pipeline *pipeline;
4432 struct anv_descriptor_set *descriptor_set;
4433 uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
4434 };
4435
4436 void anv_cmd_buffer_save_state(struct anv_cmd_buffer *cmd_buffer,
4437 uint32_t flags,
4438 struct anv_cmd_saved_state *state);
4439
4440 void anv_cmd_buffer_restore_state(struct anv_cmd_buffer *cmd_buffer,
4441 struct anv_cmd_saved_state *state);
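/* Hedged usage sketch: an internal meta operation would typically wrap its
 * work in a save/restore pair so the application's compute state survives.
 * The flag selection and the elided body are illustrative only.
 *
 *    struct anv_cmd_saved_state saved;
 *    anv_cmd_buffer_save_state(cmd_buffer,
 *                              ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE |
 *                              ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_0 |
 *                              ANV_CMD_SAVED_STATE_PUSH_CONSTANTS,
 *                              &saved);
 *    ... bind internal pipeline, push constants, dispatch ...
 *    anv_cmd_buffer_restore_state(cmd_buffer, &saved);
 */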
4442
4443 enum anv_bo_sync_state {
4444    /** Indicates that this is a new (or newly reset) fence */
4445 ANV_BO_SYNC_STATE_RESET,
4446
4447 /** Indicates that this fence has been submitted to the GPU but is still
4448 * (as far as we know) in use by the GPU.
4449 */
4450 ANV_BO_SYNC_STATE_SUBMITTED,
4451
4452 ANV_BO_SYNC_STATE_SIGNALED,
4453 };
4454
4455 struct anv_bo_sync {
4456 struct vk_sync sync;
4457
4458 enum anv_bo_sync_state state;
4459 struct anv_bo *bo;
4460 };
4461
4462 extern const struct vk_sync_type anv_bo_sync_type;
4463
4464 static inline bool
4465 vk_sync_is_anv_bo_sync(const struct vk_sync *sync)
4466 {
4467 return sync->type == &anv_bo_sync_type;
4468 }
4469
4470 VkResult anv_create_sync_for_memory(struct vk_device *device,
4471 VkDeviceMemory memory,
4472 bool signal_memory,
4473 struct vk_sync **sync_out);
4474
4475 struct anv_event {
4476 struct vk_object_base base;
4477 uint64_t semaphore;
4478 struct anv_state state;
4479 };
4480
4481 #define ANV_STAGE_MASK ((1 << MESA_VULKAN_SHADER_STAGES) - 1)
4482
4483 #define anv_foreach_stage(stage, stage_bits) \
4484 for (gl_shader_stage stage, \
4485 __tmp = (gl_shader_stage)((stage_bits) & ANV_STAGE_MASK); \
4486 stage = __builtin_ffs(__tmp) - 1, __tmp; \
4487 __tmp &= ~(1 << (stage)))
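/* Usage sketch (illustrative): iterate the shader stages present in a
 * VkShaderStageFlags mask, e.g. to flush per-stage descriptor state.
 * flush_descriptors_for_stage() is a hypothetical helper.
 *
 *    anv_foreach_stage(s, cmd_buffer->state.descriptors_dirty) {
 *       flush_descriptors_for_stage(cmd_buffer, s);
 *    }
 */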
4488
4489 struct anv_pipeline_bind_map {
4490 unsigned char surface_sha1[20];
4491 unsigned char sampler_sha1[20];
4492 unsigned char push_sha1[20];
4493
4494 uint32_t surface_count;
4495 uint32_t sampler_count;
4496 uint32_t embedded_sampler_count;
4497 uint16_t kernel_args_size;
4498 uint16_t kernel_arg_count;
4499
4500 struct anv_pipeline_binding * surface_to_descriptor;
4501 struct anv_pipeline_binding * sampler_to_descriptor;
4502 struct anv_pipeline_embedded_sampler_binding* embedded_sampler_to_binding;
4503 struct brw_kernel_arg_desc * kernel_args;
4504
4505 struct anv_push_range push_ranges[4];
4506 };
4507
4508 struct anv_push_descriptor_info {
4509 /* A bitfield of descriptors used. */
4510 uint32_t used_descriptors;
4511
4512    /* A bitfield of UBO bindings fully promoted to push constants. */
4513 uint32_t fully_promoted_ubo_descriptors;
4514
4515 /* */
4516 uint8_t used_set_buffer;
4517 };
4518
4519 /* A list of values we push to implement some of the dynamic states */
4520 enum anv_dynamic_push_bits {
4521 ANV_DYNAMIC_PUSH_INPUT_VERTICES = BITFIELD_BIT(0),
4522 };
4523
4524 struct anv_shader_upload_params {
4525 gl_shader_stage stage;
4526
4527 const void *key_data;
4528 uint32_t key_size;
4529
4530 const void *kernel_data;
4531 uint32_t kernel_size;
4532
4533 const struct brw_stage_prog_data *prog_data;
4534 uint32_t prog_data_size;
4535
4536 const struct brw_compile_stats *stats;
4537 uint32_t num_stats;
4538
4539 const struct nir_xfb_info *xfb_info;
4540
4541 const struct anv_pipeline_bind_map *bind_map;
4542
4543 const struct anv_push_descriptor_info *push_desc_info;
4544
4545 enum anv_dynamic_push_bits dynamic_push_values;
4546 };
4547
4548 struct anv_embedded_sampler {
4549 uint32_t ref_cnt;
4550
4551 struct anv_embedded_sampler_key key;
4552
4553 struct anv_state sampler_state;
4554 struct anv_state border_color_state;
4555 };
4556
4557 struct anv_shader_bin {
4558 struct vk_pipeline_cache_object base;
4559
4560 gl_shader_stage stage;
4561
4562 struct anv_state kernel;
4563 uint32_t kernel_size;
4564
4565 const struct brw_stage_prog_data *prog_data;
4566 uint32_t prog_data_size;
4567
4568 struct brw_compile_stats stats[3];
4569 uint32_t num_stats;
4570
4571 struct nir_xfb_info *xfb_info;
4572
4573 struct anv_push_descriptor_info push_desc_info;
4574
4575 struct anv_pipeline_bind_map bind_map;
4576
4577 enum anv_dynamic_push_bits dynamic_push_values;
4578
4579 /* Not saved in the pipeline cache.
4580 *
4581 * Array of pointers of length bind_map.embedded_sampler_count
4582 */
4583 struct anv_embedded_sampler **embedded_samplers;
4584 };
4585
4586 static inline struct anv_shader_bin *
4587 anv_shader_bin_ref(struct anv_shader_bin *shader)
4588 {
4589 vk_pipeline_cache_object_ref(&shader->base);
4590
4591 return shader;
4592 }
4593
4594 static inline void
4595 anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader)
4596 {
4597 vk_pipeline_cache_object_unref(&device->vk, &shader->base);
4598 }
4599
4600 struct anv_pipeline_executable {
4601 gl_shader_stage stage;
4602
4603 struct brw_compile_stats stats;
4604
4605 char *nir;
4606 char *disasm;
4607 };
4608
4609 enum anv_pipeline_type {
4610 ANV_PIPELINE_GRAPHICS,
4611 ANV_PIPELINE_GRAPHICS_LIB,
4612 ANV_PIPELINE_COMPUTE,
4613 ANV_PIPELINE_RAY_TRACING,
4614 };
4615
4616 struct anv_pipeline {
4617 struct vk_object_base base;
4618
4619 struct anv_device * device;
4620
4621 struct anv_batch batch;
4622 struct anv_reloc_list batch_relocs;
4623
4624 void * mem_ctx;
4625
4626 enum anv_pipeline_type type;
4627 VkPipelineCreateFlags2KHR flags;
4628
4629 VkShaderStageFlags active_stages;
4630
4631 uint32_t ray_queries;
4632
4633 /**
4634 * Mask of stages that are accessing push descriptors.
4635 */
4636 VkShaderStageFlags use_push_descriptor;
4637
4638 /**
4639 * Mask of stages that are accessing the push descriptors buffer.
4640 */
4641 VkShaderStageFlags use_push_descriptor_buffer;
4642
4643 /**
4644 * Maximum scratch size for all shaders in this pipeline.
4645 */
4646 uint32_t scratch_size;
4647
4648 /* Layout of the sets used by the pipeline. */
4649 struct anv_pipeline_sets_layout layout;
4650
4651 struct util_dynarray executables;
4652
4653 const struct intel_l3_config * l3_config;
4654 };
4655
4656 /* The base graphics pipeline object only holds shaders. */
4657 struct anv_graphics_base_pipeline {
4658 struct anv_pipeline base;
4659
4660 struct vk_sample_locations_state sample_locations;
4661
4662 /* Shaders */
4663 struct anv_shader_bin * shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT];
4664
4665    /* A small hash based on shader_info::source_sha1 for identifying
4666 * shaders in renderdoc/shader-db.
4667 */
4668 uint32_t source_hashes[ANV_GRAPHICS_SHADER_STAGE_COUNT];
4669
4670 /* Feedback index in
4671 * VkPipelineCreationFeedbackCreateInfo::pPipelineStageCreationFeedbacks
4672 *
4673 * For pipeline libraries, we need to remember the order at creation when
4674 * included into a linked pipeline.
4675 */
4676 uint32_t feedback_index[ANV_GRAPHICS_SHADER_STAGE_COUNT];
4677
4678    /* Robustness flags used by the shaders
4679 */
4680 enum brw_robustness_flags robust_flags[ANV_GRAPHICS_SHADER_STAGE_COUNT];
4681
4682 /* True if at the time the fragment shader was compiled, it didn't have all
4683 * the information to avoid INTEL_MSAA_FLAG_ENABLE_DYNAMIC.
4684 */
4685 bool fragment_dynamic;
4686 };
4687
4688 /* The library graphics pipeline object has a partial graphics state and
4689  * possibly some shaders. If requested, shaders are also present in early
4690  * NIR form.
4691 */
4692 struct anv_graphics_lib_pipeline {
4693 struct anv_graphics_base_pipeline base;
4694
4695 VkGraphicsPipelineLibraryFlagsEXT lib_flags;
4696
4697 struct vk_graphics_pipeline_all_state all_state;
4698 struct vk_graphics_pipeline_state state;
4699
4700 /* Retained shaders for link optimization. */
4701 struct {
4702 /* This hash is the same as computed in
4703 * anv_graphics_pipeline_gather_shaders().
4704 */
4705 unsigned char shader_sha1[20];
4706
4707 enum gl_subgroup_size subgroup_size_type;
4708
4709       /* Holds the value of VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT
4710        * from the library that introduced the stage, so it remains consistent.
4711 */
4712 bool view_index_from_device_index;
4713
4714 /* NIR captured in anv_pipeline_stage_get_nir(), includes specialization
4715 * constants.
4716 */
4717 nir_shader * nir;
4718 } retained_shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT];
4719
4720 /* Whether the shaders have been retained */
4721 bool retain_shaders;
4722 };
4723
4724 struct anv_gfx_state_ptr {
4725 /* Both in dwords */
4726 uint16_t offset;
4727 uint16_t len;
4728 };
4729
4730 /* The final graphics pipeline object has all the graphics state ready to be
4731 * programmed into HW packets (dynamic_state field) or fully baked in its
4732 * batch.
4733 */
4734 struct anv_graphics_pipeline {
4735 struct anv_graphics_base_pipeline base;
4736
4737 struct vk_vertex_input_state vertex_input;
4738 struct vk_sample_locations_state sample_locations;
4739 struct vk_dynamic_graphics_state dynamic_state;
4740
4741 /* If true, the patch control points are passed through push constants
4742 * (anv_push_constants::gfx::tcs_input_vertices)
4743 */
4744 bool dynamic_patch_control_points;
4745
4746 uint32_t view_mask;
4747 uint32_t instance_multiplier;
4748
4749 bool rp_has_ds_self_dep;
4750
4751 bool kill_pixel;
4752 bool uses_xfb;
4753 bool sample_shading_enable;
4754 float min_sample_shading;
4755
4756 /* Number of VERTEX_ELEMENT_STATE input elements used by the shader */
4757 uint32_t vs_input_elements;
4758
4759 /* Number of VERTEX_ELEMENT_STATE elements we need to implement some of the
4760 * draw parameters
4761 */
4762 uint32_t svgs_count;
4763
4764    /* Precomputed VERTEX_ELEMENT_STATE structures for the vertex input that
4765     * can be copied into the anv_cmd_buffer behind a 3DSTATE_VERTEX_ELEMENTS.
4766 *
4767 * When MESA_VK_DYNAMIC_VI is not dynamic
4768 *
4769 * vertex_input_elems = vs_input_elements + svgs_count
4770 *
4771 * All the VERTEX_ELEMENT_STATE can be directly copied behind a
4772 * 3DSTATE_VERTEX_ELEMENTS instruction in the command buffer. Otherwise
4773 * this array only holds the svgs_count elements.
4774 */
4775 uint32_t vertex_input_elems;
4776 uint32_t vertex_input_data[2 * 31 /* MAX_VES + 2 internal */];
4777
4778 /* Pre computed CS instructions that can directly be copied into
4779 * anv_cmd_buffer.
4780 */
4781 uint32_t batch_data[480];
4782
4783 /* Urb setup utilized by this pipeline. */
4784 struct intel_urb_config urb_cfg;
4785
4786    /* Fully baked instructions, ready to be emitted in the anv_cmd_buffer */
4787 struct {
4788 struct anv_gfx_state_ptr urb;
4789 struct anv_gfx_state_ptr vf_statistics;
4790 struct anv_gfx_state_ptr vf_sgvs;
4791 struct anv_gfx_state_ptr vf_sgvs_2;
4792 struct anv_gfx_state_ptr vf_sgvs_instancing;
4793 struct anv_gfx_state_ptr vf_instancing;
4794 struct anv_gfx_state_ptr primitive_replication;
4795 struct anv_gfx_state_ptr sbe;
4796 struct anv_gfx_state_ptr sbe_swiz;
4797 struct anv_gfx_state_ptr so_decl_list;
4798 struct anv_gfx_state_ptr vs;
4799 struct anv_gfx_state_ptr hs;
4800 struct anv_gfx_state_ptr ds;
4801 struct anv_gfx_state_ptr vs_protected;
4802 struct anv_gfx_state_ptr hs_protected;
4803 struct anv_gfx_state_ptr ds_protected;
4804
4805 struct anv_gfx_state_ptr task_control;
4806 struct anv_gfx_state_ptr task_control_protected;
4807 struct anv_gfx_state_ptr task_shader;
4808 struct anv_gfx_state_ptr task_redistrib;
4809 struct anv_gfx_state_ptr clip_mesh;
4810 struct anv_gfx_state_ptr mesh_control;
4811 struct anv_gfx_state_ptr mesh_control_protected;
4812 struct anv_gfx_state_ptr mesh_shader;
4813 struct anv_gfx_state_ptr mesh_distrib;
4814 struct anv_gfx_state_ptr sbe_mesh;
4815 } final;
4816
4817 /* Pre packed CS instructions & structures that need to be merged later
4818 * with dynamic state.
4819 */
4820 struct {
4821 struct anv_gfx_state_ptr clip;
4822 struct anv_gfx_state_ptr sf;
4823 struct anv_gfx_state_ptr raster;
4824 struct anv_gfx_state_ptr ms;
4825 struct anv_gfx_state_ptr ps_extra;
4826 struct anv_gfx_state_ptr wm;
4827 struct anv_gfx_state_ptr so;
4828 struct anv_gfx_state_ptr gs;
4829 struct anv_gfx_state_ptr gs_protected;
4830 struct anv_gfx_state_ptr te;
4831 struct anv_gfx_state_ptr ps;
4832 struct anv_gfx_state_ptr ps_protected;
4833 struct anv_gfx_state_ptr vfg;
4834 } partial;
4835 };
4836
4837 #define anv_batch_emit_pipeline_state(batch, pipeline, state) \
4838 do { \
4839 if ((pipeline)->state.len == 0) \
4840 break; \
4841 uint32_t *dw; \
4842 dw = anv_batch_emit_dwords((batch), (pipeline)->state.len); \
4843 if (!dw) \
4844 break; \
4845 memcpy(dw, &(pipeline)->batch_data[(pipeline)->state.offset], \
4846 4 * (pipeline)->state.len); \
4847 } while (0)
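/* Usage sketch (illustrative, assuming "pipeline" is the currently bound
 * anv_graphics_pipeline): copy one of the pre-packed state groups into the
 * command buffer's batch.
 *
 *    anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.vs);
 */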
4848
4849 #define anv_batch_emit_pipeline_state_protected(batch, pipeline, \
4850 state, protected) \
4851 do { \
4852 struct anv_gfx_state_ptr *_cmd_state = protected ? \
4853 &(pipeline)->state##_protected : &(pipeline)->state; \
4854 if (_cmd_state->len == 0) \
4855 break; \
4856 uint32_t *dw; \
4857 dw = anv_batch_emit_dwords((batch), _cmd_state->len); \
4858 if (!dw) \
4859 break; \
4860 memcpy(dw, &(pipeline)->batch_data[_cmd_state->offset], \
4861 4 * _cmd_state->len); \
4862 } while (0)
4863
4864
4865 struct anv_compute_pipeline {
4866 struct anv_pipeline base;
4867
4868 struct anv_shader_bin * cs;
4869 uint32_t batch_data[9];
4870 uint32_t interface_descriptor_data[8];
4871
4872    /* A small hash based on shader_info::source_sha1 for identifying shaders
4873 * in renderdoc/shader-db.
4874 */
4875 uint32_t source_hash;
4876 };
4877
4878 struct anv_rt_shader_group {
4879 VkRayTracingShaderGroupTypeKHR type;
4880
4881 /* Whether this group was imported from another pipeline */
4882 bool imported;
4883
4884 struct anv_shader_bin *general;
4885 struct anv_shader_bin *closest_hit;
4886 struct anv_shader_bin *any_hit;
4887 struct anv_shader_bin *intersection;
4888
4889 /* VK_KHR_ray_tracing requires shaderGroupHandleSize == 32 */
4890 uint32_t handle[8];
4891 };
4892
4893 struct anv_ray_tracing_pipeline {
4894 struct anv_pipeline base;
4895
4896 /* All shaders in the pipeline */
4897 struct util_dynarray shaders;
4898
4899 uint32_t group_count;
4900 struct anv_rt_shader_group * groups;
4901
4902 /* If non-zero, this is the default computed stack size as per the stack
4903 * size computation in the Vulkan spec. If zero, that indicates that the
4904 * client has requested a dynamic stack size.
4905 */
4906 uint32_t stack_size;
4907 };
4908
4909 #define ANV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum) \
4910 static inline struct anv_##pipe_type##_pipeline * \
4911 anv_pipeline_to_##pipe_type(struct anv_pipeline *pipeline) \
4912 { \
4913 assert(pipeline->type == pipe_enum); \
4914 return (struct anv_##pipe_type##_pipeline *) pipeline; \
4915 }
4916
4917 ANV_DECL_PIPELINE_DOWNCAST(graphics, ANV_PIPELINE_GRAPHICS)
4918 ANV_DECL_PIPELINE_DOWNCAST(graphics_lib, ANV_PIPELINE_GRAPHICS_LIB)
4919 ANV_DECL_PIPELINE_DOWNCAST(compute, ANV_PIPELINE_COMPUTE)
4920 ANV_DECL_PIPELINE_DOWNCAST(ray_tracing, ANV_PIPELINE_RAY_TRACING)
4921
4922 /* Can't use the macro because we need to handle both types. */
4923 static inline struct anv_graphics_base_pipeline *
4924 anv_pipeline_to_graphics_base(struct anv_pipeline *pipeline)
4925 {
4926 assert(pipeline->type == ANV_PIPELINE_GRAPHICS ||
4927 pipeline->type == ANV_PIPELINE_GRAPHICS_LIB);
4928 return (struct anv_graphics_base_pipeline *) pipeline;
4929 }
4930
4931 static inline bool
4932 anv_pipeline_has_stage(const struct anv_graphics_pipeline *pipeline,
4933 gl_shader_stage stage)
4934 {
4935 return (pipeline->base.base.active_stages & mesa_to_vk_shader_stage(stage)) != 0;
4936 }
4937
4938 static inline bool
4939 anv_pipeline_base_has_stage(const struct anv_graphics_base_pipeline *pipeline,
4940 gl_shader_stage stage)
4941 {
4942 return (pipeline->base.active_stages & mesa_to_vk_shader_stage(stage)) != 0;
4943 }
4944
4945 static inline bool
4946 anv_pipeline_is_primitive(const struct anv_graphics_pipeline *pipeline)
4947 {
4948 return anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX);
4949 }
4950
4951 static inline bool
4952 anv_pipeline_is_mesh(const struct anv_graphics_pipeline *pipeline)
4953 {
4954 return anv_pipeline_has_stage(pipeline, MESA_SHADER_MESH);
4955 }
4956
4957 static inline bool
4958 anv_cmd_buffer_all_color_write_masked(const struct anv_cmd_buffer *cmd_buffer)
4959 {
4960 const struct anv_cmd_graphics_state *state = &cmd_buffer->state.gfx;
4961 const struct vk_dynamic_graphics_state *dyn =
4962 &cmd_buffer->vk.dynamic_graphics_state;
4963 uint8_t color_writes = dyn->cb.color_write_enables;
4964
4965 /* All writes disabled through vkCmdSetColorWriteEnableEXT */
4966 if ((color_writes & ((1u << state->color_att_count) - 1)) == 0)
4967 return true;
4968
4969 /* Or all write masks are empty */
4970 for (uint32_t i = 0; i < state->color_att_count; i++) {
4971 if (dyn->cb.attachments[i].write_mask != 0)
4972 return false;
4973 }
4974
4975 return true;
4976 }
4977
4978 static inline void
4979 anv_cmd_graphic_state_update_has_uint_rt(struct anv_cmd_graphics_state *state)
4980 {
4981 state->has_uint_rt = false;
4982 for (unsigned a = 0; a < state->color_att_count; a++) {
4983 if (vk_format_is_int(state->color_att[a].vk_format)) {
4984 state->has_uint_rt = true;
4985 break;
4986 }
4987 }
4988 }
4989
4990 #define ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(prefix, stage) \
4991 static inline const struct brw_##prefix##_prog_data * \
4992 get_##prefix##_prog_data(const struct anv_graphics_pipeline *pipeline) \
4993 { \
4994 if (anv_pipeline_has_stage(pipeline, stage)) { \
4995 return (const struct brw_##prefix##_prog_data *) \
4996 pipeline->base.shaders[stage]->prog_data; \
4997 } else { \
4998 return NULL; \
4999 } \
5000 }
5001
5002 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX)
5003 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tcs, MESA_SHADER_TESS_CTRL)
5004 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tes, MESA_SHADER_TESS_EVAL)
5005 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY)
5006 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)
5007 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(mesh, MESA_SHADER_MESH)
5008 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(task, MESA_SHADER_TASK)
5009
5010 static inline const struct brw_cs_prog_data *
5011 get_cs_prog_data(const struct anv_compute_pipeline *pipeline)
5012 {
5013 assert(pipeline->cs);
5014 return (const struct brw_cs_prog_data *) pipeline->cs->prog_data;
5015 }
5016
5017 static inline const struct brw_vue_prog_data *
5018 anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline *pipeline)
5019 {
5020 if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
5021 return &get_gs_prog_data(pipeline)->base;
5022 else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
5023 return &get_tes_prog_data(pipeline)->base;
5024 else
5025 return &get_vs_prog_data(pipeline)->base;
5026 }
5027
5028 VkResult
5029 anv_device_init_rt_shaders(struct anv_device *device);
5030
5031 void
5032 anv_device_finish_rt_shaders(struct anv_device *device);
5033
5034 struct anv_kernel_arg {
5035 bool is_ptr;
5036 uint16_t size;
5037
5038 union {
5039 uint64_t u64;
5040 void *ptr;
5041 };
5042 };
5043
5044 struct anv_kernel {
5045 #ifndef NDEBUG
5046 const char *name;
5047 #endif
5048 struct anv_shader_bin *bin;
5049 const struct intel_l3_config *l3_config;
5050 };
5051
5052 struct anv_format_plane {
5053 enum isl_format isl_format:16;
5054 struct isl_swizzle swizzle;
5055
5056    /* What aspect is associated with this plane */
5057 VkImageAspectFlags aspect;
5058 };
5059
5060 struct anv_format {
5061 struct anv_format_plane planes[3];
5062 VkFormat vk_format;
5063 uint8_t n_planes;
5064 bool can_ycbcr;
5065 bool can_video;
5066 };
5067
5068 static inline void
5069 anv_assert_valid_aspect_set(VkImageAspectFlags aspects)
5070 {
5071 if (util_bitcount(aspects) == 1) {
5072 assert(aspects & (VK_IMAGE_ASPECT_COLOR_BIT |
5073 VK_IMAGE_ASPECT_DEPTH_BIT |
5074 VK_IMAGE_ASPECT_STENCIL_BIT |
5075 VK_IMAGE_ASPECT_PLANE_0_BIT |
5076 VK_IMAGE_ASPECT_PLANE_1_BIT |
5077 VK_IMAGE_ASPECT_PLANE_2_BIT));
5078 } else if (aspects & VK_IMAGE_ASPECT_PLANES_BITS_ANV) {
5079 assert(aspects == VK_IMAGE_ASPECT_PLANE_0_BIT ||
5080 aspects == (VK_IMAGE_ASPECT_PLANE_0_BIT |
5081 VK_IMAGE_ASPECT_PLANE_1_BIT) ||
5082 aspects == (VK_IMAGE_ASPECT_PLANE_0_BIT |
5083 VK_IMAGE_ASPECT_PLANE_1_BIT |
5084 VK_IMAGE_ASPECT_PLANE_2_BIT));
5085 } else {
5086 assert(aspects == (VK_IMAGE_ASPECT_DEPTH_BIT |
5087 VK_IMAGE_ASPECT_STENCIL_BIT));
5088 }
5089 }
5090
5091 /**
5092 * Return the aspect's plane relative to all_aspects. For an image, for
5093 * instance, all_aspects would be the set of aspects in the image. For
5094 * an image view, all_aspects would be the subset of aspects represented
5095 * by that particular view.
5096 */
5097 static inline uint32_t
5098 anv_aspect_to_plane(VkImageAspectFlags all_aspects,
5099 VkImageAspectFlagBits aspect)
5100 {
5101 anv_assert_valid_aspect_set(all_aspects);
5102 assert(util_bitcount(aspect) == 1);
5103 assert(!(aspect & ~all_aspects));
5104
5105 /* Because we always put image and view planes in aspect-bit-order, the
5106 * plane index is the number of bits in all_aspects before aspect.
5107 */
5108 return util_bitcount(all_aspects & (aspect - 1));
5109 }
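/* Worked example: for a 3-plane image, all_aspects is PLANE_0 | PLANE_1 |
 * PLANE_2. Asking for PLANE_1, the bits of all_aspects below it contain only
 * PLANE_0, so the function returns plane index 1.
 */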
5110
5111 #define anv_foreach_image_aspect_bit(b, image, aspects) \
5112 u_foreach_bit(b, vk_image_expand_aspect_mask(&(image)->vk, aspects))
5113
5114 const struct anv_format *
5115 anv_get_format(VkFormat format);
5116
5117 static inline uint32_t
5118 anv_get_format_planes(VkFormat vk_format)
5119 {
5120 const struct anv_format *format = anv_get_format(vk_format);
5121
5122 return format != NULL ? format->n_planes : 0;
5123 }
5124
5125 struct anv_format_plane
5126 anv_get_format_plane(const struct intel_device_info *devinfo,
5127 VkFormat vk_format, uint32_t plane,
5128 VkImageTiling tiling);
5129
5130 struct anv_format_plane
5131 anv_get_format_aspect(const struct intel_device_info *devinfo,
5132 VkFormat vk_format,
5133 VkImageAspectFlagBits aspect, VkImageTiling tiling);
5134
5135 static inline enum isl_format
5136 anv_get_isl_format(const struct intel_device_info *devinfo, VkFormat vk_format,
5137 VkImageAspectFlags aspect, VkImageTiling tiling)
5138 {
5139 return anv_get_format_aspect(devinfo, vk_format, aspect, tiling).isl_format;
5140 }
5141
5142 bool anv_format_supports_ccs_e(const struct intel_device_info *devinfo,
5143 const enum isl_format format);
5144
5145 bool anv_formats_ccs_e_compatible(const struct intel_device_info *devinfo,
5146 VkImageCreateFlags create_flags,
5147 VkFormat vk_format, VkImageTiling vk_tiling,
5148 VkImageUsageFlags vk_usage,
5149 const VkImageFormatListCreateInfo *fmt_list);
5150
5151 extern VkFormat
5152 vk_format_from_android(unsigned android_format, unsigned android_usage);
5153
5154 static inline VkFormat
5155 anv_get_emulation_format(const struct anv_physical_device *pdevice, VkFormat format)
5156 {
5157 if (pdevice->flush_astc_ldr_void_extent_denorms) {
5158 const struct util_format_description *desc =
5159 vk_format_description(format);
5160 if (desc->layout == UTIL_FORMAT_LAYOUT_ASTC &&
5161 desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB)
5162 return format;
5163 }
5164
5165 if (pdevice->emu_astc_ldr)
5166 return vk_texcompress_astc_emulation_format(format);
5167
5168 return VK_FORMAT_UNDEFINED;
5169 }
5170
5171 static inline bool
5172 anv_is_format_emulated(const struct anv_physical_device *pdevice, VkFormat format)
5173 {
5174 return anv_get_emulation_format(pdevice, format) != VK_FORMAT_UNDEFINED;
5175 }
5176
5177 static inline struct isl_swizzle
5178 anv_swizzle_for_render(struct isl_swizzle swizzle)
5179 {
5180 /* Sometimes the swizzle will have alpha map to one. We do this to fake
5181 * RGB as RGBA for texturing
5182 */
5183 assert(swizzle.a == ISL_CHANNEL_SELECT_ONE ||
5184 swizzle.a == ISL_CHANNEL_SELECT_ALPHA);
5185
5186 /* But it doesn't matter what we render to that channel */
5187 swizzle.a = ISL_CHANNEL_SELECT_ALPHA;
5188
5189 return swizzle;
5190 }
5191
5192 void
5193 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm);
5194
5195 /**
5196 * Describes how each part of anv_image will be bound to memory.
5197 */
5198 struct anv_image_memory_range {
5199 /**
5200 * Disjoint bindings into which each portion of the image will be bound.
5201 *
5202 * Binding images to memory can be complicated and involve binding different
5203 * portions of the image to different memory objects or regions. For most
5204 * images, everything lives in the MAIN binding and gets bound by
5205 * vkBindImageMemory. For disjoint multi-planar images, each plane has
5206 * a unique, disjoint binding and gets bound by vkBindImageMemory2 with
5207 * VkBindImagePlaneMemoryInfo. There may also exist bits of memory which are
5208 * implicit or driver-managed and live in special-case bindings.
5209 */
5210 enum anv_image_memory_binding {
5211 /**
5212 * Used if and only if image is not multi-planar disjoint. Bound by
5213 * vkBindImageMemory2 without VkBindImagePlaneMemoryInfo.
5214 */
5215 ANV_IMAGE_MEMORY_BINDING_MAIN,
5216
5217 /**
5218 * Used if and only if image is multi-planar disjoint. Bound by
5219 * vkBindImageMemory2 with VkBindImagePlaneMemoryInfo.
5220 */
5221 ANV_IMAGE_MEMORY_BINDING_PLANE_0,
5222 ANV_IMAGE_MEMORY_BINDING_PLANE_1,
5223 ANV_IMAGE_MEMORY_BINDING_PLANE_2,
5224
5225 /**
5226 * Driver-private bo. In special cases we may store the aux surface and/or
5227 * aux state in this binding.
5228 */
5229 ANV_IMAGE_MEMORY_BINDING_PRIVATE,
5230
5231 /** Sentinel */
5232 ANV_IMAGE_MEMORY_BINDING_END,
5233 } binding;
5234
5235 uint32_t alignment;
5236 uint64_t size;
5237
5238 /**
5239 * Offset is relative to the start of the binding created by
5240 * vkBindImageMemory, not to the start of the bo.
5241 */
5242 uint64_t offset;
5243 };
5244
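/*
 * Illustrative sketch (values are made up, not computed by the driver): for
 * a 2-plane disjoint image, the plane-1 surface could carry a range such as
 *
 *    struct anv_image_memory_range range = {
 *       .binding   = ANV_IMAGE_MEMORY_BINDING_PLANE_1,
 *       .alignment = 4096,
 *       .size      = 64 * 1024,
 *       .offset    = 0,
 *    };
 *
 * and would then be bound through vkBindImageMemory2 with a
 * VkBindImagePlaneMemoryInfo naming VK_IMAGE_ASPECT_PLANE_1_BIT.
 */
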
5245 /**
5246 * Subsurface of an anv_image.
5247 */
5248 struct anv_surface {
5249 struct isl_surf isl;
5250 struct anv_image_memory_range memory_range;
5251 };
5252
5253 static inline bool MUST_CHECK
5254 anv_surface_is_valid(const struct anv_surface *surface)
5255 {
5256 return surface->isl.size_B > 0 && surface->memory_range.size > 0;
5257 }
5258
5259 struct anv_image {
5260 struct vk_image vk;
5261
5262 uint32_t n_planes;
5263
5264 /**
5265 * Image has multi-planar format and was created with
5266 * VK_IMAGE_CREATE_DISJOINT_BIT.
5267 */
5268 bool disjoint;
5269
5270 /**
5271 * Image is a WSI image
5272 */
5273 bool from_wsi;
5274
5275 /**
5276 * Image was imported from a struct AHardwareBuffer. We have to delay
5277 * final image creation until bind time.
5278 */
5279 bool from_ahb;
5280
5281 /**
5282 * Image was imported from gralloc with VkNativeBufferANDROID. The gralloc bo
5283 * must be released when the image is destroyed.
5284 */
5285 bool from_gralloc;
5286
5287 /**
5288 * If not UNDEFINED, image has a hidden plane at planes[n_planes] for ASTC
5289 * LDR workaround or emulation.
5290 */
5291 VkFormat emu_plane_format;
5292
5293 /**
5294 * The memory bindings created by vkCreateImage and vkBindImageMemory.
5295 *
5296 * For details on the image's memory layout, see check_memory_bindings().
5297 *
5298 * vkCreateImage constructs the `memory_range` for each
5299 * anv_image_memory_binding. After vkCreateImage, each binding is valid if
5300 * and only if `memory_range::size > 0`.
5301 *
5302 * vkBindImageMemory binds each valid `memory_range` to an `address`.
5303 * Usually, the app will provide the address via the parameters of
5304 * vkBindImageMemory. However, special-case bindings may be bound to
5305 * driver-private memory.
5306 */
5307 struct anv_image_binding {
5308 struct anv_image_memory_range memory_range;
5309 struct anv_address address;
5310 struct anv_sparse_binding_data sparse_data;
5311 } bindings[ANV_IMAGE_MEMORY_BINDING_END];
5312
5313 /**
5314 * Image subsurfaces
5315 *
5316 * For each aspect, anv_image::planes[p].primary_surface is valid if and
5317 * only if anv_image::aspects contains that aspect, where p is the plane
5318 * index given by anv_image_aspect_to_plane() for that aspect.
5319 *
5320 * The hardware requires that the depth buffer and stencil buffer be
5321 * separate surfaces. From Vulkan's perspective, though, depth and stencil
5322 * reside in the same VkImage. To satisfy both the hardware and Vulkan, we
5323 * allocate the depth and stencil buffers as separate surfaces in the same
5324 * bo.
5325 */
5326 struct anv_image_plane {
5327 struct anv_surface primary_surface;
5328
5329 /**
5330 * The base aux usage for this image. For color images, this can be
5331 * either CCS_E or CCS_D depending on whether or not we can reliably
5332 * leave CCS on all the time.
5333 */
5334 enum isl_aux_usage aux_usage;
5335
5336 struct anv_surface aux_surface;
5337
5338 /** Location of the compression control surface. */
5339 struct anv_image_memory_range compr_ctrl_memory_range;
5340
5341 /** Location of the fast clear state. */
5342 struct anv_image_memory_range fast_clear_memory_range;
5343
5344 /**
5345 * Whether this image can be fast cleared with non-zero clear colors.
5346 * This can happen with mutable images when formats with different bit
5347 * sizes per component are used.
5348 *
5349 * On Gfx9+, because the clear color is stored as four 32-bit components,
5350 * we can clear in R16G16_UNORM (storing two 16-bit values in
5351 * components 0 & 1 of the clear color) and then draw in R32_UINT, which
5352 * interprets the clear color as a single-component value, using only
5353 * the first 16-bit component of the previously written clear color.
5354 *
5355 * On Gfx7/7.5/8, only CC_ZERO/CC_ONE clear colors are supported; this
5356 * boolean prevents the use of CC_ONE.
5357 */
5358 bool can_non_zero_fast_clear;
5359
5360 struct {
5361 /** Whether the image has CCS data mapped through AUX-TT. */
5362 bool mapped;
5363
5364 /** Main address of the mapping. */
5365 uint64_t addr;
5366
5367 /** Size of the mapping. */
5368 uint64_t size;
5369 } aux_tt;
5370 } planes[3];
5371
5372 struct anv_image_memory_range vid_dmv_top_surface;
5373
5374 /* Link in the anv_device.image_private_objects list */
5375 struct list_head link;
5376 };
5377
5378 static inline bool
5379 anv_image_is_protected(const struct anv_image *image)
5380 {
5381 return image->vk.create_flags & VK_IMAGE_CREATE_PROTECTED_BIT;
5382 }
5383
5384 static inline bool
5385 anv_image_is_sparse(const struct anv_image *image)
5386 {
5387 return image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT;
5388 }
5389
5390 static inline bool
5391 anv_image_is_externally_shared(const struct anv_image *image)
5392 {
5393 return image->vk.drm_format_mod != DRM_FORMAT_MOD_INVALID ||
5394 image->vk.external_handle_types != 0;
5395 }
5396
5397 static inline bool
5398 anv_image_has_private_binding(const struct anv_image *image)
5399 {
5400 const struct anv_image_binding private_binding =
5401 image->bindings[ANV_IMAGE_MEMORY_BINDING_PRIVATE];
5402 return private_binding.memory_range.size != 0;
5403 }
5404
5405 static inline bool
5406 anv_image_format_is_d16_or_s8(const struct anv_image *image)
5407 {
5408 return image->vk.format == VK_FORMAT_D16_UNORM ||
5409 image->vk.format == VK_FORMAT_D16_UNORM_S8_UINT ||
5410 image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
5411 image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
5412 image->vk.format == VK_FORMAT_S8_UINT;
5413 }
5414
5415 /* The ordering of this enum is important */
5416 enum anv_fast_clear_type {
5417 /** Image does not have/support any fast-clear blocks */
5418 ANV_FAST_CLEAR_NONE = 0,
5419 /** Image has/supports fast-clear but only to the default value */
5420 ANV_FAST_CLEAR_DEFAULT_VALUE = 1,
5421 /** Image has/supports fast-clear with an arbitrary fast-clear value */
5422 ANV_FAST_CLEAR_ANY = 2,
5423 };
5424
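/*
 * Because the enum values are ordered, callers may rely on ordered
 * comparisons (sketch, not a specific call site in this driver):
 *
 *    if (fast_clear_type > ANV_FAST_CLEAR_NONE) {
 *       ... a fast-clear block may exist and need resolving ...
 *    }
 */
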
5425 /**
5426 * Return the aspect's _format_ plane, not its _memory_ plane (using the
5427 * vocabulary of VK_EXT_image_drm_format_modifier). As a consequence, \a
5428 * aspect_mask may contain VK_IMAGE_ASPECT_PLANE_*, but must not contain
5429 * VK_IMAGE_ASPECT_MEMORY_PLANE_* .
5430 */
5431 static inline uint32_t
5432 anv_image_aspect_to_plane(const struct anv_image *image,
5433 VkImageAspectFlagBits aspect)
5434 {
5435 return anv_aspect_to_plane(image->vk.aspects, aspect);
5436 }
5437
5438 /* Returns the number of auxiliary buffer levels attached to an image. */
5439 static inline uint8_t
5440 anv_image_aux_levels(const struct anv_image * const image,
5441 VkImageAspectFlagBits aspect)
5442 {
5443 uint32_t plane = anv_image_aspect_to_plane(image, aspect);
5444 if (image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE)
5445 return 0;
5446
5447 return image->vk.mip_levels;
5448 }
5449
5450 /* Returns the number of auxiliary buffer layers attached to an image. */
5451 static inline uint32_t
5452 anv_image_aux_layers(const struct anv_image * const image,
5453 VkImageAspectFlagBits aspect,
5454 const uint8_t miplevel)
5455 {
5456 assert(image);
5457
5458 /* The miplevel must exist in the main buffer. */
5459 assert(miplevel < image->vk.mip_levels);
5460
5461 if (miplevel >= anv_image_aux_levels(image, aspect)) {
5462 /* There are no layers with auxiliary data because the miplevel has no
5463 * auxiliary data.
5464 */
5465 return 0;
5466 }
5467
5468 return MAX2(image->vk.array_layers, image->vk.extent.depth >> miplevel);
5469 }
5470
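/*
 * Worked example (illustrative only): for a 3D image with
 * vk.extent.depth == 16, vk.array_layers == 1 and a non-NONE aux usage,
 * miplevel 2 has MAX2(1, 16 >> 2) == 4 layers of auxiliary data.
 */
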
5471 static inline struct anv_address MUST_CHECK
5472 anv_image_address(const struct anv_image *image,
5473 const struct anv_image_memory_range *mem_range)
5474 {
5475 const struct anv_image_binding *binding = &image->bindings[mem_range->binding];
5476 assert(binding->memory_range.offset == 0);
5477
5478 if (mem_range->size == 0)
5479 return ANV_NULL_ADDRESS;
5480
5481 return anv_address_add(binding->address, mem_range->offset);
5482 }
5483
5484 static inline struct anv_address
5485 anv_image_get_clear_color_addr(UNUSED const struct anv_device *device,
5486 const struct anv_image *image,
5487 VkImageAspectFlagBits aspect)
5488 {
5489 assert(image->vk.aspects & (VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV |
5490 VK_IMAGE_ASPECT_DEPTH_BIT));
5491
5492 uint32_t plane = anv_image_aspect_to_plane(image, aspect);
5493 const struct anv_image_memory_range *mem_range =
5494 &image->planes[plane].fast_clear_memory_range;
5495
5496 return anv_image_address(image, mem_range);
5497 }
5498
5499 static inline struct anv_address
5500 anv_image_get_fast_clear_type_addr(const struct anv_device *device,
5501 const struct anv_image *image,
5502 VkImageAspectFlagBits aspect)
5503 {
5504 /* Xe2+ platforms don't need fast clear type. We shouldn't get here. */
5505 assert(device->info->ver < 20);
5506 struct anv_address addr =
5507 anv_image_get_clear_color_addr(device, image, aspect);
5508
5509 unsigned clear_color_state_size;
5510 if (device->info->ver >= 11) {
5511 /* The fast clear type and the first compression state are stored in the
5512 * last 2 dwords of the clear color struct. Refer to the comment in
5513 * add_aux_state_tracking_buffer().
5514 */
5515 assert(device->isl_dev.ss.clear_color_state_size >= 32);
5516 clear_color_state_size = device->isl_dev.ss.clear_color_state_size - 8;
5517 } else
5518 clear_color_state_size = device->isl_dev.ss.clear_value_size;
5519 return anv_address_add(addr, clear_color_state_size);
5520 }
5521
5522 static inline struct anv_address
5523 anv_image_get_compression_state_addr(const struct anv_device *device,
5524 const struct anv_image *image,
5525 VkImageAspectFlagBits aspect,
5526 uint32_t level, uint32_t array_layer)
5527 {
5528 /* Xe2+ platforms don't use compression state. We shouldn't get here. */
5529 assert(device->info->ver < 20);
5530 assert(level < anv_image_aux_levels(image, aspect));
5531 assert(array_layer < anv_image_aux_layers(image, aspect, level));
5532 UNUSED uint32_t plane = anv_image_aspect_to_plane(image, aspect);
5533 assert(isl_aux_usage_has_ccs_e(image->planes[plane].aux_usage));
5534
5535 /* Relative to start of the plane's fast clear type */
5536 uint32_t offset;
5537
5538 offset = 4; /* Go past the fast clear type */
5539
5540 if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
5541 for (uint32_t l = 0; l < level; l++)
5542 offset += u_minify(image->vk.extent.depth, l) * 4;
5543 } else {
5544 offset += level * image->vk.array_layers * 4;
5545 }
5546
5547 offset += array_layer * 4;
5548
5549 assert(offset < image->planes[plane].fast_clear_memory_range.size);
5550
5551 return anv_address_add(
5552 anv_image_get_fast_clear_type_addr(device, image, aspect),
5553 offset);
5554 }
5555
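/*
 * Worked example (illustrative only): for a 2D image with
 * vk.array_layers == 6, level == 2 and array_layer == 3, the offset past the
 * plane's fast clear type is
 *
 *    4 + (2 * 6 * 4) + (3 * 4) = 64 bytes
 *
 * i.e. one dword for the fast clear type plus one dword per (level, layer)
 * pair that precedes the requested one.
 */
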
5556 static inline const struct anv_image_memory_range *
5557 anv_image_get_aux_memory_range(const struct anv_image *image,
5558 uint32_t plane)
5559 {
5560 if (image->planes[plane].aux_surface.memory_range.size > 0)
5561 return &image->planes[plane].aux_surface.memory_range;
5562 else
5563 return &image->planes[plane].compr_ctrl_memory_range;
5564 }
5565
5566 /* Returns true if a HiZ-enabled depth buffer can be sampled from. */
5567 static inline bool
5568 anv_can_sample_with_hiz(const struct intel_device_info * const devinfo,
5569 const struct anv_image *image)
5570 {
5571 if (!(image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
5572 return false;
5573
5574 /* For Gfx8-11, there are some restrictions around sampling from HiZ.
5575 * The Skylake PRM docs for RENDER_SURFACE_STATE::AuxiliarySurfaceMode
5576 * say:
5577 *
5578 * "If this field is set to AUX_HIZ, Number of Multisamples must
5579 * be MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D."
5580 */
5581 if (image->vk.image_type == VK_IMAGE_TYPE_3D)
5582 return false;
5583
5584 if (!devinfo->has_sample_with_hiz)
5585 return false;
5586
5587 return image->vk.samples == 1;
5588 }
5589
5590 /* Returns true if an MCS-enabled buffer can be sampled from. */
5591 static inline bool
5592 anv_can_sample_mcs_with_clear(const struct intel_device_info * const devinfo,
5593 const struct anv_image *image)
5594 {
5595 assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT);
5596 const uint32_t plane =
5597 anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_COLOR_BIT);
5598
5599 assert(isl_aux_usage_has_mcs(image->planes[plane].aux_usage));
5600
5601 const struct anv_surface *anv_surf = &image->planes[plane].primary_surface;
5602
5603 /* On TGL, the sampler has an issue with some 8 and 16bpp MSAA fast clears.
5604 * See HSD 1707282275, wa_14013111325. Due to the use of
5605 * format-reinterpretation, a simplified workaround is implemented.
5606 */
5607 if (intel_needs_workaround(devinfo, 14013111325) &&
5608 isl_format_get_layout(anv_surf->isl.format)->bpb <= 16) {
5609 return false;
5610 }
5611
5612 return true;
5613 }
5614
5615 static inline bool
5616 anv_image_plane_uses_aux_map(const struct anv_device *device,
5617 const struct anv_image *image,
5618 uint32_t plane)
5619 {
5620 return device->info->has_aux_map &&
5621 isl_aux_usage_has_ccs(image->planes[plane].aux_usage);
5622 }
5623
5624 static inline bool
5625 anv_image_uses_aux_map(const struct anv_device *device,
5626 const struct anv_image *image)
5627 {
5628 for (uint32_t p = 0; p < image->n_planes; ++p) {
5629 if (anv_image_plane_uses_aux_map(device, image, p))
5630 return true;
5631 }
5632
5633 return false;
5634 }
5635
5636 static inline bool
5637 anv_bo_allows_aux_map(const struct anv_device *device,
5638 const struct anv_bo *bo)
5639 {
5640 if (device->aux_map_ctx == NULL)
5641 return false;
5642
5643 return (bo->alloc_flags & ANV_BO_ALLOC_AUX_TT_ALIGNED) != 0;
5644 }
5645
5646 static inline bool
5647 anv_address_allows_aux_map(const struct anv_device *device,
5648 struct anv_address addr)
5649 {
5650 if (device->aux_map_ctx == NULL)
5651 return false;
5652
5653 /* Technically, we really only care about what offset the image is bound
5654 * into on the BO, but we don't have that information here. As a heuristic,
5655 * rely on the BO offset instead.
5656 */
5657 if (anv_address_physical(addr) %
5658 intel_aux_map_get_alignment(device->aux_map_ctx) != 0)
5659 return false;
5660
5661 return true;
5662 }
5663
5664 void
5665 anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer,
5666 const struct anv_image *image,
5667 VkImageAspectFlagBits aspect,
5668 enum isl_aux_usage aux_usage,
5669 uint32_t level,
5670 uint32_t base_layer,
5671 uint32_t layer_count);
5672
5673 void
5674 anv_cmd_buffer_mark_image_fast_cleared(struct anv_cmd_buffer *cmd_buffer,
5675 const struct anv_image *image,
5676 const enum isl_format format,
5677 union isl_color_value clear_color);
5678
5679 void
5680 anv_cmd_buffer_load_clear_color_from_image(struct anv_cmd_buffer *cmd_buffer,
5681 struct anv_state state,
5682 const struct anv_image *image);
5683
5684 struct anv_image_binding *
5685 anv_image_aspect_to_binding(struct anv_image *image,
5686 VkImageAspectFlags aspect);
5687
5688 void
5689 anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer,
5690 const struct anv_image *image,
5691 VkImageAspectFlagBits aspect,
5692 enum isl_aux_usage aux_usage,
5693 enum isl_format format, struct isl_swizzle swizzle,
5694 uint32_t level, uint32_t base_layer, uint32_t layer_count,
5695 VkRect2D area, union isl_color_value clear_color);
5696 void
5697 anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
5698 const struct anv_image *image,
5699 VkImageAspectFlags aspects,
5700 enum isl_aux_usage depth_aux_usage,
5701 uint32_t level,
5702 uint32_t base_layer, uint32_t layer_count,
5703 VkRect2D area,
5704 const VkClearDepthStencilValue *clear_value);
5705 void
5706 anv_attachment_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
5707 const struct anv_attachment *att,
5708 VkImageLayout layout,
5709 VkImageAspectFlagBits aspect);
5710
5711 static inline union isl_color_value
5712 anv_image_hiz_clear_value(const struct anv_image *image)
5713 {
5714 /* The benchmarks we're tracking tend to prefer clearing depth buffers to
5715 * 0.0f when the depth buffers are part of images with multiple aspects.
5716 * Otherwise, they tend to prefer clearing depth buffers to 1.0f.
5717 */
5718 if (image->n_planes == 2)
5719 return (union isl_color_value) { .f32 = { 0.0f, } };
5720 else
5721 return (union isl_color_value) { .f32 = { 1.0f, } };
5722 }
5723
5724 void
5725 anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
5726 const struct anv_image *image,
5727 VkImageAspectFlagBits aspect, uint32_t level,
5728 uint32_t base_layer, uint32_t layer_count,
5729 enum isl_aux_op hiz_op);
5730 void
5731 anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
5732 const struct anv_image *image,
5733 VkImageAspectFlags aspects,
5734 uint32_t level,
5735 uint32_t base_layer, uint32_t layer_count,
5736 VkRect2D area,
5737 const VkClearDepthStencilValue *clear_value);
5738 void
5739 anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
5740 const struct anv_image *image,
5741 enum isl_format format, struct isl_swizzle swizzle,
5742 VkImageAspectFlagBits aspect,
5743 uint32_t base_layer, uint32_t layer_count,
5744 enum isl_aux_op mcs_op, union isl_color_value *clear_value,
5745 bool predicate);
5746 void
5747 anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
5748 const struct anv_image *image,
5749 enum isl_format format, struct isl_swizzle swizzle,
5750 VkImageAspectFlagBits aspect, uint32_t level,
5751 uint32_t base_layer, uint32_t layer_count,
5752 enum isl_aux_op ccs_op, union isl_color_value *clear_value,
5753 bool predicate);
5754
5755 isl_surf_usage_flags_t
5756 anv_image_choose_isl_surf_usage(struct anv_physical_device *device,
5757 VkImageCreateFlags vk_create_flags,
5758 VkImageUsageFlags vk_usage,
5759 isl_surf_usage_flags_t isl_extra_usage,
5760 VkImageAspectFlagBits aspect,
5761 VkImageCompressionFlagsEXT comp_flags);
5762
5763 void
5764 anv_cmd_buffer_fill_area(struct anv_cmd_buffer *cmd_buffer,
5765 struct anv_address address,
5766 VkDeviceSize size,
5767 uint32_t data,
5768 bool protected);
5769
5770 VkResult
5771 anv_cmd_buffer_ensure_rcs_companion(struct anv_cmd_buffer *cmd_buffer);
5772
5773 bool
5774 anv_can_hiz_clear_ds_view(struct anv_device *device,
5775 const struct anv_image_view *iview,
5776 VkImageLayout layout,
5777 VkImageAspectFlags clear_aspects,
5778 float depth_clear_value,
5779 VkRect2D render_area,
5780 const VkQueueFlagBits queue_flags);
5781
5782 bool
5783 anv_can_fast_clear_color_view(struct anv_device *device,
5784 struct anv_image_view *iview,
5785 VkImageLayout layout,
5786 union isl_color_value clear_color,
5787 uint32_t num_layers,
5788 VkRect2D render_area,
5789 const VkQueueFlagBits queue_flags);
5790
5791 enum isl_aux_state ATTRIBUTE_PURE
5792 anv_layout_to_aux_state(const struct intel_device_info * const devinfo,
5793 const struct anv_image *image,
5794 const VkImageAspectFlagBits aspect,
5795 const VkImageLayout layout,
5796 const VkQueueFlagBits queue_flags);
5797
5798 enum isl_aux_usage ATTRIBUTE_PURE
5799 anv_layout_to_aux_usage(const struct intel_device_info * const devinfo,
5800 const struct anv_image *image,
5801 const VkImageAspectFlagBits aspect,
5802 const VkImageUsageFlagBits usage,
5803 const VkImageLayout layout,
5804 const VkQueueFlagBits queue_flags);
5805
5806 enum anv_fast_clear_type ATTRIBUTE_PURE
5807 anv_layout_to_fast_clear_type(const struct intel_device_info * const devinfo,
5808 const struct anv_image * const image,
5809 const VkImageAspectFlagBits aspect,
5810 const VkImageLayout layout,
5811 const VkQueueFlagBits queue_flags);
5812
5813 bool ATTRIBUTE_PURE
5814 anv_layout_has_untracked_aux_writes(const struct intel_device_info * const devinfo,
5815 const struct anv_image * const image,
5816 const VkImageAspectFlagBits aspect,
5817 const VkImageLayout layout,
5818 const VkQueueFlagBits queue_flags);
5819
5820 static inline bool
5821 anv_image_aspects_compatible(VkImageAspectFlags aspects1,
5822 VkImageAspectFlags aspects2)
5823 {
5824 if (aspects1 == aspects2)
5825 return true;
5826
5827 /* Only color aspects are compatible with one another. */
5828 if ((aspects1 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
5829 (aspects2 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
5830 util_bitcount(aspects1) == util_bitcount(aspects2))
5831 return true;
5832
5833 return false;
5834 }
5835
5836 struct anv_image_view {
5837 struct vk_image_view vk;
5838
5839 const struct anv_image *image; /**< VkImageViewCreateInfo::image */
5840
5841 unsigned n_planes;
5842
5843 /**
5844 * True if the surface states (if any) are owned by some anv_state_stream
5845 * from internal_surface_state_pool.
5846 */
5847 bool use_surface_state_stream;
5848
5849 struct {
5850 struct isl_view isl;
5851
5852 /**
5853 * A version of the image view for storage usage (can apply 3D image
5854 * slicing).
5855 */
5856 struct isl_view isl_storage;
5857
5858 /**
5859 * RENDER_SURFACE_STATE when using image as a sampler surface with an
5860 * image layout of SHADER_READ_ONLY_OPTIMAL or
5861 * DEPTH_STENCIL_READ_ONLY_OPTIMAL.
5862 */
5863 struct anv_surface_state optimal_sampler;
5864
5865 /**
5866 * RENDER_SURFACE_STATE when using image as a sampler surface with an
5867 * image layout of GENERAL.
5868 */
5869 struct anv_surface_state general_sampler;
5870
5871 /**
5872 * RENDER_SURFACE_STATE when using image as a storage image.
5873 */
5874 struct anv_surface_state storage;
5875 } planes[3];
5876 };
5877
5878 enum anv_image_view_state_flags {
5879 ANV_IMAGE_VIEW_STATE_TEXTURE_OPTIMAL = (1 << 0),
5880 };
5881
5882 void anv_image_fill_surface_state(struct anv_device *device,
5883 const struct anv_image *image,
5884 VkImageAspectFlagBits aspect,
5885 const struct isl_view *view,
5886 isl_surf_usage_flags_t view_usage,
5887 enum isl_aux_usage aux_usage,
5888 const union isl_color_value *clear_color,
5889 enum anv_image_view_state_flags flags,
5890 struct anv_surface_state *state_inout);
5891
5892
5893 static inline const struct anv_surface_state *
5894 anv_image_view_texture_surface_state(const struct anv_image_view *iview,
5895 uint32_t plane, VkImageLayout layout)
5896 {
5897 return layout == VK_IMAGE_LAYOUT_GENERAL ?
5898 &iview->planes[plane].general_sampler :
5899 &iview->planes[plane].optimal_sampler;
5900 }
5901
5902 static inline const struct anv_surface_state *
5903 anv_image_view_storage_surface_state(const struct anv_image_view *iview)
5904 {
5905 return &iview->planes[0].storage;
5906 }
5907
5908 static inline bool
5909 anv_cmd_graphics_state_has_image_as_attachment(const struct anv_cmd_graphics_state *state,
5910 const struct anv_image *image)
5911 {
5912 for (unsigned a = 0; a < state->color_att_count; a++) {
5913 if (state->color_att[a].iview &&
5914 state->color_att[a].iview->image == image)
5915 return true;
5916 }
5917
5918 if (state->depth_att.iview && state->depth_att.iview->image == image)
5919 return true;
5920 if (state->stencil_att.iview && state->stencil_att.iview->image == image)
5921 return true;
5922
5923 return false;
5924 }
5925
5926 struct anv_image_create_info {
5927 const VkImageCreateInfo *vk_info;
5928
5929 /** An opt-in bitmask which filters an ISL-mapping of the Vulkan tiling. */
5930 isl_tiling_flags_t isl_tiling_flags;
5931
5932 /** These flags will be added to any derived from VkImageCreateInfo. */
5933 isl_surf_usage_flags_t isl_extra_usage_flags;
5934
5935 /** An opt-in stride in pixels, should be 0 for implicit layouts */
5936 uint32_t stride;
5937
5938 /** Whether to skip allocation of the private binding */
5939 bool no_private_binding_alloc;
5940 };
5941
5942 VkResult anv_image_init(struct anv_device *device, struct anv_image *image,
5943 const struct anv_image_create_info *create_info);
5944
5945 void anv_image_finish(struct anv_image *image);
5946
5947 void anv_image_get_memory_requirements(struct anv_device *device,
5948 struct anv_image *image,
5949 VkImageAspectFlags aspects,
5950 VkMemoryRequirements2 *pMemoryRequirements);
5951
5952 void anv_image_view_init(struct anv_device *device,
5953 struct anv_image_view *iview,
5954 const VkImageViewCreateInfo *pCreateInfo,
5955 struct anv_state_stream *state_stream);
5956
5957 void anv_image_view_finish(struct anv_image_view *iview);
5958
5959 enum isl_format
5960 anv_isl_format_for_descriptor_type(const struct anv_device *device,
5961 VkDescriptorType type);
5962
5963 static inline isl_surf_usage_flags_t
5964 anv_isl_usage_for_descriptor_type(const VkDescriptorType type)
5965 {
5966 switch(type) {
5967 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
5968 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
5969 return ISL_SURF_USAGE_CONSTANT_BUFFER_BIT;
5970 default:
5971 return ISL_SURF_USAGE_STORAGE_BIT;
5972 }
5973 }
5974
5975 static inline uint32_t
5976 anv_rasterization_aa_mode(VkPolygonMode raster_mode,
5977 VkLineRasterizationModeKHR line_mode)
5978 {
5979 if (raster_mode == VK_POLYGON_MODE_LINE &&
5980 line_mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR)
5981 return true;
5982 return false;
5983 }
5984
5985 static inline VkLineRasterizationModeKHR
5986 anv_line_rasterization_mode(VkLineRasterizationModeKHR line_mode,
5987 unsigned rasterization_samples)
5988 {
5989 if (line_mode == VK_LINE_RASTERIZATION_MODE_DEFAULT_KHR) {
5990 if (rasterization_samples > 1) {
5991 return VK_LINE_RASTERIZATION_MODE_RECTANGULAR_KHR;
5992 } else {
5993 return VK_LINE_RASTERIZATION_MODE_BRESENHAM_KHR;
5994 }
5995 }
5996 return line_mode;
5997 }
5998
5999 static inline bool
6000 anv_is_dual_src_blend_factor(VkBlendFactor factor)
6001 {
6002 return factor == VK_BLEND_FACTOR_SRC1_COLOR ||
6003 factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR ||
6004 factor == VK_BLEND_FACTOR_SRC1_ALPHA ||
6005 factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA;
6006 }
6007
6008 static inline bool
6009 anv_is_dual_src_blend_equation(const struct vk_color_blend_attachment_state *cb)
6010 {
6011 return anv_is_dual_src_blend_factor(cb->src_color_blend_factor) &&
6012 anv_is_dual_src_blend_factor(cb->dst_color_blend_factor) &&
6013 anv_is_dual_src_blend_factor(cb->src_alpha_blend_factor) &&
6014 anv_is_dual_src_blend_factor(cb->dst_alpha_blend_factor);
6015 }
6016
6017 VkFormatFeatureFlags2
6018 anv_get_image_format_features2(const struct anv_physical_device *physical_device,
6019 VkFormat vk_format,
6020 const struct anv_format *anv_format,
6021 VkImageTiling vk_tiling,
6022 const struct isl_drm_modifier_info *isl_mod_info);
6023
6024 void anv_fill_buffer_surface_state(struct anv_device *device,
6025 void *surface_state_ptr,
6026 enum isl_format format,
6027 struct isl_swizzle swizzle,
6028 isl_surf_usage_flags_t usage,
6029 struct anv_address address,
6030 uint32_t range, uint32_t stride);
6031
6032
6033 struct gfx8_border_color {
6034 union {
6035 float float32[4];
6036 uint32_t uint32[4];
6037 };
6038 /* Pad out to 64 bytes */
6039 uint32_t _pad[12];
6040 };
6041
6042 struct anv_sampler {
6043 struct vk_sampler vk;
6044
6045 /* Hash of the sampler state + border color, useful for embedded samplers
6046 * and included in the descriptor layout hash.
6047 */
6048 unsigned char sha1[20];
6049
6050 uint32_t state[3][4];
6051 /* Packed SAMPLER_STATE without the border color pointer. */
6052 uint32_t state_no_bc[3][4];
6053 uint32_t n_planes;
6054
6055 /* Blob of sampler state data which is guaranteed to be 32-byte aligned
6056 * and with a 32-byte stride for use as bindless samplers.
6057 */
6058 struct anv_state bindless_state;
6059
6060 struct anv_state custom_border_color;
6061 };
6062
6063
6064 struct anv_query_pool {
6065 struct vk_query_pool vk;
6066
6067 /** Stride between queries, in bytes */
6068 uint32_t stride;
6069 /** BO storing the query pool data */
6070 struct anv_bo * bo;
6071
6072 /** Location for the KHR_performance_query small batch updating
6073 * ANV_PERF_QUERY_OFFSET_REG
6074 */
6075 uint32_t khr_perf_preambles_offset;
6076
6077 /** Size of each small batch */
6078 uint32_t khr_perf_preamble_stride;
6079
6080 /* KHR perf queries: */
6081 /** Query pass size in bytes (availability + padding + query data) */
6082 uint32_t pass_size;
6083 /** Offset of the query data within a pass */
6084 uint32_t data_offset;
6085 /** Size of a single query snapshot (query data / 2) */
6086 uint32_t snapshot_size;
6087 uint32_t n_counters;
6088 struct intel_perf_counter_pass *counter_pass;
6089 uint32_t n_passes;
6090 struct intel_perf_query_info **pass_query;
6091
6092 /* Video encoding queries */
6093 VkVideoCodecOperationFlagsKHR codec;
6094 };
6095
6096 static inline uint32_t khr_perf_query_preamble_offset(const struct anv_query_pool *pool,
6097 uint32_t pass)
6098 {
6099 return pool->khr_perf_preambles_offset +
6100 pool->khr_perf_preamble_stride * pass;
6101 }
6102
6103 struct anv_vid_mem {
6104 struct anv_device_memory *mem;
6105 VkDeviceSize offset;
6106 VkDeviceSize size;
6107 };
6108
6109 #define ANV_MB_WIDTH 16
6110 #define ANV_MB_HEIGHT 16
6111 #define ANV_VIDEO_H264_MAX_NUM_REF_FRAME 16
6112 #define ANV_VIDEO_H265_MAX_NUM_REF_FRAME 16
6113 #define ANV_VIDEO_H265_HCP_NUM_REF_FRAME 8
6114 #define ANV_MAX_H265_CTB_SIZE 64
6115
6116 enum anv_vid_mem_h264_types {
6117 ANV_VID_MEM_H264_INTRA_ROW_STORE,
6118 ANV_VID_MEM_H264_DEBLOCK_FILTER_ROW_STORE,
6119 ANV_VID_MEM_H264_BSD_MPC_ROW_SCRATCH,
6120 ANV_VID_MEM_H264_MPR_ROW_SCRATCH,
6121 ANV_VID_MEM_H264_MAX,
6122 };
6123
6124 enum anv_vid_mem_h265_types {
6125 ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_LINE,
6126 ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_LINE,
6127 ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_COLUMN,
6128 ANV_VID_MEM_H265_METADATA_LINE,
6129 ANV_VID_MEM_H265_METADATA_TILE_LINE,
6130 ANV_VID_MEM_H265_METADATA_TILE_COLUMN,
6131 ANV_VID_MEM_H265_SAO_LINE,
6132 ANV_VID_MEM_H265_SAO_TILE_LINE,
6133 ANV_VID_MEM_H265_SAO_TILE_COLUMN,
6134 ANV_VID_MEM_H265_DEC_MAX,
6135 ANV_VID_MEM_H265_SSE_SRC_PIX_ROW_STORE = ANV_VID_MEM_H265_DEC_MAX,
6136 ANV_VID_MEM_H265_ENC_MAX,
6137 };
6138
6139 struct anv_video_session {
6140 struct vk_video_session vk;
6141
6142 /* the decoder needs some private memory allocations */
6143 struct anv_vid_mem vid_mem[ANV_VID_MEM_H265_ENC_MAX];
6144 };
6145
6146 struct anv_video_session_params {
6147 struct vk_video_session_parameters vk;
6148 VkVideoEncodeRateControlModeFlagBitsKHR rc_mode;
6149 };
6150
6151 void
6152 anv_dump_pipe_bits(enum anv_pipe_bits bits, FILE *f);
6153
6154 static inline void
6155 anv_add_pending_pipe_bits(struct anv_cmd_buffer* cmd_buffer,
6156 enum anv_pipe_bits bits,
6157 const char* reason)
6158 {
6159 cmd_buffer->state.pending_pipe_bits |= bits;
6160 if (INTEL_DEBUG(DEBUG_PIPE_CONTROL) && bits) {
6161 fputs("pc: add ", stdout);
6162 anv_dump_pipe_bits(bits, stdout);
6163 fprintf(stdout, "reason: %s\n", reason);
6164 }
6165 if (cmd_buffer->batch.pc_reasons_count < ARRAY_SIZE(cmd_buffer->batch.pc_reasons)) {
6166 cmd_buffer->batch.pc_reasons[cmd_buffer->batch.pc_reasons_count++] = reason;
6167 }
6168 }
6169
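/*
 * Usage sketch (illustrative only; assumes ANV_PIPE_CS_STALL_BIT is one of
 * the anv_pipe_bits defined earlier in this header):
 *
 *    anv_add_pending_pipe_bits(cmd_buffer, ANV_PIPE_CS_STALL_BIT,
 *                              "example: stall before reading results");
 */
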
6170 struct anv_performance_configuration_intel {
6171 struct vk_object_base base;
6172
6173 struct intel_perf_registers *register_config;
6174
6175 uint64_t config_id;
6176 };
6177
6178 void anv_physical_device_init_va_ranges(struct anv_physical_device *device);
6179 void anv_physical_device_init_perf(struct anv_physical_device *device, int fd);
6180 void anv_device_perf_init(struct anv_device *device);
6181 void anv_device_perf_close(struct anv_device *device);
6182 void anv_perf_write_pass_results(struct intel_perf_config *perf,
6183 struct anv_query_pool *pool, uint32_t pass,
6184 const struct intel_perf_query_result *accumulated_results,
6185 union VkPerformanceCounterResultKHR *results);
6186
6187 void anv_apply_per_prim_attr_wa(struct nir_shader *ms_nir,
6188 struct nir_shader *fs_nir,
6189 struct anv_device *device,
6190 const VkGraphicsPipelineCreateInfo *info);
6191
6192 /* Used to emit a series of memcpy operations */
6193 struct anv_memcpy_state {
6194 struct anv_device *device;
6195 struct anv_cmd_buffer *cmd_buffer;
6196 struct anv_batch *batch;
6197
6198 /* Configuration programmed by the memcpy operation */
6199 struct intel_urb_config urb_cfg;
6200
6201 struct anv_vb_cache_range vb_bound;
6202 struct anv_vb_cache_range vb_dirty;
6203 };
6204
6205 VkResult anv_device_init_internal_kernels(struct anv_device *device);
6206 void anv_device_finish_internal_kernels(struct anv_device *device);
6207 VkResult anv_device_get_internal_shader(struct anv_device *device,
6208 enum anv_internal_kernel_name name,
6209 struct anv_shader_bin **out_bin);
6210
6211 VkResult anv_device_init_astc_emu(struct anv_device *device);
6212 void anv_device_finish_astc_emu(struct anv_device *device);
6213 void anv_astc_emu_process(struct anv_cmd_buffer *cmd_buffer,
6214 struct anv_image *image,
6215 VkImageLayout layout,
6216 const VkImageSubresourceLayers *subresource,
6217 VkOffset3D block_offset,
6218 VkExtent3D block_extent);
6219
6220 /* This structure is used in 2 scenarios:
6221 *
6222 * - copy utrace timestamps from command buffer so that command buffer can
6223 * be resubmitted multiple times without the recorded timestamps being
6224 * overwritten before they're read back
6225 *
6226 * - emit trace points for queue debug tagging
6227 * (vkQueueBeginDebugUtilsLabelEXT/vkQueueEndDebugUtilsLabelEXT)
6228 */
6229 struct anv_utrace_submit {
6230 struct anv_async_submit base;
6231
6232 /* structure used by the perfetto glue */
6233 struct intel_ds_flush_data ds;
6234
6235 /* Stream for temporary allocations */
6236 struct anv_state_stream dynamic_state_stream;
6237 struct anv_state_stream general_state_stream;
6238
6239 /* Last fully read 64bit timestamp (used to rebuild the upper bits of 32bit
6240 * timestamps)
6241 */
6242 uint64_t last_full_timestamp;
6243
6244 /* Memcpy state tracking (only used for timestamp copies on render engine) */
6245 struct anv_memcpy_state memcpy_state;
6246
6247 /* Memcpy state tracking (only used for timestamp copies on compute engine) */
6248 struct anv_simple_shader simple_state;
6249 };
6250
6251 void anv_device_utrace_init(struct anv_device *device);
6252 void anv_device_utrace_finish(struct anv_device *device);
6253 VkResult
6254 anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
6255 uint32_t cmd_buffer_count,
6256 struct anv_cmd_buffer **cmd_buffers,
6257 struct anv_utrace_submit **out_submit);
6258
6259 void
6260 anv_device_utrace_emit_gfx_copy_buffer(struct u_trace_context *utctx,
6261 void *cmdstream,
6262 void *ts_from, uint64_t from_offset_B,
6263 void *ts_to, uint64_t to_offset_B,
6264 uint64_t size_B);
6265
6266 static bool
6267 anv_has_cooperative_matrix(const struct anv_physical_device *device)
6268 {
6269 return device->has_cooperative_matrix;
6270 }
6271
6272 #define ANV_FROM_HANDLE(__anv_type, __name, __handle) \
6273 VK_FROM_HANDLE(__anv_type, __name, __handle)
6274
6275 VK_DEFINE_HANDLE_CASTS(anv_cmd_buffer, vk.base, VkCommandBuffer,
6276 VK_OBJECT_TYPE_COMMAND_BUFFER)
6277 VK_DEFINE_HANDLE_CASTS(anv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
6278 VK_DEFINE_HANDLE_CASTS(anv_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE)
6279 VK_DEFINE_HANDLE_CASTS(anv_physical_device, vk.base, VkPhysicalDevice,
6280 VK_OBJECT_TYPE_PHYSICAL_DEVICE)
6281 VK_DEFINE_HANDLE_CASTS(anv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
6282
6283 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, vk.base, VkBuffer,
6284 VK_OBJECT_TYPE_BUFFER)
6285 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, vk.base, VkBufferView,
6286 VK_OBJECT_TYPE_BUFFER_VIEW)
6287 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_pool, base, VkDescriptorPool,
6288 VK_OBJECT_TYPE_DESCRIPTOR_POOL)
6289 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, base, VkDescriptorSet,
6290 VK_OBJECT_TYPE_DESCRIPTOR_SET)
6291 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, base,
6292 VkDescriptorSetLayout,
6293 VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
6294 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, vk.base, VkDeviceMemory,
6295 VK_OBJECT_TYPE_DEVICE_MEMORY)
6296 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
6297 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)
6298 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, vk.base, VkImageView,
6299 VK_OBJECT_TYPE_IMAGE_VIEW);
6300 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, base, VkPipeline,
6301 VK_OBJECT_TYPE_PIPELINE)
6302 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, base, VkPipelineLayout,
6303 VK_OBJECT_TYPE_PIPELINE_LAYOUT)
6304 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, vk.base, VkQueryPool,
6305 VK_OBJECT_TYPE_QUERY_POOL)
6306 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, vk.base, VkSampler,
6307 VK_OBJECT_TYPE_SAMPLER)
6308 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_performance_configuration_intel, base,
6309 VkPerformanceConfigurationINTEL,
6310 VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL)
6311 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_video_session, vk.base,
6312 VkVideoSessionKHR,
6313 VK_OBJECT_TYPE_VIDEO_SESSION_KHR)
6314 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_video_session_params, vk.base,
6315 VkVideoSessionParametersKHR,
6316 VK_OBJECT_TYPE_VIDEO_SESSION_PARAMETERS_KHR)
6317
6318 #define anv_genX(devinfo, thing) ({ \
6319 __typeof(&gfx9_##thing) genX_thing; \
6320 switch ((devinfo)->verx10) { \
6321 case 90: \
6322 genX_thing = &gfx9_##thing; \
6323 break; \
6324 case 110: \
6325 genX_thing = &gfx11_##thing; \
6326 break; \
6327 case 120: \
6328 genX_thing = &gfx12_##thing; \
6329 break; \
6330 case 125: \
6331 genX_thing = &gfx125_##thing; \
6332 break; \
6333 case 200: \
6334 genX_thing = &gfx20_##thing; \
6335 break; \
6336 default: \
6337 unreachable("Unknown hardware generation"); \
6338 } \
6339 genX_thing; \
6340 })
6341
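/*
 * Usage sketch (illustrative only; "emit_foo" stands in for a hypothetical
 * per-gen entry point declared in anv_genX.h): pick the gfx9/11/12/125/20
 * variant at runtime from devinfo->verx10 and call it.
 *
 *    anv_genX(device->info, emit_foo)(device, args);
 */
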
6342 /* Gen-specific function declarations */
6343 #ifdef genX
6344 # include "anv_genX.h"
6345 #else
6346 # define genX(x) gfx9_##x
6347 # include "anv_genX.h"
6348 # undef genX
6349 # define genX(x) gfx11_##x
6350 # include "anv_genX.h"
6351 # undef genX
6352 # define genX(x) gfx12_##x
6353 # include "anv_genX.h"
6354 # undef genX
6355 # define genX(x) gfx125_##x
6356 # include "anv_genX.h"
6357 # undef genX
6358 # define genX(x) gfx20_##x
6359 # include "anv_genX.h"
6360 # undef genX
6361 #endif
6362
6363 #ifdef __cplusplus
6364 }
6365 #endif
6366
6367 #endif /* ANV_PRIVATE_H */
6368