xref: /aosp_15_r20/external/mesa3d/src/intel/vulkan/anv_private.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #ifndef ANV_PRIVATE_H
25 #define ANV_PRIVATE_H
26 
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <stdbool.h>
30 #include <pthread.h>
31 #include <assert.h>
32 #include <stdint.h>
33 #include "drm-uapi/drm_fourcc.h"
34 
35 #ifdef HAVE_VALGRIND
36 #include <valgrind.h>
37 #include <memcheck.h>
38 #define VG(x) x
39 #else
40 #define VG(x) ((void)0)
41 #endif
42 
43 #include "common/intel_aux_map.h"
44 #include "common/intel_bind_timeline.h"
45 #include "common/intel_engine.h"
46 #include "common/intel_gem.h"
47 #include "common/intel_l3_config.h"
48 #include "common/intel_measure.h"
49 #include "common/intel_mem.h"
50 #include "common/intel_sample_positions.h"
51 #include "decoder/intel_decoder.h"
52 #include "dev/intel_device_info.h"
53 #include "blorp/blorp.h"
54 #include "compiler/brw_compiler.h"
55 #include "compiler/brw_kernel.h"
56 #include "compiler/brw_rt.h"
57 #include "ds/intel_driver_ds.h"
58 #include "util/bitset.h"
59 #include "util/bitscan.h"
60 #include "util/detect_os.h"
61 #include "util/macros.h"
62 #include "util/hash_table.h"
63 #include "util/list.h"
64 #include "util/perf/u_trace.h"
65 #include "util/set.h"
66 #include "util/sparse_array.h"
67 #include "util/u_atomic.h"
68 #if DETECT_OS_ANDROID
69 #include "util/u_gralloc/u_gralloc.h"
70 #endif
71 #include "util/u_vector.h"
72 #include "util/u_math.h"
73 #include "util/vma.h"
74 #include "util/xmlconfig.h"
75 #include "vk_acceleration_structure.h"
76 #include "vk_alloc.h"
77 #include "vk_buffer.h"
78 #include "vk_buffer_view.h"
79 #include "vk_command_buffer.h"
80 #include "vk_command_pool.h"
81 #include "vk_debug_report.h"
82 #include "vk_descriptor_update_template.h"
83 #include "vk_device.h"
84 #include "vk_device_memory.h"
85 #include "vk_drm_syncobj.h"
86 #include "vk_enum_defines.h"
87 #include "vk_format.h"
88 #include "vk_framebuffer.h"
89 #include "vk_graphics_state.h"
90 #include "vk_image.h"
91 #include "vk_instance.h"
92 #include "vk_pipeline_cache.h"
93 #include "vk_physical_device.h"
94 #include "vk_sampler.h"
95 #include "vk_shader_module.h"
96 #include "vk_sync.h"
97 #include "vk_sync_timeline.h"
98 #include "vk_texcompress_astc.h"
99 #include "vk_util.h"
100 #include "vk_query_pool.h"
101 #include "vk_queue.h"
102 #include "vk_log.h"
103 #include "vk_ycbcr_conversion.h"
104 #include "vk_video.h"
105 
106 #ifdef __cplusplus
107 extern "C" {
108 #endif
109 
110 /* Pre-declarations needed for WSI entrypoints */
111 struct wl_surface;
112 struct wl_display;
113 typedef struct xcb_connection_t xcb_connection_t;
114 typedef uint32_t xcb_visualid_t;
115 typedef uint32_t xcb_window_t;
116 
117 struct anv_batch;
118 struct anv_buffer;
119 struct anv_buffer_view;
120 struct anv_image_view;
121 struct anv_instance;
122 
123 struct intel_aux_map_context;
124 struct intel_perf_config;
125 struct intel_perf_counter_pass;
126 struct intel_perf_query_result;
127 
128 #include <vulkan/vulkan.h>
129 #include <vulkan/vk_icd.h>
130 
131 #include "anv_android.h"
132 #include "anv_entrypoints.h"
133 #include "anv_kmd_backend.h"
134 #include "anv_rmv.h"
135 #include "isl/isl.h"
136 
137 #include "dev/intel_debug.h"
138 #undef MESA_LOG_TAG
139 #define MESA_LOG_TAG "MESA-INTEL"
140 #include "util/log.h"
141 #include "wsi_common.h"
142 
143 /* The "RAW" clocks on Linux are called "FAST" on FreeBSD */
144 #if !defined(CLOCK_MONOTONIC_RAW) && defined(CLOCK_MONOTONIC_FAST)
145 #define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST
146 #endif
147 
148 #define NSEC_PER_SEC 1000000000ull
149 
150 #define BINDING_TABLE_POOL_BLOCK_SIZE (65536)
151 
152 /* 3DSTATE_VERTEX_BUFFER supports 33 VBs; we use 2 for base & drawid SGVs */
153 #define MAX_VBS         (33 - 2)
154 
155 /* 3DSTATE_VERTEX_ELEMENTS supports up to 34 VEs, but our backend compiler
156  * only supports the push model of VS inputs, and we only have 128 GRFs,
157  * minus the g0 and g1 payload, which gives us a maximum of 31 VEs.  Plus,
158  * we use two of them for SGVs.
159  */
160 #define MAX_VES         (31 - 2)
161 
162 #define MAX_XFB_BUFFERS  4
163 #define MAX_XFB_STREAMS  4
164 #define MAX_SETS         8
165 #define MAX_RTS          8
166 #define MAX_VIEWPORTS   16
167 #define MAX_SCISSORS    16
168 #define MAX_PUSH_CONSTANTS_SIZE 128
169 #define MAX_DYNAMIC_BUFFERS 16
170 #define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
171 #define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096
172 #define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32
173 #define MAX_EMBEDDED_SAMPLERS 2048
174 #define MAX_CUSTOM_BORDER_COLORS 4096
175 /* We need 16 for UBO block reads to work and 32 for push UBOs. However, we
176  * use 64 here to avoid cache issues. We could most likely bring it back to
177  * 32 if we had different virtual addresses for the different views on a given
178  * GEM object.
179  */
180 #define ANV_UBO_ALIGNMENT 64
181 #define ANV_SSBO_ALIGNMENT 4
182 #define ANV_SSBO_BOUNDS_CHECK_ALIGNMENT 4
183 #define MAX_VIEWS_FOR_PRIMITIVE_REPLICATION 16
184 #define MAX_SAMPLE_LOCATIONS 16
185 
186 /* RENDER_SURFACE_STATE is a bit smaller (48b) but since it is aligned to 64
187  * and we can't put anything else there we use 64b.
188  */
189 #define ANV_SURFACE_STATE_SIZE (64)
190 
191 /* From the Skylake PRM Vol. 7 "Binding Table Surface State Model":
192  *
193  *    "The surface state model is used when a Binding Table Index (specified
194  *    in the message descriptor) of less than 240 is specified. In this model,
195  *    the Binding Table Index is used to index into the binding table, and the
196  *    binding table entry contains a pointer to the SURFACE_STATE."
197  *
198  * Binding table values above 240 are used for various things in the hardware
199  * such as stateless, stateless with incoherent cache, SLM, and bindless.
200  */
201 #define MAX_BINDING_TABLE_SIZE 240
202 
203 #define ANV_SVGS_VB_INDEX    MAX_VBS
204 #define ANV_DRAWID_VB_INDEX (MAX_VBS + 1)
205 
206 /* We reserve this MI ALU register for the purpose of handling predication.
207  * Other code which uses the MI ALU should leave it alone.
208  */
209 #define ANV_PREDICATE_RESULT_REG 0x2678 /* MI_ALU_REG15 */
210 
211 /* We reserve this MI ALU register to pass around an offset computed from
212  * VkPerformanceQuerySubmitInfoKHR::counterPassIndex VK_KHR_performance_query.
213  * Other code which uses the MI ALU should leave it alone.
214  */
215 #define ANV_PERF_QUERY_OFFSET_REG 0x2670 /* MI_ALU_REG14 */
216 
217 /* We reserve this MI ALU register to hold the last programmed bindless
218  * surface state base address so that we can predicate STATE_BASE_ADDRESS
219  * emissions if the address doesn't change.
220  */
221 #define ANV_BINDLESS_SURFACE_BASE_ADDR_REG 0x2668 /* MI_ALU_REG13 */
222 
223 #define ANV_GRAPHICS_SHADER_STAGE_COUNT (MESA_SHADER_MESH + 1)
224 
225 /* RENDER_SURFACE_STATE is a bit smaller (48b) but since it is aligned to 64
226  * and we can't put anything else there we use 64b.
227  */
228 #define ANV_SURFACE_STATE_SIZE (64)
229 #define ANV_SAMPLER_STATE_SIZE (32)
230 
231 /* For gfx12 we set the streamout buffers using 4 separate commands
232  * (3DSTATE_SO_BUFFER_INDEX_*) instead of 3DSTATE_SO_BUFFER. However the layout
233  * of the 3DSTATE_SO_BUFFER_INDEX_* commands is identical to that of
234  * 3DSTATE_SO_BUFFER apart from the SOBufferIndex field, so for now we use the
235  * 3DSTATE_SO_BUFFER command, but change the 3DCommandSubOpcode.
236  * SO_BUFFER_INDEX_0_CMD is actually the 3DCommandSubOpcode for
237  * 3DSTATE_SO_BUFFER_INDEX_0.
238  */
239 #define SO_BUFFER_INDEX_0_CMD 0x60
240 #define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
241 
242 /* The TR-TT L1 page table entries may contain these values instead of actual
243  * pointers to indicate the regions are either NULL or invalid. We program
244  * these values to TR-TT registers, so we could change them, but it's super
245  * convenient to have the NULL value be 0 because everything is
246  * zero-initialized when allocated.
247  *
248  * Since we reserve these values for NULL/INVALID, then we can't use them as
249  * destinations for TR-TT address translation. Both values are shifted by 16
250  * bits, which results in graphics addresses 0 and 64k. In Anv the first vma
251  * starts at 2MB, so we already don't use 0 and 64k for anything, so there's
252  * nothing really to reserve. We could instead just reserve random 64kb
253  * ranges from any of the non-TR-TT vmas and use their addresses.
254  */
255 #define ANV_TRTT_L1_NULL_TILE_VAL 0
256 #define ANV_TRTT_L1_INVALID_TILE_VAL 1
257 
258 static inline uint32_t
259 align_down_npot_u32(uint32_t v, uint32_t a)
260 {
261    return v - (v % a);
262 }
263 
264 /** Alignment must be a power of 2. */
265 static inline bool
266 anv_is_aligned(uintmax_t n, uintmax_t a)
267 {
268    assert(a == (a & -a));
269    return (n & (a - 1)) == 0;
270 }
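/* A short illustrative note: the assert checks that "a" is a power of two
 * (a == (a & -a)), and for such alignments (n & (a - 1)) keeps only the bits
 * of n below the alignment, so e.g. anv_is_aligned(128, 64) is true while
 * anv_is_aligned(96, 64) is not.
 */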
271 
272 static inline union isl_color_value
273 vk_to_isl_color(VkClearColorValue color)
274 {
275    return (union isl_color_value) {
276       .u32 = {
277          color.uint32[0],
278          color.uint32[1],
279          color.uint32[2],
280          color.uint32[3],
281       },
282    };
283 }
284 
285 static inline union isl_color_value
286 vk_to_isl_color_with_format(VkClearColorValue color, enum isl_format format)
287 {
288    const struct isl_format_layout *fmtl = isl_format_get_layout(format);
289    union isl_color_value isl_color = { .u32 = {0, } };
290 
291 #define COPY_COLOR_CHANNEL(c, i) \
292    if (fmtl->channels.c.bits) \
293       isl_color.u32[i] = color.uint32[i]
294 
295    COPY_COLOR_CHANNEL(r, 0);
296    COPY_COLOR_CHANNEL(g, 1);
297    COPY_COLOR_CHANNEL(b, 2);
298    COPY_COLOR_CHANNEL(a, 3);
299 
300 #undef COPY_COLOR_CHANNEL
301 
302    return isl_color;
303 }
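/* Note: channels that are absent from the format are left at zero by the
 * macro above, e.g. for ISL_FORMAT_R8_UNORM only color.uint32[0] is copied
 * and the green/blue/alpha clear values are ignored.
 */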
304 
305 void __anv_perf_warn(struct anv_device *device,
306                      const struct vk_object_base *object,
307                      const char *file, int line, const char *format, ...)
308    anv_printflike(5, 6);
309 
310 /**
311  * Print a FINISHME message, including its source location.
312  */
313 #define anv_finishme(format, ...) \
314    do { \
315       static bool reported = false; \
316       if (!reported) { \
317          mesa_logw("%s:%d: FINISHME: " format, __FILE__, __LINE__, \
318                     ##__VA_ARGS__); \
319          reported = true; \
320       } \
321    } while (0)
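/* Illustrative usage (hypothetical arguments):
 *    anv_finishme("format %s not yet handled", format_name);
 * The static "reported" flag ensures each call site logs at most once.
 */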
322 
323 /**
324  * Print a perf warning message.  Set INTEL_DEBUG=perf to see these.
325  */
326 #define anv_perf_warn(objects_macro, format, ...)   \
327    do { \
328       static bool reported = false; \
329       if (!reported && INTEL_DEBUG(DEBUG_PERF)) { \
330          __vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT,      \
331                   VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,      \
332                   objects_macro, __FILE__, __LINE__,                    \
333                   format, ## __VA_ARGS__);                              \
334          reported = true; \
335       } \
336    } while (0)
337 
338 /* A non-fatal assert.  Useful for debugging. */
339 #if MESA_DEBUG
340 #define anv_assert(x) ({ \
341    if (unlikely(!(x))) \
342       mesa_loge("%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \
343 })
344 #else
345 #define anv_assert(x)
346 #endif
347 
348 enum anv_bo_alloc_flags {
349    /** Specifies that the BO must have a 32-bit address
350     *
351     * This is the opposite of EXEC_OBJECT_SUPPORTS_48B_ADDRESS.
352     */
353    ANV_BO_ALLOC_32BIT_ADDRESS =           (1 << 0),
354 
355    /** Specifies that the BO may be shared externally */
356    ANV_BO_ALLOC_EXTERNAL =                (1 << 1),
357 
358    /** Specifies that the BO should be mapped */
359    ANV_BO_ALLOC_MAPPED =                  (1 << 2),
360 
361    /** Specifies that the BO should be coherent.
362     *
363     * Note: On platforms with LLC where HOST_CACHED + HOST_COHERENT is free,
364     * the BO can get upgraded to HOST_CACHED_COHERENT.
365     */
366    ANV_BO_ALLOC_HOST_COHERENT =           (1 << 3),
367 
368    /** Specifies that the BO should be captured in error states */
369    ANV_BO_ALLOC_CAPTURE =                 (1 << 4),
370 
371    /** Specifies that the BO will have an address assigned by the caller
372     *
373     * Such BOs do not exist in any VMA heap.
374     */
375    ANV_BO_ALLOC_FIXED_ADDRESS =           (1 << 5),
376 
377    /** Enables implicit synchronization on the BO
378     *
379     * This is the opposite of EXEC_OBJECT_ASYNC.
380     */
381    ANV_BO_ALLOC_IMPLICIT_SYNC =           (1 << 6),
382 
383    /** Enables implicit synchronization on the BO
384     *
385     * This is equivalent to EXEC_OBJECT_WRITE.
386     */
387    ANV_BO_ALLOC_IMPLICIT_WRITE =          (1 << 7),
388 
389    /** Has an address which is visible to the client */
390    ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS =  (1 << 8),
391 
392    /** Align the BO's virtual address to match AUX-TT requirements */
393    ANV_BO_ALLOC_AUX_TT_ALIGNED =          (1 << 9),
394 
395    /** This buffer is allocated from local memory and should be cpu visible */
396    ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE =   (1 << 10),
397 
398    /** For non-device-local allocations */
399    ANV_BO_ALLOC_NO_LOCAL_MEM =            (1 << 11),
400 
401    /** This buffer will be scanned out to the display */
402    ANV_BO_ALLOC_SCANOUT =                 (1 << 12),
403 
404    /** For descriptor pools */
405    ANV_BO_ALLOC_DESCRIPTOR_POOL =         (1 << 13),
406 
407    /** For buffers that will be bound using TR-TT.
408     *
409     * Not for buffers used as the TR-TT page tables.
410     */
411    ANV_BO_ALLOC_TRTT =                    (1 << 14),
412 
413    /** Protected buffer */
414    ANV_BO_ALLOC_PROTECTED =               (1 << 15),
415 
416    /** Specifies that the BO should be cached and incoherent. */
417    ANV_BO_ALLOC_HOST_CACHED =             (1 << 16),
418 
419    /** For buffers addressable from the dynamic state heap */
420    ANV_BO_ALLOC_DYNAMIC_VISIBLE_POOL =    (1 << 17),
421 
422    /** Specifies that the BO is imported.
423     *
424     * Imported BOs must also be marked as ANV_BO_ALLOC_EXTERNAL
425     */
426    ANV_BO_ALLOC_IMPORTED =                (1 << 18),
427 
428    /** Specify whether this BO is internal to the driver */
429    ANV_BO_ALLOC_INTERNAL =                (1 << 19),
430 
431    /** Allocate with CCS AUX requirements
432     *
433     * This pads the BO to include CCS data mappable through the AUX-TT and
434     * aligned to the AUX-TT requirements.
435     */
436    ANV_BO_ALLOC_AUX_CCS =                 (1 << 20),
437 
438    /** Compressed buffer, only supported in Xe2+ */
439    ANV_BO_ALLOC_COMPRESSED =              (1 << 21),
440 };
441 
442 /** Specifies that the BO should be cached and coherent. */
443 #define ANV_BO_ALLOC_HOST_CACHED_COHERENT (ANV_BO_ALLOC_HOST_COHERENT | \
444                                            ANV_BO_ALLOC_HOST_CACHED)
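/* Note: ANV_BO_ALLOC_HOST_CACHED on its own (without ANV_BO_ALLOC_HOST_COHERENT)
 * gives a cached but incoherent CPU mapping, which is why
 * anv_bo_needs_host_cache_flush() below reports such BOs as needing explicit
 * flushes.
 */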
445 
446 
447 struct anv_bo {
448    const char *name;
449 
450    /* The VMA heap in anv_device from which this BO takes its offset.
451     *
452     * This can only be NULL when has_fixed_address is true.
453     */
454    struct util_vma_heap *vma_heap;
455 
456    /* All userptr BOs in the Xe KMD have gem_handle set to workaround_bo->gem_handle */
457    uint32_t gem_handle;
458 
459    uint32_t refcount;
460 
461    /* Index into the current validation list.  This is used by the
462     * validation list building algorithm to track which buffers are already
463     * in the validation list so that we can ensure uniqueness.
464     */
465    uint32_t exec_obj_index;
466 
467    /* Index for use with util_sparse_array_free_list */
468    uint32_t free_index;
469 
470    /* Last known offset.  This value is provided by the kernel when we
471     * execbuf and is used as the presumed offset for the next bunch of
472     * relocations, in canonical address format.
473     */
474    uint64_t offset;
475 
476    /** Size of the buffer */
477    uint64_t size;
478 
479    /** Offset at which the CCS data is stored */
480    uint64_t ccs_offset;
481 
482    /* Map for internally mapped BOs.
483     *
484     * If ANV_BO_ALLOC_MAPPED is set in flags, this is the map for the whole
485     * BO.
486     */
487    void *map;
488 
489    /* The actual size of the BO allocated by the KMD, basically:
490     * align(size, mem_alignment)
491     */
492    uint64_t actual_size;
493 
494    /** Flags to pass to the kernel through drm_i915_exec_object2::flags */
495    uint32_t flags;
496 
497    enum anv_bo_alloc_flags alloc_flags;
498 
499    /** True if this BO wraps a host pointer */
500    bool from_host_ptr:1;
501 
502    /** True if this BO is mapped in the GTT (only used for RMV) */
503    bool gtt_mapped:1;
504 };
505 
506 static inline bool
507 anv_bo_is_external(const struct anv_bo *bo)
508 {
509    return bo->alloc_flags & ANV_BO_ALLOC_EXTERNAL;
510 }
511 
512 static inline bool
513 anv_bo_is_vram_only(const struct anv_bo *bo)
514 {
515    return !(bo->alloc_flags & (ANV_BO_ALLOC_NO_LOCAL_MEM |
516                                ANV_BO_ALLOC_MAPPED |
517                                ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE |
518                                ANV_BO_ALLOC_IMPORTED));
519 }
520 
521 static inline struct anv_bo *
522 anv_bo_ref(struct anv_bo *bo)
523 {
524    p_atomic_inc(&bo->refcount);
525    return bo;
526 }
527 
528 enum intel_device_info_mmap_mode
529 anv_bo_get_mmap_mode(struct anv_device *device, struct anv_bo *bo);
530 
531 static inline bool
532 anv_bo_needs_host_cache_flush(enum anv_bo_alloc_flags alloc_flags)
533 {
534    return (alloc_flags & (ANV_BO_ALLOC_HOST_CACHED | ANV_BO_ALLOC_HOST_COHERENT)) ==
535           ANV_BO_ALLOC_HOST_CACHED;
536 }
537 
538 struct anv_address {
539    struct anv_bo *bo;
540    int64_t offset;
541 };
542 
543 #define ANV_NULL_ADDRESS ((struct anv_address) { NULL, 0 })
544 
545 static inline struct anv_address
546 anv_address_from_u64(uint64_t addr_u64)
547 {
548    assert(addr_u64 == intel_canonical_address(addr_u64));
549    return (struct anv_address) {
550       .bo = NULL,
551       .offset = addr_u64,
552    };
553 }
554 
555 static inline bool
556 anv_address_is_null(struct anv_address addr)
557 {
558    return addr.bo == NULL && addr.offset == 0;
559 }
560 
561 static inline uint64_t
562 anv_address_physical(struct anv_address addr)
563 {
564    uint64_t address = (addr.bo ? addr.bo->offset : 0ull) + addr.offset;
565    return intel_canonical_address(address);
566 }
567 
568 static inline struct u_trace_address
569 anv_address_utrace(struct anv_address addr)
570 {
571    return (struct u_trace_address) {
572       .bo = addr.bo,
573       .offset = addr.offset,
574    };
575 }
576 
577 static inline struct anv_address
578 anv_address_add(struct anv_address addr, uint64_t offset)
579 {
580    addr.offset += offset;
581    return addr;
582 }
583 
584 static inline struct anv_address
585 anv_address_add_aligned(struct anv_address addr, uint64_t offset, uint32_t alignment)
586 {
587    addr.offset = align(addr.offset + offset, alignment);
588    return addr;
589 }
590 
591 static inline void *
592 anv_address_map(struct anv_address addr)
593 {
594    if (addr.bo == NULL)
595       return NULL;
596 
597    if (addr.bo->map == NULL)
598       return NULL;
599 
600    return addr.bo->map + addr.offset;
601 }
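/* Illustrative example of composing the helpers above (names hypothetical):
 *    struct anv_address a = anv_address_add(base, 256);
 *    uint64_t gpu_va = anv_address_physical(a);
 *    void *cpu_ptr = anv_address_map(a);
 * gives the canonical GPU address of base + 256 and, if the BO is mapped,
 * the CPU pointer to the same byte.
 */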
602 
603 /* Represent a virtual address range */
604 struct anv_va_range {
605    uint64_t addr;
606    uint64_t size;
607 };
608 
609 /* Represents a lock-free linked list of "free" things.  This is used by
610  * both the block pool and the state pools.  Unfortunately, in order to
611  * solve the ABA problem, we can't use a single uint32_t head.
612  */
613 union anv_free_list {
614    struct {
615       uint32_t offset;
616 
617       /* A simple count that is incremented every time the head changes. */
618       uint32_t count;
619    };
620    /* Make sure it's aligned to 64 bits. This will make atomic operations
621     * faster on 32 bit platforms.
622     */
623    alignas(8) uint64_t u64;
624 };
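/* Why the count is needed: with a bare offset as the head, a stale
 * compare-and-swap could still succeed after an element was popped and later
 * pushed back at the same offset (the ABA problem). Bumping the count on
 * every head change makes the 64-bit CAS on u64 fail in that situation.
 */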
625 
626 #define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { UINT32_MAX, 0 } })
627 
628 struct anv_block_state {
629    union {
630       struct {
631          uint32_t next;
632          uint32_t end;
633       };
634       /* Make sure it's aligned to 64 bits. This will make atomic operations
635        * faster on 32 bit platforms.
636        */
637       alignas(8) uint64_t u64;
638    };
639 };
640 
641 #define anv_block_pool_foreach_bo(bo, pool)  \
642    for (struct anv_bo **_pp_bo = (pool)->bos, *bo; \
643         _pp_bo != &(pool)->bos[(pool)->nbos] && (bo = *_pp_bo, true); \
644         _pp_bo++)
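/* Illustrative usage of the iterator above (names hypothetical):
 *    anv_block_pool_foreach_bo(bo, pool)
 *       total_bytes += bo->size;
 */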
645 
646 #define ANV_MAX_BLOCK_POOL_BOS 20
647 
648 struct anv_block_pool {
649    const char *name;
650 
651    struct anv_device *device;
652 
653    struct anv_bo *bos[ANV_MAX_BLOCK_POOL_BOS];
654    struct anv_bo *bo;
655    uint32_t nbos;
656 
657    /* Maximum size of the pool */
658    uint64_t max_size;
659 
660    /* Current size of the pool */
661    uint64_t size;
662 
663    /* The canonical address where the start of the pool is pinned. The various bos that
664     * are created as the pool grows will have addresses in the range
665     * [start_address, start_address + BLOCK_POOL_MEMFD_SIZE).
666     */
667    uint64_t start_address;
668 
669    /* The offset from the start of the bo to the "center" of the block
670     * pool.  Pointers to allocated blocks are given by
671     * bo.map + center_bo_offset + offsets.
672     */
673    uint32_t center_bo_offset;
674 
675    struct anv_block_state state;
676 
677    enum anv_bo_alloc_flags bo_alloc_flags;
678 };
679 
680 /* Block pools are backed by a fixed-size 1GB memfd */
681 #define BLOCK_POOL_MEMFD_SIZE (1ul << 30)
682 
683 /* The center of the block pool is also the middle of the memfd.  This may
684  * change in the future if we decide differently for some reason.
685  */
686 #define BLOCK_POOL_MEMFD_CENTER (BLOCK_POOL_MEMFD_SIZE / 2)
687 
688 static inline uint32_t
689 anv_block_pool_size(struct anv_block_pool *pool)
690 {
691    return pool->state.end;
692 }
693 
694 struct anv_state {
695    int64_t offset;
696    uint32_t alloc_size;
697    uint32_t idx;
698    void *map;
699 };
700 
701 #define ANV_STATE_NULL ((struct anv_state) { .alloc_size = 0 })
702 
703 struct anv_fixed_size_state_pool {
704    union anv_free_list free_list;
705    struct anv_block_state block;
706 };
707 
708 #define ANV_MIN_STATE_SIZE_LOG2 6
709 #define ANV_MAX_STATE_SIZE_LOG2 24
710 
711 #define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1)
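/* With the values above this comes out to 19 buckets, covering state sizes
 * from 64 bytes (1 << ANV_MIN_STATE_SIZE_LOG2) up to 16 MB
 * (1 << ANV_MAX_STATE_SIZE_LOG2).
 */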
712 
713 struct anv_free_entry {
714    uint32_t next;
715    struct anv_state state;
716 };
717 
718 struct anv_state_table {
719    struct anv_device *device;
720    int fd;
721    struct anv_free_entry *map;
722    uint32_t size;
723    uint64_t max_size;
724    struct anv_block_state state;
725    struct u_vector cleanups;
726 };
727 
728 struct anv_state_pool {
729    struct anv_block_pool block_pool;
730 
731    /* Offset into the relevant state base address where the state pool starts
732     * allocating memory.
733     */
734    int64_t start_offset;
735 
736    struct anv_state_table table;
737 
738    /* The size of blocks which will be allocated from the block pool */
739    uint32_t block_size;
740 
741    struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS];
742 };
743 
744 struct anv_state_reserved_pool {
745    struct anv_state_pool *pool;
746    union anv_free_list reserved_blocks;
747    uint32_t count;
748 };
749 
750 struct anv_state_reserved_array_pool {
751    struct anv_state_pool *pool;
752    simple_mtx_t mutex;
753    /* Bitfield of usable elements */
754    BITSET_WORD *states;
755    /* Backing store */
756    struct anv_state state;
757    /* Number of elements */
758    uint32_t count;
759    /* Stride between each element */
760    uint32_t stride;
761    /* Size of each element */
762    uint32_t size;
763 };
764 
765 struct anv_state_stream {
766    struct anv_state_pool *state_pool;
767 
768    /* The size of blocks to allocate from the state pool */
769    uint32_t block_size;
770 
771    /* Current block we're allocating from */
772    struct anv_state block;
773 
774    /* Offset into the current block at which to allocate the next state */
775    uint32_t next;
776 
777    /* Sum of all the blocks in all_blocks */
778    uint32_t total_size;
779 
780    /* List of all blocks allocated from this pool */
781    struct util_dynarray all_blocks;
782 };
783 
784 /* The block_pool functions are exported for testing only.  The block pool should
785  * only be used via a state pool (see below).
786  */
787 VkResult anv_block_pool_init(struct anv_block_pool *pool,
788                              struct anv_device *device,
789                              const char *name,
790                              uint64_t start_address,
791                              uint32_t initial_size,
792                              uint32_t max_size);
793 void anv_block_pool_finish(struct anv_block_pool *pool);
794 VkResult anv_block_pool_alloc(struct anv_block_pool *pool,
795                               uint32_t block_size,
796                               int64_t *offset,
797                               uint32_t *padding);
798 void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset, uint32_t
799 size);
800 
801 struct anv_state_pool_params {
802    const char *name;
803    uint64_t    base_address;
804    int64_t     start_offset;
805    uint32_t    block_size;
806    uint32_t    max_size;
807 };
808 
809 VkResult anv_state_pool_init(struct anv_state_pool *pool,
810                              struct anv_device *device,
811                              const struct anv_state_pool_params *params);
812 void anv_state_pool_finish(struct anv_state_pool *pool);
813 struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool,
814                                       uint32_t state_size, uint32_t alignment);
815 void anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state);
816 
817 static inline struct anv_address
818 anv_state_pool_state_address(struct anv_state_pool *pool, struct anv_state state)
819 {
820    return (struct anv_address) {
821       .bo = pool->block_pool.bo,
822       .offset = state.offset - pool->start_offset,
823    };
824 }
825 
826 static inline struct anv_state
827 anv_state_pool_emit_data(struct anv_state_pool *pool,
828                          size_t size, size_t align,
829                          const void *p)
830 {
831    struct anv_state state;
832 
833    state = anv_state_pool_alloc(pool, size, align);
834    memcpy(state.map, p, size);
835 
836    return state;
837 }
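/* Illustrative usage (names hypothetical): upload a small constant blob and
 * keep the returned state for later reference:
 *    const float coords[4] = { 0.0f, 0.0f, 1.0f, 1.0f };
 *    struct anv_state s =
 *       anv_state_pool_emit_data(pool, sizeof(coords), 16, coords);
 */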
838 
839 void anv_state_stream_init(struct anv_state_stream *stream,
840                            struct anv_state_pool *state_pool,
841                            uint32_t block_size);
842 void anv_state_stream_finish(struct anv_state_stream *stream);
843 struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream,
844                                         uint32_t size, uint32_t alignment);
845 
846 void anv_state_reserved_pool_init(struct anv_state_reserved_pool *pool,
847                                       struct anv_state_pool *parent,
848                                       uint32_t count, uint32_t size,
849                                       uint32_t alignment);
850 void anv_state_reserved_pool_finish(struct anv_state_reserved_pool *pool);
851 struct anv_state anv_state_reserved_pool_alloc(struct anv_state_reserved_pool *pool);
852 void anv_state_reserved_pool_free(struct anv_state_reserved_pool *pool,
853                                   struct anv_state state);
854 
855 VkResult anv_state_reserved_array_pool_init(struct anv_state_reserved_array_pool *pool,
856                                             struct anv_state_pool *parent,
857                                             uint32_t count, uint32_t size,
858                                             uint32_t alignment);
859 void anv_state_reserved_array_pool_finish(struct anv_state_reserved_array_pool *pool);
860 struct anv_state anv_state_reserved_array_pool_alloc(struct anv_state_reserved_array_pool *pool,
861                                                      bool alloc_back);
862 struct anv_state anv_state_reserved_array_pool_alloc_index(struct anv_state_reserved_array_pool *pool,
863                                                            unsigned idx);
864 uint32_t anv_state_reserved_array_pool_state_index(struct anv_state_reserved_array_pool *pool,
865                                                    struct anv_state state);
866 void anv_state_reserved_array_pool_free(struct anv_state_reserved_array_pool *pool,
867                                         struct anv_state state);
868 
869 VkResult anv_state_table_init(struct anv_state_table *table,
870                              struct anv_device *device,
871                              uint32_t initial_entries);
872 void anv_state_table_finish(struct anv_state_table *table);
873 VkResult anv_state_table_add(struct anv_state_table *table, uint32_t *idx,
874                              uint32_t count);
875 void anv_free_list_push(union anv_free_list *list,
876                         struct anv_state_table *table,
877                         uint32_t idx, uint32_t count);
878 struct anv_state* anv_free_list_pop(union anv_free_list *list,
879                                     struct anv_state_table *table);
880 
881 
882 static inline struct anv_state *
883 anv_state_table_get(struct anv_state_table *table, uint32_t idx)
884 {
885    return &table->map[idx].state;
886 }
887 /**
888  * Implements a pool of re-usable BOs.  The interface is identical to that
889  * of block_pool except that each block is its own BO.
890  */
891 struct anv_bo_pool {
892    const char *name;
893 
894    struct anv_device *device;
895 
896    enum anv_bo_alloc_flags bo_alloc_flags;
897 
898    struct util_sparse_array_free_list free_list[16];
899 };
900 
901 void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device,
902                       const char *name, enum anv_bo_alloc_flags alloc_flags);
903 void anv_bo_pool_finish(struct anv_bo_pool *pool);
904 VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size,
905                            struct anv_bo **bo_out);
906 void anv_bo_pool_free(struct anv_bo_pool *pool, struct anv_bo *bo);
907 
908 struct anv_scratch_pool {
909    enum anv_bo_alloc_flags alloc_flags;
910    /* Indexed by Per-Thread Scratch Space number (the hardware value) and stage */
911    struct anv_bo *bos[16][MESA_SHADER_STAGES];
912    uint32_t surfs[16];
913    struct anv_state surf_states[16];
914 };
915 
916 void anv_scratch_pool_init(struct anv_device *device,
917                            struct anv_scratch_pool *pool,
918                            bool protected);
919 void anv_scratch_pool_finish(struct anv_device *device,
920                              struct anv_scratch_pool *pool);
921 struct anv_bo *anv_scratch_pool_alloc(struct anv_device *device,
922                                       struct anv_scratch_pool *pool,
923                                       gl_shader_stage stage,
924                                       unsigned per_thread_scratch);
925 uint32_t anv_scratch_pool_get_surf(struct anv_device *device,
926                                    struct anv_scratch_pool *pool,
927                                    unsigned per_thread_scratch);
928 
929 /* Note that on Gfx12HP we pass a scratch space surface state offset
930  * shifted by 2 relative to the value specified on the BSpec, since
931  * that allows the compiler to save a shift instruction while
932  * constructing the extended descriptor for SS addressing.  That
933  * worked because we limit the scratch surface state pool to 8 MB and
934  * because we relied on the legacy (ExBSO=0) encoding of the extended
935  * descriptor in order to save the shift, which is no longer supported
936  * for the UGM shared function on Xe2 platforms, so we no longer
937  * attempt to do that trick.
938  */
939 #define ANV_SCRATCH_SPACE_SHIFT(ver) ((ver) >= 20 ? 6 : 4)
940 
941 /** Implements a BO cache that ensures a 1-1 mapping of GEM BOs to anv_bos */
942 struct anv_bo_cache {
943    struct util_sparse_array bo_map;
944    pthread_mutex_t mutex;
945 };
946 
947 VkResult anv_bo_cache_init(struct anv_bo_cache *cache,
948                            struct anv_device *device);
949 void anv_bo_cache_finish(struct anv_bo_cache *cache);
950 
951 struct anv_queue_family {
952    /* Standard bits passed on to the client */
953    VkQueueFlags   queueFlags;
954    uint32_t       queueCount;
955 
956    enum intel_engine_class engine_class;
957    bool supports_perf;
958 };
959 
960 #define ANV_MAX_QUEUE_FAMILIES 5
961 
962 struct anv_memory_type {
963    /* Standard bits passed on to the client */
964    VkMemoryPropertyFlags   propertyFlags;
965    uint32_t                heapIndex;
966    /* Whether this is the dynamic visible memory type */
967    bool                    dynamic_visible;
968    bool                    compressed;
969 };
970 
971 struct anv_memory_heap {
972    /* Standard bits passed on to the client */
973    VkDeviceSize      size;
974    VkMemoryHeapFlags flags;
975 
976    /** Driver-internal book-keeping.
977     *
978     * Align it to 64 bits to make atomic operations faster on 32 bit platforms.
979     */
980    alignas(8) VkDeviceSize used;
981 
982    bool              is_local_mem;
983 };
984 
985 struct anv_memregion {
986    const struct intel_memory_class_instance *region;
987    uint64_t size;
988    uint64_t available;
989 };
990 
991 enum anv_timestamp_capture_type {
992     ANV_TIMESTAMP_CAPTURE_TOP_OF_PIPE,
993     ANV_TIMESTAMP_CAPTURE_END_OF_PIPE,
994     ANV_TIMESTAMP_CAPTURE_AT_CS_STALL,
995     ANV_TIMESTAMP_REWRITE_COMPUTE_WALKER,
996     ANV_TIMESTAMP_REWRITE_INDIRECT_DISPATCH,
997 };
998 
999 struct anv_physical_device {
1000     struct vk_physical_device                   vk;
1001 
1002     /* Link in anv_instance::physical_devices */
1003     struct list_head                            link;
1004 
1005     struct anv_instance *                       instance;
1006     char                                        path[20];
1007     struct intel_device_info                      info;
1008 
1009     bool                                        video_decode_enabled;
1010     bool                                        video_encode_enabled;
1011 
1012     struct brw_compiler *                       compiler;
1013     struct isl_device                           isl_dev;
1014     struct intel_perf_config *                    perf;
1015     /*
1016      * Number of commands required to implement a performance query begin +
1017      * end.
1018      */
1019     uint32_t                                    n_perf_query_commands;
1020     bool                                        has_exec_async;
1021     bool                                        has_exec_capture;
1022     VkQueueGlobalPriorityKHR                    max_context_priority;
1023     uint64_t                                    gtt_size;
1024 
1025     bool                                        always_use_bindless;
1026     bool                                        use_call_secondary;
1027 
1028     /** True if we can use timeline semaphores through execbuf */
1029     bool                                        has_exec_timeline;
1030 
1031     /** True if we can read the GPU timestamp register
1032      *
1033      * When running in a virtual context, the timestamp register is unreadable
1034      * on Gfx12+.
1035      */
1036     bool                                        has_reg_timestamp;
1037 
1038     /** True if we can create protected contexts. */
1039     bool                                        has_protected_contexts;
1040 
1041     /** Whether KMD has the ability to create VM objects */
1042     bool                                        has_vm_control;
1043 
1044     /** True if we have the means to do sparse binding (e.g., a kernel driver
1045      * with a vm_bind ioctl).
1046      */
1047     enum anv_sparse_type {
1048       ANV_SPARSE_TYPE_NOT_SUPPORTED = 0,
1049       ANV_SPARSE_TYPE_VM_BIND,
1050       ANV_SPARSE_TYPE_TRTT,
1051       ANV_SPARSE_TYPE_FAKE,
1052     } sparse_type;
1053 
1054     /** True if HW supports ASTC LDR */
1055     bool                                        has_astc_ldr;
1056     /** True if denorms in void extents should be flushed to zero */
1057     bool                                        flush_astc_ldr_void_extent_denorms;
1058     /** True if ASTC LDR is supported via emulation */
1059     bool                                        emu_astc_ldr;
1060     /* true if FCV optimization should be disabled. */
1061     bool                                        disable_fcv;
1062     /**/
1063     bool                                        uses_ex_bso;
1064 
1065     bool                                        always_flush_cache;
1066 
1067     /** True if application memory is allocated with extra AUX memory
1068      *
1069      * Applications quite often pool image allocations together in a single
1070      * VkDeviceMemory object. On platforms like MTL, the alignment of images
1071      * with compression mapped through the AUX translation tables is large:
1072      * 1MB. This can create a lot of wasted space in the application memory
1073      * objects.
1074      *
1075      * To workaround this problem, we allocate CCS data at the end of
1076      * VkDeviceMemory objects. This would not work well for TGL-like platforms
1077      * because the AUX translation tables also contain the format of the
1078      * images, but on MTL the HW ignore those values. So we can share the AUX
1079      * TT entries between different images without problem.
1080      *
1081      * This should be only true for platforms with AUX TT.
1082      */
1083     bool                                         alloc_aux_tt_mem;
1084 
1085     /**
1086      * True if the descriptors buffers are holding one of the following :
1087      *    - anv_sampled_image_descriptor
1088      *    - anv_storage_image_descriptor
1089      *    - anv_address_range_descriptor
1090      *
1091      * Accessing the descriptors in a bindless fashion from the shader
1092      * requires an indirection in the shader, first fetch one of the structure
1093      * listed above from the descriptor buffer, then emit the send message to
1094      * the fixed function (sampler, dataport, etc...) with the handle fetched
1095      * above.
1096      *
1097      * We need to do things this way prior to DG2 because the bindless surface
1098      * state space is limited to 64MB and some applications will allocate more
1099      * than what the HW can support. On DG2+ we get 4GB of bindless surface state
1100      * and so we can reference RENDER_SURFACE_STATE/SAMPLER_STATE structures
1101      * directly instead.
1102      */
1103     bool                                        indirect_descriptors;
1104 
1105     bool                                        uses_relocs;
1106 
1107     /** Can the platform support cooperative matrices and is it enabled? */
1108     bool                                        has_cooperative_matrix;
1109 
1110     struct {
1111       uint32_t                                  family_count;
1112       struct anv_queue_family                   families[ANV_MAX_QUEUE_FAMILIES];
1113     } queue;
1114 
1115     struct {
1116       uint32_t                                  type_count;
1117       struct anv_memory_type                    types[VK_MAX_MEMORY_TYPES];
1118       uint32_t                                  heap_count;
1119       struct anv_memory_heap                    heaps[VK_MAX_MEMORY_HEAPS];
1120 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
1121       bool                                      need_flush;
1122 #endif
1123       /** Mask of memory types of normal allocations */
1124       uint32_t                                  default_buffer_mem_types;
1125       /** Mask of memory types of data indexable from the dynamic heap */
1126       uint32_t                                  dynamic_visible_mem_types;
1127       /** Mask of memory types of protected buffers/images */
1128       uint32_t                                  protected_mem_types;
1129       /** Mask of memory types of compressed buffers/images */
1130       uint32_t                                  compressed_mem_types;
1131     } memory;
1132 
1133     struct {
1134        /**
1135         * General state pool
1136         */
1137        struct anv_va_range                      general_state_pool;
1138        /**
1139         * Low 32bit heap
1140         */
1141        struct anv_va_range                      low_heap;
1142        /**
1143         * Binding table pool
1144         */
1145        struct anv_va_range                      binding_table_pool;
1146        /**
1147         * Internal surface states for blorp & push descriptors.
1148         */
1149        struct anv_va_range                      internal_surface_state_pool;
1150        /**
1151         * Scratch surfaces (overlaps with internal_surface_state_pool).
1152         */
1153        struct anv_va_range                      scratch_surface_state_pool;
1154        /**
1155         * Bindless surface states (indirectly referred to by indirect
1156         * descriptors or for direct descriptors)
1157         */
1158        struct anv_va_range                      bindless_surface_state_pool;
1159        /**
1160         * Dynamic state pool
1161         */
1162        struct anv_va_range                      dynamic_state_pool;
1163        /**
1164         * Buffer pool that can be indexed from the dynamic state heap
1165         */
1166        struct anv_va_range                      dynamic_visible_pool;
1167        /**
1168         * Indirect descriptor pool
1169         */
1170        struct anv_va_range                      indirect_descriptor_pool;
1171        /**
1172         * Indirect push descriptor pool
1173         */
1174        struct anv_va_range                      indirect_push_descriptor_pool;
1175        /**
1176         * Instruction state pool
1177         */
1178        struct anv_va_range                      instruction_state_pool;
1179        /**
1180         * Push descriptor with descriptor buffers
1181         */
1182        struct anv_va_range                      push_descriptor_buffer_pool;
1183        /**
1184         * AUX-TT
1185         */
1186        struct anv_va_range                      aux_tt_pool;
1187        /**
1188         * Client heap
1189         */
1190        struct anv_va_range                      high_heap;
1191        struct anv_va_range                      trtt;
1192     } va;
1193 
1194     /* Either we have a single vram region and it's all mappable, or we have
1195      * both mappable & non-mappable parts. System memory is always available.
1196      */
1197     struct anv_memregion                        vram_mappable;
1198     struct anv_memregion                        vram_non_mappable;
1199     struct anv_memregion                        sys;
1200     uint8_t                                     driver_build_sha1[20];
1201     uint8_t                                     pipeline_cache_uuid[VK_UUID_SIZE];
1202     uint8_t                                     driver_uuid[VK_UUID_SIZE];
1203     uint8_t                                     device_uuid[VK_UUID_SIZE];
1204     uint8_t                                     rt_uuid[VK_UUID_SIZE];
1205 
1206     /* Maximum amount of scratch space used by all the GRL kernels */
1207     uint32_t                                    max_grl_scratch_size;
1208 
1209     struct vk_sync_type                         sync_syncobj_type;
1210     struct vk_sync_timeline_type                sync_timeline_type;
1211     const struct vk_sync_type *                 sync_types[4];
1212 
1213     struct wsi_device                       wsi_device;
1214     int                                         local_fd;
1215     bool                                        has_local;
1216     int64_t                                     local_major;
1217     int64_t                                     local_minor;
1218     int                                         master_fd;
1219     bool                                        has_master;
1220     int64_t                                     master_major;
1221     int64_t                                     master_minor;
1222     struct intel_query_engine_info *            engine_info;
1223 
1224     void (*cmd_emit_timestamp)(struct anv_batch *, struct anv_device *, struct anv_address,
1225                                enum anv_timestamp_capture_type, void *);
1226     void (*cmd_capture_data)(struct anv_batch *, struct anv_device *,
1227                              struct anv_address, struct anv_address,
1228                              uint32_t);
1229     struct intel_measure_device                 measure_device;
1230 
1231     /* Value of PIPELINE_SELECT::PipelineSelection == GPGPU */
1232     uint32_t                                    gpgpu_pipeline_value;
1233 
1234     /** A pre packed VERTEX_ELEMENT_STATE feeding 0s to the VS stage
1235      *
1236      * For use when a pipeline has no VS input
1237      */
1238     uint32_t                                    empty_vs_input[2];
1239 };
1240 
1241 VkResult anv_physical_device_try_create(struct vk_instance *vk_instance,
1242                                         struct _drmDevice *drm_device,
1243                                         struct vk_physical_device **out);
1244 
1245 void anv_physical_device_destroy(struct vk_physical_device *vk_device);
1246 
1247 static inline uint32_t
1248 anv_physical_device_bindless_heap_size(const struct anv_physical_device *device,
1249                                        bool descriptor_buffer)
1250 {
1251    /* Pre-Gfx12.5, the HW bindless surface heap is only 64MB. Afterwards it's 4GB,
1252     * but we have some workarounds that require 2 heaps to overlap, so the
1253     * size is dictated by our VA allocation.
1254     */
1255    return device->uses_ex_bso ?
1256       (descriptor_buffer ?
1257        device->va.dynamic_visible_pool.size :
1258        device->va.bindless_surface_state_pool.size) :
1259       64 * 1024 * 1024 /* 64 MiB */;
1260 }
1261 
1262 static inline bool
1263 anv_physical_device_has_vram(const struct anv_physical_device *device)
1264 {
1265    return device->vram_mappable.size > 0;
1266 }
1267 
1268 struct anv_instance {
1269     struct vk_instance                          vk;
1270 
1271     struct driOptionCache                       dri_options;
1272     struct driOptionCache                       available_dri_options;
1273 
1274     int                                         mesh_conv_prim_attrs_to_vert_attrs;
1275     bool                                        enable_tbimr;
1276     bool                                        external_memory_implicit_sync;
1277     bool                                        force_guc_low_latency;
1278 
1279     /**
1280      * Workarounds for game bugs.
1281      */
1282     uint8_t                                     assume_full_subgroups;
1283     bool                                        assume_full_subgroups_with_barrier;
1284     bool                                        limit_trig_input_range;
1285     bool                                        sample_mask_out_opengl_behaviour;
1286     bool                                        force_filter_addr_rounding;
1287     bool                                        fp64_workaround_enabled;
1288     float                                       lower_depth_range_rate;
1289     unsigned                                    generated_indirect_threshold;
1290     unsigned                                    generated_indirect_ring_threshold;
1291     unsigned                                    query_clear_with_blorp_threshold;
1292     unsigned                                    query_copy_with_shader_threshold;
1293     unsigned                                    force_vk_vendor;
1294     bool                                        has_fake_sparse;
1295     bool                                        disable_fcv;
1296     bool                                        disable_xe2_ccs;
1297     bool                                        compression_control_enabled;
1298     bool                                        anv_fake_nonlocal_memory;
1299 
1300     /* HW workarounds */
1301     bool                                        no_16bit;
1302     bool                                        intel_enable_wa_14018912822;
1303 
1304     /**
1305      * Ray tracing configuration.
1306      */
1307     unsigned                                    stack_ids;
1308 };
1309 
1310 VkResult anv_init_wsi(struct anv_physical_device *physical_device);
1311 void anv_finish_wsi(struct anv_physical_device *physical_device);
1312 
1313 struct anv_queue {
1314    struct vk_queue                           vk;
1315 
1316    struct anv_device *                       device;
1317 
1318    const struct anv_queue_family *           family;
1319 
1320    struct intel_batch_decode_ctx *           decoder;
1321 
1322    union {
1323       uint32_t                               exec_flags; /* i915 */
1324       uint32_t                               context_id; /* i915 */
1325       uint32_t                               exec_queue_id; /* Xe */
1326    };
1327 
1328    /** Context/Engine id which executes companion RCS command buffer */
1329    uint32_t                                  companion_rcs_id;
1330 
1331    /** Synchronization object for debug purposes (DEBUG_SYNC) */
1332    struct vk_sync                           *sync;
1333 
1334    /** Companion synchronization object
1335     *
1336     * Vulkan command buffers can be destroyed as soon as their lifecycle moves
1337     * from the Pending state to the Invalid/Executable state. This transition
1338     * happens when the VkFence/VkSemaphore associated with the completion of
1339     * the command buffer work is signaled.
1340     *
1341     * When we're using a companion command buffer to execute part of another
1342     * command buffer, we need to tie the 2 work submissions together to ensure
1343     * when the associated VkFence/VkSemaphore is signaled, both command
1344     * buffers are actually unused by the HW. To do this, we run an empty batch
1345     * buffer that we use to signal after both submissions :
1346     *
1347     *   CCS -->    main   ---> empty_batch (with wait on companion) --> signal
1348     *   RCS --> companion -|
1349     *
1350     * When the companion batch completes, it signals companion_sync and allows
1351     * empty_batch to execute. Since empty_batch is running on the main engine,
1352     * we're guaranteed that upon completion both main & companion command
1353     * buffers are not used by HW anymore.
1354     */
1355    struct vk_sync                           *companion_sync;
1356 
1357    struct intel_ds_queue                     ds;
1358 
1359    struct anv_async_submit                  *init_submit;
1360    struct anv_async_submit                  *init_companion_submit;
1361 };
1362 
1363 struct nir_xfb_info;
1364 struct anv_pipeline_bind_map;
1365 struct anv_pipeline_sets_layout;
1366 struct anv_push_descriptor_info;
1367 enum anv_dynamic_push_bits;
1368 
1369 void anv_device_init_embedded_samplers(struct anv_device *device);
1370 void anv_device_finish_embedded_samplers(struct anv_device *device);
1371 
1372 extern const struct vk_pipeline_cache_object_ops *const anv_cache_import_ops[2];
1373 
1374 struct anv_shader_bin *
1375 anv_device_search_for_kernel(struct anv_device *device,
1376                              struct vk_pipeline_cache *cache,
1377                              const void *key_data, uint32_t key_size,
1378                              bool *user_cache_bit);
1379 
1380 struct anv_shader_upload_params;
1381 
1382 struct anv_shader_bin *
1383 anv_device_upload_kernel(struct anv_device *device,
1384                          struct vk_pipeline_cache *cache,
1385                          const struct anv_shader_upload_params *params);
1386 
1387 struct nir_shader;
1388 struct nir_shader_compiler_options;
1389 
1390 struct nir_shader *
1391 anv_device_search_for_nir(struct anv_device *device,
1392                           struct vk_pipeline_cache *cache,
1393                           const struct nir_shader_compiler_options *nir_options,
1394                           unsigned char sha1_key[20],
1395                           void *mem_ctx);
1396 
1397 void
1398 anv_device_upload_nir(struct anv_device *device,
1399                       struct vk_pipeline_cache *cache,
1400                       const struct nir_shader *nir,
1401                       unsigned char sha1_key[20]);
1402 
1403 void
1404 anv_load_fp64_shader(struct anv_device *device);
1405 
1406 /**
1407  * This enum tracks the various HW instructions that hold graphics state
1408  * needing to be reprogrammed. Some instructions are grouped together as they
1409  * pretty much need to be emitted together (like 3DSTATE_URB_*).
1410  *
1411  * Not all bits apply to all platforms. We build a dirty state based on
1412  * enabled extensions & generation on anv_device.
1413  */
1414 enum anv_gfx_state_bits {
1415    /* Pipeline states */
1416    ANV_GFX_STATE_URB, /* All legacy stages, including mesh */
1417    ANV_GFX_STATE_VF_STATISTICS,
1418    ANV_GFX_STATE_VF_SGVS,
1419    ANV_GFX_STATE_VF_SGVS_2,
1420    ANV_GFX_STATE_VF_SGVS_VI, /* 3DSTATE_VERTEX_ELEMENTS for sgvs elements */
1421    ANV_GFX_STATE_VF_SGVS_INSTANCING, /* 3DSTATE_VF_INSTANCING for sgvs elements */
1422    ANV_GFX_STATE_PRIMITIVE_REPLICATION,
1423    ANV_GFX_STATE_SBE,
1424    ANV_GFX_STATE_SBE_SWIZ,
1425    ANV_GFX_STATE_SO_DECL_LIST,
1426    ANV_GFX_STATE_VS,
1427    ANV_GFX_STATE_HS,
1428    ANV_GFX_STATE_DS,
1429    ANV_GFX_STATE_GS,
1430    ANV_GFX_STATE_PS,
1431    ANV_GFX_STATE_SBE_MESH,
1432    ANV_GFX_STATE_CLIP_MESH,
1433    ANV_GFX_STATE_MESH_CONTROL,
1434    ANV_GFX_STATE_MESH_SHADER,
1435    ANV_GFX_STATE_MESH_DISTRIB,
1436    ANV_GFX_STATE_TASK_CONTROL,
1437    ANV_GFX_STATE_TASK_SHADER,
1438    ANV_GFX_STATE_TASK_REDISTRIB,
1439    /* Dynamic states */
1440    ANV_GFX_STATE_BLEND_STATE, /* Just the dynamic state structure */
1441    ANV_GFX_STATE_BLEND_STATE_PTR, /* The pointer to the dynamic state */
1442    ANV_GFX_STATE_CLIP,
1443    ANV_GFX_STATE_CC_STATE,
1444    ANV_GFX_STATE_CC_STATE_PTR,
1445    ANV_GFX_STATE_CPS,
1446    ANV_GFX_STATE_DEPTH_BOUNDS,
1447    ANV_GFX_STATE_INDEX_BUFFER,
1448    ANV_GFX_STATE_LINE_STIPPLE,
1449    ANV_GFX_STATE_MULTISAMPLE,
1450    ANV_GFX_STATE_PS_BLEND,
1451    ANV_GFX_STATE_RASTER,
1452    ANV_GFX_STATE_SAMPLE_MASK,
1453    ANV_GFX_STATE_SAMPLE_PATTERN,
1454    ANV_GFX_STATE_SCISSOR,
1455    ANV_GFX_STATE_SF,
1456    ANV_GFX_STATE_STREAMOUT,
1457    ANV_GFX_STATE_TE,
1458    ANV_GFX_STATE_VERTEX_INPUT,
1459    ANV_GFX_STATE_VF,
1460    ANV_GFX_STATE_VF_TOPOLOGY,
1461    ANV_GFX_STATE_VFG,
1462    ANV_GFX_STATE_VIEWPORT_CC,
1463    ANV_GFX_STATE_VIEWPORT_CC_PTR,
1464    ANV_GFX_STATE_VIEWPORT_SF_CLIP,
1465    ANV_GFX_STATE_WM,
1466    ANV_GFX_STATE_WM_DEPTH_STENCIL,
1467    ANV_GFX_STATE_PS_EXTRA,
1468    ANV_GFX_STATE_PMA_FIX, /* Fake state to implement workaround */
1469    ANV_GFX_STATE_WA_18019816803, /* Fake state to implement workaround */
1470    ANV_GFX_STATE_WA_14018283232, /* Fake state to implement workaround */
1471    ANV_GFX_STATE_TBIMR_TILE_PASS_INFO,
1472 
1473    ANV_GFX_STATE_MAX,
1474 };
1475 
1476 const char *anv_gfx_state_bit_to_str(enum anv_gfx_state_bits state);
1477 
1478 /* This structure tracks the values programmed into HW instructions
1479  * corresponding to the dynamic states of the Vulkan API. Only fields that need
1480  * to be reemitted outside of the VkPipeline object are tracked here.
1481  */
1482 struct anv_gfx_dynamic_state {
1483    /* 3DSTATE_BLEND_STATE_POINTERS */
1484    struct {
1485       bool AlphaToCoverageEnable;
1486       bool AlphaToOneEnable;
1487       bool IndependentAlphaBlendEnable;
1488       bool ColorDitherEnable;
1489       struct {
1490          bool     WriteDisableAlpha;
1491          bool     WriteDisableRed;
1492          bool     WriteDisableGreen;
1493          bool     WriteDisableBlue;
1494 
1495          uint32_t LogicOpFunction;
1496          bool     LogicOpEnable;
1497 
1498          bool     ColorBufferBlendEnable;
1499          uint32_t ColorClampRange;
1500          bool     PreBlendColorClampEnable;
1501          bool     PostBlendColorClampEnable;
1502          uint32_t SourceBlendFactor;
1503          uint32_t DestinationBlendFactor;
1504          uint32_t ColorBlendFunction;
1505          uint32_t SourceAlphaBlendFactor;
1506          uint32_t DestinationAlphaBlendFactor;
1507          uint32_t AlphaBlendFunction;
1508       } rts[MAX_RTS];
1509 
1510       struct anv_state state;
1511    } blend;
1512 
1513    /* 3DSTATE_CC_STATE_POINTERS */
1514    struct {
1515       float BlendConstantColorRed;
1516       float BlendConstantColorGreen;
1517       float BlendConstantColorBlue;
1518       float BlendConstantColorAlpha;
1519 
1520       struct anv_state state;
1521    } cc;
1522 
1523    /* 3DSTATE_CLIP */
1524    struct {
1525       uint32_t APIMode;
1526       uint32_t ViewportXYClipTestEnable;
1527       uint32_t MaximumVPIndex;
1528       uint32_t TriangleStripListProvokingVertexSelect;
1529       uint32_t LineStripListProvokingVertexSelect;
1530       uint32_t TriangleFanProvokingVertexSelect;
1531    } clip;
1532 
1533    /* 3DSTATE_CPS/3DSTATE_CPS_POINTERS */
1534    struct {
1535       /* Gfx11 */
1536       uint32_t CoarsePixelShadingMode;
1537       float    MinCPSizeX;
1538       float    MinCPSizeY;
1539       /* Gfx12+ */
1540       uint32_t CoarsePixelShadingStateArrayPointer;
1541    } cps;
1542 
1543    /* 3DSTATE_DEPTH_BOUNDS */
1544    struct {
1545       bool     DepthBoundsTestEnable;
1546       float    DepthBoundsTestMinValue;
1547       float    DepthBoundsTestMaxValue;
1548    } db;
1549 
1550    /* 3DSTATE_GS */
1551    struct {
1552       uint32_t ReorderMode;
1553    } gs;
1554 
1555    /* 3DSTATE_LINE_STIPPLE */
1556    struct {
1557       uint32_t LineStipplePattern;
1558       float    LineStippleInverseRepeatCount;
1559       uint32_t LineStippleRepeatCount;
1560    } ls;
1561 
1562    /* 3DSTATE_MULTISAMPLE */
1563    struct {
1564       uint32_t NumberofMultisamples;
1565    } ms;
1566 
1567    /* 3DSTATE_PS */
1568    struct {
1569       uint32_t PositionXYOffsetSelect;
1570 
1571       uint32_t KernelStartPointer0;
1572       uint32_t KernelStartPointer1;
1573       uint32_t KernelStartPointer2;
1574 
1575       uint32_t DispatchGRFStartRegisterForConstantSetupData0;
1576       uint32_t DispatchGRFStartRegisterForConstantSetupData1;
1577       uint32_t DispatchGRFStartRegisterForConstantSetupData2;
1578 
1579       /* Pre-Gfx20 only */
1580       bool     _8PixelDispatchEnable;
1581       bool     _16PixelDispatchEnable;
1582       bool     _32PixelDispatchEnable;
1583 
1584       /* Gfx20+ only */
1585       bool     Kernel0Enable;
1586       bool     Kernel1Enable;
1587       uint32_t Kernel0SIMDWidth;
1588       uint32_t Kernel1SIMDWidth;
1589       uint32_t Kernel0PolyPackingPolicy;
1590    } ps;
1591 
1592    /* 3DSTATE_PS_EXTRA */
1593    struct {
1594       bool PixelShaderHasUAV;
1595       bool PixelShaderIsPerSample;
1596       bool PixelShaderKillsPixel;
1597       bool PixelShaderIsPerCoarsePixel;
1598       bool EnablePSDependencyOnCPsizeChange;
1599    } ps_extra;
1600 
1601    /* 3DSTATE_PS_BLEND */
1602    struct {
1603       bool     HasWriteableRT;
1604       bool     ColorBufferBlendEnable;
1605       uint32_t SourceAlphaBlendFactor;
1606       uint32_t DestinationAlphaBlendFactor;
1607       uint32_t SourceBlendFactor;
1608       uint32_t DestinationBlendFactor;
1609       bool     AlphaTestEnable;
1610       bool     IndependentAlphaBlendEnable;
1611       bool     AlphaToCoverageEnable;
1612    } ps_blend;
1613 
1614    /* 3DSTATE_RASTER */
1615    struct {
1616       uint32_t APIMode;
1617       bool     DXMultisampleRasterizationEnable;
1618       bool     AntialiasingEnable;
1619       uint32_t CullMode;
1620       uint32_t FrontWinding;
1621       bool     GlobalDepthOffsetEnableSolid;
1622       bool     GlobalDepthOffsetEnableWireframe;
1623       bool     GlobalDepthOffsetEnablePoint;
1624       float    GlobalDepthOffsetConstant;
1625       float    GlobalDepthOffsetScale;
1626       float    GlobalDepthOffsetClamp;
1627       uint32_t FrontFaceFillMode;
1628       uint32_t BackFaceFillMode;
1629       bool     ViewportZFarClipTestEnable;
1630       bool     ViewportZNearClipTestEnable;
1631       bool     ConservativeRasterizationEnable;
1632    } raster;
1633 
1634    /* 3DSTATE_SCISSOR_STATE_POINTERS */
1635    struct {
1636       uint32_t count;
1637       struct {
1638          uint32_t ScissorRectangleYMin;
1639          uint32_t ScissorRectangleXMin;
1640          uint32_t ScissorRectangleYMax;
1641          uint32_t ScissorRectangleXMax;
1642       } elem[MAX_SCISSORS];
1643    } scissor;
1644 
1645    /* 3DSTATE_SF */
1646    struct {
1647       float    LineWidth;
1648       uint32_t TriangleStripListProvokingVertexSelect;
1649       uint32_t LineStripListProvokingVertexSelect;
1650       uint32_t TriangleFanProvokingVertexSelect;
1651       bool     LegacyGlobalDepthBiasEnable;
1652    } sf;
1653 
1654    /* 3DSTATE_STREAMOUT */
1655    struct {
1656       bool     RenderingDisable;
1657       uint32_t RenderStreamSelect;
1658       uint32_t ReorderMode;
1659       uint32_t ForceRendering;
1660    } so;
1661 
1662    /* 3DSTATE_SAMPLE_MASK */
1663    struct {
1664       uint32_t SampleMask;
1665    } sm;
1666 
1667    /* 3DSTATE_TE */
1668    struct {
1669       uint32_t OutputTopology;
1670    } te;
1671 
1672    /* 3DSTATE_VF */
1673    struct {
1674       bool     IndexedDrawCutIndexEnable;
1675       uint32_t CutIndex;
1676    } vf;
1677 
1678    /* 3DSTATE_VFG */
1679    struct {
1680       uint32_t DistributionMode;
1681       bool     ListCutIndexEnable;
1682    } vfg;
1683 
1684    /* 3DSTATE_VF_TOPOLOGY */
1685    struct {
1686       uint32_t PrimitiveTopologyType;
1687    } vft;
1688 
1689    /* 3DSTATE_VIEWPORT_STATE_POINTERS_CC */
1690    struct {
1691       uint32_t count;
1692       struct {
1693          float MinimumDepth;
1694          float MaximumDepth;
1695       } elem[MAX_VIEWPORTS];
1696 
1697       struct anv_state state;
1698    } vp_cc;
1699 
1700    /* 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP */
1701    struct {
1702       uint32_t count;
1703       struct {
1704          float ViewportMatrixElementm00;
1705          float ViewportMatrixElementm11;
1706          float ViewportMatrixElementm22;
1707          float ViewportMatrixElementm30;
1708          float ViewportMatrixElementm31;
1709          float ViewportMatrixElementm32;
1710          float XMinClipGuardband;
1711          float XMaxClipGuardband;
1712          float YMinClipGuardband;
1713          float YMaxClipGuardband;
1714          float XMinViewPort;
1715          float XMaxViewPort;
1716          float YMinViewPort;
1717          float YMaxViewPort;
1718       } elem[MAX_VIEWPORTS];
1719    } vp_sf_clip;
1720 
1721    /* 3DSTATE_WM */
1722    struct {
1723       bool     LineStippleEnable;
1724       uint32_t BarycentricInterpolationMode;
1725    } wm;
1726 
1727    /* 3DSTATE_WM_DEPTH_STENCIL */
1728    struct {
1729       bool     DoubleSidedStencilEnable;
1730       uint32_t StencilTestMask;
1731       uint32_t StencilWriteMask;
1732       uint32_t BackfaceStencilTestMask;
1733       uint32_t BackfaceStencilWriteMask;
1734       uint32_t StencilReferenceValue;
1735       uint32_t BackfaceStencilReferenceValue;
1736       bool     DepthTestEnable;
1737       bool     DepthBufferWriteEnable;
1738       uint32_t DepthTestFunction;
1739       bool     StencilTestEnable;
1740       bool     StencilBufferWriteEnable;
1741       uint32_t StencilFailOp;
1742       uint32_t StencilPassDepthPassOp;
1743       uint32_t StencilPassDepthFailOp;
1744       uint32_t StencilTestFunction;
1745       uint32_t BackfaceStencilFailOp;
1746       uint32_t BackfaceStencilPassDepthPassOp;
1747       uint32_t BackfaceStencilPassDepthFailOp;
1748       uint32_t BackfaceStencilTestFunction;
1749    } ds;
1750 
1751    /* 3DSTATE_TBIMR_TILE_PASS_INFO */
1752    struct {
1753       unsigned TileRectangleHeight;
1754       unsigned TileRectangleWidth;
1755       unsigned VerticalTileCount;
1756       unsigned HorizontalTileCount;
1757       unsigned TBIMRBatchSize;
1758       unsigned TileBoxCheck;
1759    } tbimr;
1760    bool use_tbimr;
1761 
1762    bool pma_fix;
1763 
1764    /**
1765     * DEPTH and STENCIL attachment write state for Wa_18019816803.
1766     */
1767    bool ds_write_state;
1768 
1769    /**
1770     * Toggle tracking for Wa_14018283232.
1771     */
1772    bool wa_14018283232_toggle;
1773 
1774    BITSET_DECLARE(dirty, ANV_GFX_STATE_MAX);
1775 };
1776 
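/* Illustrative sketch (not part of the driver): state emission code marks an
 * instruction for reprogramming by setting its bit in the dirty mask of
 * struct anv_gfx_dynamic_state and later tests that bit before re-emitting.
 * The hw_state pointer below is a hypothetical name used only for this
 * example; BITSET_SET/BITSET_TEST come from util/bitset.h.
 *
 *    struct anv_gfx_dynamic_state *hw_state = ...;
 *
 *    hw_state->vft.PrimitiveTopologyType = topology;
 *    BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY);
 *
 *    ...
 *
 *    if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY)) {
 *       ... re-emit 3DSTATE_VF_TOPOLOGY from hw_state->vft ...
 *    }
 */
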
1777 enum anv_internal_kernel_name {
1778    ANV_INTERNAL_KERNEL_GENERATED_DRAWS,
1779    ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE,
1780    ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_FRAGMENT,
1781    ANV_INTERNAL_KERNEL_MEMCPY_COMPUTE,
1782 
1783    ANV_INTERNAL_KERNEL_COUNT,
1784 };
1785 
1786 enum anv_rt_bvh_build_method {
1787    ANV_BVH_BUILD_METHOD_TRIVIAL,
1788    ANV_BVH_BUILD_METHOD_NEW_SAH,
1789 };
1790 
1791 struct anv_device_astc_emu {
1792     struct vk_texcompress_astc_state           *texcompress;
1793 
1794     /* for flush_astc_ldr_void_extent_denorms */
1795     simple_mtx_t mutex;
1796     VkDescriptorSetLayout ds_layout;
1797     VkPipelineLayout pipeline_layout;
1798     VkPipeline pipeline;
1799 };
1800 
1801 struct anv_device {
1802     struct vk_device                            vk;
1803 
1804     struct anv_physical_device *                physical;
1805     const struct intel_device_info *            info;
1806     const struct anv_kmd_backend *              kmd_backend;
1807     struct isl_device                           isl_dev;
1808     union {
1809        uint32_t                                 context_id; /* i915 */
1810        uint32_t                                 vm_id; /* Xe */
1811     };
1812     int                                         fd;
1813 
1814     pthread_mutex_t                             vma_mutex;
1815     struct util_vma_heap                        vma_lo;
1816     struct util_vma_heap                        vma_hi;
1817     struct util_vma_heap                        vma_desc;
1818     struct util_vma_heap                        vma_dynamic_visible;
1819     struct util_vma_heap                        vma_trtt;
1820 
1821     /** List of all anv_device_memory objects */
1822     struct list_head                            memory_objects;
1823 
1824     /** List of anv_image objects with a private binding for implicit CCS */
1825     struct list_head                            image_private_objects;
1826 
1827     /** Memory pool for batch buffers */
1828     struct anv_bo_pool                          batch_bo_pool;
1829     /** Memory pool for utrace timestamp buffers */
1830     struct anv_bo_pool                          utrace_bo_pool;
1831     /**
1832      * Size of the timestamp captured for utrace.
1833      */
1834     uint32_t                                     utrace_timestamp_size;
1835     /** Memory pool for BVH build buffers */
1836     struct anv_bo_pool                          bvh_bo_pool;
1837 
1838     struct anv_bo_cache                         bo_cache;
1839 
1840     struct anv_state_pool                       general_state_pool;
1841     struct anv_state_pool                       aux_tt_pool;
1842     struct anv_state_pool                       dynamic_state_pool;
1843     struct anv_state_pool                       instruction_state_pool;
1844     struct anv_state_pool                       binding_table_pool;
1845     struct anv_state_pool                       scratch_surface_state_pool;
1846     struct anv_state_pool                       internal_surface_state_pool;
1847     struct anv_state_pool                       bindless_surface_state_pool;
1848     struct anv_state_pool                       indirect_push_descriptor_pool;
1849     struct anv_state_pool                       push_descriptor_buffer_pool;
1850 
1851     struct anv_state_reserved_array_pool        custom_border_colors;
1852 
1853     /** BO used for various workarounds
1854      *
1855      * There are a number of workarounds on our hardware which require writing
1856      * data somewhere and it doesn't really matter where.  For that, we use
1857      * this BO and just write to the first dword or so.
1858      *
1859      * We also need to be able to handle NULL buffers bound as pushed UBOs.
1860      * For that, we use the high bytes (>= 1024) of the workaround BO.
1861      */
1862     struct anv_bo *                             workaround_bo;
1863     struct anv_address                          workaround_address;
1864 
1865     struct anv_bo *                             dummy_aux_bo;
1866 
1867     /**
1868      * Workarounds for game bugs.
1869      */
1870     struct {
1871        struct set *                             doom64_images;
1872     } workarounds;
1873 
1874     struct anv_bo *                             trivial_batch_bo;
1875     struct anv_state                            null_surface_state;
1876 
1877     /**
1878      * NULL surface state copy stored in host memory for use as a fast
1879      * memcpy() source.
1880      */
1881     char                                        host_null_surface_state[ANV_SURFACE_STATE_SIZE];
1882 
1883     struct vk_pipeline_cache *                  internal_cache;
1884 
1885     struct {
1886        struct blorp_context                     context;
1887        struct anv_state                         dynamic_states[BLORP_DYNAMIC_STATE_COUNT];
1888     }                                           blorp;
1889 
1890     struct anv_state                            border_colors;
1891 
1892     struct anv_state                            slice_hash;
1893 
1894     /** An array of CPS_STATE structures grouped by MAX_VIEWPORTS elements
1895      *
1896      * We need to emit CPS_STATE structures for each viewport accessible by a
1897      * pipeline. So rather than write many identical CPS_STATE structures
1898  * dynamically, we can enumerate all possible combinations and then just
1899      * emit a 3DSTATE_CPS_POINTERS instruction with the right offset into this
1900      * array.
1901      */
1902     struct anv_state                            cps_states;
1903 
1904     uint32_t                                    queue_count;
1905     struct anv_queue  *                         queues;
1906 
1907     struct anv_scratch_pool                     scratch_pool;
1908     struct anv_scratch_pool                     protected_scratch_pool;
1909     struct anv_bo                              *rt_scratch_bos[16];
1910     struct anv_bo                              *btd_fifo_bo;
1911     struct anv_address                          rt_uuid_addr;
1912 
1913     bool                                        robust_buffer_access;
1914 
1915     uint32_t                                    protected_session_id;
1916 
1917     /** Shadow ray query BO
1918      *
1919      * The ray_query_bo only holds the current ray being traced. When using
1920      * more than 1 ray query per thread, we cannot fit all the queries in
1921     * there, so we need another buffer to hold query data that is not
1922      * currently being used by the HW for tracing, similar to a scratch space.
1923      *
1924      * The size of the shadow buffer depends on the number of queries per
1925      * shader.
1926      */
1927     struct anv_bo                              *ray_query_shadow_bos[16];
1928     /** Ray query buffer used to communicate with the HW unit.
1929      */
1930     struct anv_bo                              *ray_query_bo;
1931 
1932     struct anv_shader_bin                      *rt_trampoline;
1933     struct anv_shader_bin                      *rt_trivial_return;
1934 
1935     enum anv_rt_bvh_build_method                bvh_build_method;
1936 
1937     /** Draw generation shader
1938      *
1939      * Generates direct draw calls out of indirect parameters. Used to
1940      * work around slowness with indirect draw calls.
1941      */
1942     struct anv_shader_bin                      *internal_kernels[ANV_INTERNAL_KERNEL_COUNT];
1943     const struct intel_l3_config               *internal_kernels_l3_config;
1944 
1945     pthread_mutex_t                             mutex;
1946     pthread_cond_t                              queue_submit;
1947 
1948     struct intel_batch_decode_ctx               decoder[ANV_MAX_QUEUE_FAMILIES];
1949     /*
1950      * When decoding an anv_cmd_buffer, we might need to search for BOs through
1951      * the cmd_buffer's list.
1952      */
1953     struct anv_cmd_buffer                      *cmd_buffer_being_decoded;
1954 
1955     int                                         perf_fd; /* -1 if not opened */
1956     struct anv_queue                            *perf_queue;
1957 
1958     struct intel_aux_map_context                *aux_map_ctx;
1959 
1960     const struct intel_l3_config                *l3_config;
1961 
1962     struct intel_debug_block_frame              *debug_frame_desc;
1963 
1964     struct intel_ds_device                       ds;
1965 
1966     nir_shader                                  *fp64_nir;
1967 
1968     uint32_t                                    draw_call_count;
1969     struct anv_state                            breakpoint;
1970 #if DETECT_OS_ANDROID
1971     struct u_gralloc                            *u_gralloc;
1972 #endif
1973 
1974     /** Precompute all dirty graphics bits
1975      *
1976      * Depending on the platform, some of the dirty bits don't apply (for example
1977      * 3DSTATE_PRIMITIVE_REPLICATION is Gfx12.0+ only). Disabling some
1978      * extensions like Mesh shaders also allows us to avoid emitting any
1979      * mesh/task related instructions (we only initialize them once at device
1980      * initialization).
1981      */
1982     BITSET_DECLARE(gfx_dirty_state, ANV_GFX_STATE_MAX);
1983 
1984     /*
1985      * Command pool for companion RCS command buffer.
1986      */
1987     VkCommandPool                               companion_rcs_cmd_pool;
1988 
1989     struct anv_trtt {
1990        simple_mtx_t mutex;
1991 
1992        /* Sometimes we need to run batches from places where we don't have a
1993         * queue coming from the API, so we use this.
1994         */
1995        struct anv_queue *queue;
1996 
1997        /* There's only one L3 table, so if l3_addr is zero that means we
1998         * didn't initialize the TR-TT context yet (i.e., we're not using TR-TT
1999         * yet in this context).
2000         */
2001        uint64_t l3_addr;
2002 
2003        /* We don't want to access the page tables from the CPU, so just
2004         * maintain a mirror that we can use.
2005         */
2006        uint64_t *l3_mirror;
2007        uint64_t *l2_mirror;
2008 
2009        /* We keep a dynamic list of page table bos, and each bo can store
2010         * multiple page tables.
2011         */
2012        struct anv_bo **page_table_bos;
2013        int num_page_table_bos;
2014        int page_table_bos_capacity;
2015 
2016        /* These are used to keep track of space available for more page tables
2017         * within a bo.
2018         */
2019        struct anv_bo *cur_page_table_bo;
2020        uint64_t next_page_table_bo_offset;
2021 
2022        struct vk_sync *timeline;
2023        uint64_t timeline_val;
2024 
2025        /* List of struct anv_trtt_submission that are in flight and can be
2026         * freed once their vk_sync gets signaled.
2027         */
2028        struct list_head in_flight_batches;
2029     } trtt;
2030 
2031     /* Number of sparse resources that currently exist. This is used for a
2032      * workaround that makes every memoryBarrier flush more things than it
2033      * should. Some workloads create and then immediately destroy sparse
2034      * resources when they start, so just tracking whether a sparse resource was
2035      * ever created is not enough.
2036      */
2037     uint32_t num_sparse_resources;
2038 
2039     struct anv_device_astc_emu                   astc_emu;
2040 
2041     struct intel_bind_timeline bind_timeline; /* Xe only */
2042 
2043     struct {
2044        simple_mtx_t                              mutex;
2045        struct hash_table                        *map;
2046     }                                            embedded_samplers;
2047 
2048     struct {
2049        /**
2050         * Mutex for the printfs array
2051         */
2052        simple_mtx_t                              mutex;
2053        /**
2054         * Buffer in which the shader printfs are stored
2055         */
2056        struct anv_bo                            *bo;
2057        /**
2058         * Array of pointers to u_printf_info
2059         */
2060        struct util_dynarray                      prints;
2061     } printf;
2062 };
2063 
2064 static inline uint32_t
2065 anv_get_first_render_queue_index(struct anv_physical_device *pdevice)
2066 {
2067    assert(pdevice != NULL);
2068 
2069    for (uint32_t i = 0; i < pdevice->queue.family_count; i++) {
2070       if (pdevice->queue.families[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) {
2071          return i;
2072       }
2073    }
2074 
2075    unreachable("Graphics capable queue family not found");
2076 }
2077 
2078 static inline struct anv_state
2079 anv_binding_table_pool_alloc(struct anv_device *device)
2080 {
2081    return anv_state_pool_alloc(&device->binding_table_pool,
2082                                device->binding_table_pool.block_size, 0);
2083 }
2084 
2085 static inline void
2086 anv_binding_table_pool_free(struct anv_device *device, struct anv_state state)
2087 {
2088    anv_state_pool_free(&device->binding_table_pool, state);
2089 }
2090 
2091 static inline struct anv_state
2092 anv_null_surface_state_for_binding_table(struct anv_device *device)
2093 {
2094    struct anv_state state = device->null_surface_state;
2095    if (device->physical->indirect_descriptors) {
2096       state.offset += device->physical->va.bindless_surface_state_pool.addr -
2097                       device->physical->va.internal_surface_state_pool.addr;
2098    }
2099    return state;
2100 }
2101 
2102 static inline struct anv_state
2103 anv_bindless_state_for_binding_table(struct anv_device *device,
2104                                      struct anv_state state)
2105 {
2106    state.offset += device->physical->va.bindless_surface_state_pool.addr -
2107                    device->physical->va.internal_surface_state_pool.addr;
2108    return state;
2109 }
2110 
2111 static inline struct anv_state
2112 anv_device_maybe_alloc_surface_state(struct anv_device *device,
2113                                      struct anv_state_stream *surface_state_stream)
2114 {
2115    if (device->physical->indirect_descriptors) {
2116       if (surface_state_stream)
2117          return anv_state_stream_alloc(surface_state_stream, 64, 64);
2118       return anv_state_pool_alloc(&device->bindless_surface_state_pool, 64, 64);
2119    } else {
2120       return ANV_STATE_NULL;
2121    }
2122 }
2123 
2124 static inline uint32_t
2125 anv_mocs(const struct anv_device *device,
2126          const struct anv_bo *bo,
2127          isl_surf_usage_flags_t usage)
2128 {
2129    return isl_mocs(&device->isl_dev, usage, bo && anv_bo_is_external(bo));
2130 }
2131 
2132 static inline uint32_t
2133 anv_mocs_for_address(const struct anv_device *device,
2134                      const struct anv_address *addr)
2135 {
2136    return anv_mocs(device, addr->bo, 0);
2137 }
2138 
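/* Illustrative sketch (not part of the driver): surface/buffer state packing
 * code typically derives the MOCS value from the BO and the intended isl
 * usage. The sb variable and the chosen usage flag below are placeholders
 * assumed only for this example.
 *
 *    sb.MOCS = anv_mocs(device, address.bo, ISL_SURF_USAGE_RENDER_TARGET_BIT);
 *
 * or, when only an anv_address is at hand and no particular usage applies:
 *
 *    sb.MOCS = anv_mocs_for_address(device, &address);
 */
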
2139 void anv_device_init_blorp(struct anv_device *device);
2140 void anv_device_finish_blorp(struct anv_device *device);
2141 
2142 VkResult anv_device_alloc_bo(struct anv_device *device,
2143                              const char *name, uint64_t size,
2144                              enum anv_bo_alloc_flags alloc_flags,
2145                              uint64_t explicit_address,
2146                              struct anv_bo **bo);
2147 VkResult anv_device_map_bo(struct anv_device *device,
2148                            struct anv_bo *bo,
2149                            uint64_t offset,
2150                            size_t size,
2151                            void *placed_addr,
2152                            void **map_out);
2153 VkResult anv_device_unmap_bo(struct anv_device *device,
2154                              struct anv_bo *bo,
2155                              void *map, size_t map_size,
2156                              bool replace);
2157 VkResult anv_device_import_bo_from_host_ptr(struct anv_device *device,
2158                                             void *host_ptr, uint32_t size,
2159                                             enum anv_bo_alloc_flags alloc_flags,
2160                                             uint64_t client_address,
2161                                             struct anv_bo **bo_out);
2162 VkResult anv_device_import_bo(struct anv_device *device, int fd,
2163                               enum anv_bo_alloc_flags alloc_flags,
2164                               uint64_t client_address,
2165                               struct anv_bo **bo);
2166 VkResult anv_device_export_bo(struct anv_device *device,
2167                               struct anv_bo *bo, int *fd_out);
2168 VkResult anv_device_get_bo_tiling(struct anv_device *device,
2169                                   struct anv_bo *bo,
2170                                   enum isl_tiling *tiling_out);
2171 VkResult anv_device_set_bo_tiling(struct anv_device *device,
2172                                   struct anv_bo *bo,
2173                                   uint32_t row_pitch_B,
2174                                   enum isl_tiling tiling);
2175 void anv_device_release_bo(struct anv_device *device,
2176                            struct anv_bo *bo);
2177 
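/* Illustrative sketch (not part of the driver): a typical BO lifecycle using
 * the helpers declared above. Error handling is omitted, and the name, size
 * and the zero alloc_flags/explicit_address values are placeholders assumed
 * only for this example.
 *
 *    struct anv_bo *bo;
 *    VkResult result = anv_device_alloc_bo(device, "example", 4096,
 *                                          0, 0, &bo);
 *
 *    void *map;
 *    result = anv_device_map_bo(device, bo, 0, 4096, NULL, &map);
 *    ... write through map ...
 *    result = anv_device_unmap_bo(device, bo, map, 4096, false);
 *
 *    anv_device_release_bo(device, bo);
 */
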
2178 static inline void anv_device_set_physical(struct anv_device *device,
2179                                            struct anv_physical_device *physical_device)
2180 {
2181    device->physical = physical_device;
2182    device->info = &physical_device->info;
2183    device->isl_dev = physical_device->isl_dev;
2184 }
2185 
2186 static inline struct anv_bo *
2187 anv_device_lookup_bo(struct anv_device *device, uint32_t gem_handle)
2188 {
2189    return util_sparse_array_get(&device->bo_cache.bo_map, gem_handle);
2190 }
2191 
2192 VkResult anv_device_wait(struct anv_device *device, struct anv_bo *bo,
2193                          int64_t timeout);
2194 
2195 VkResult anv_device_print_init(struct anv_device *device);
2196 void anv_device_print_fini(struct anv_device *device);
2197 void anv_device_print_shader_prints(struct anv_device *device);
2198 
2199 VkResult anv_queue_init(struct anv_device *device, struct anv_queue *queue,
2200                         const VkDeviceQueueCreateInfo *pCreateInfo,
2201                         uint32_t index_in_family);
2202 void anv_queue_finish(struct anv_queue *queue);
2203 
2204 VkResult anv_queue_submit(struct vk_queue *queue,
2205                           struct vk_queue_submit *submit);
2206 
2207 void anv_queue_trace(struct anv_queue *queue, const char *label,
2208                      bool frame, bool begin);
2209 
2210 static inline VkResult
2211 anv_queue_post_submit(struct anv_queue *queue, VkResult submit_result)
2212 {
2213    if (submit_result != VK_SUCCESS)
2214       return submit_result;
2215 
2216    VkResult result = VK_SUCCESS;
2217    if (queue->sync) {
2218       result = vk_sync_wait(&queue->device->vk, queue->sync, 0,
2219                             VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
2220       if (result != VK_SUCCESS)
2221          result = vk_queue_set_lost(&queue->vk, "sync wait failed");
2222    }
2223 
2224    if (INTEL_DEBUG(DEBUG_SHADER_PRINT))
2225       anv_device_print_shader_prints(queue->device);
2226 
2227    return result;
2228 }
2229 
2230 int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns);
2231 int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle,
2232                        uint32_t stride, uint32_t tiling);
2233 int anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle);
2234 int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle);
2235 uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd);
2236 int anv_gem_set_context_param(int fd, uint32_t context, uint32_t param,
2237                               uint64_t value);
2238 VkResult
2239 anv_gem_import_bo_alloc_flags_to_bo_flags(struct anv_device *device,
2240                                           struct anv_bo *bo,
2241                                           enum anv_bo_alloc_flags alloc_flags,
2242                                           uint32_t *bo_flags);
2243 const struct intel_device_info_pat_entry *
2244 anv_device_get_pat_entry(struct anv_device *device,
2245                          enum anv_bo_alloc_flags alloc_flags);
2246 
2247 uint64_t anv_vma_alloc(struct anv_device *device,
2248                        uint64_t size, uint64_t align,
2249                        enum anv_bo_alloc_flags alloc_flags,
2250                        uint64_t client_address,
2251                        struct util_vma_heap **out_vma_heap);
2252 void anv_vma_free(struct anv_device *device,
2253                   struct util_vma_heap *vma_heap,
2254                   uint64_t address, uint64_t size);
2255 
2256 struct anv_reloc_list {
2257    bool                                         uses_relocs;
2258    uint32_t                                     dep_words;
2259    BITSET_WORD *                                deps;
2260    const VkAllocationCallbacks                  *alloc;
2261 };
2262 
2263 VkResult anv_reloc_list_init(struct anv_reloc_list *list,
2264                              const VkAllocationCallbacks *alloc,
2265                              bool uses_relocs);
2266 void anv_reloc_list_finish(struct anv_reloc_list *list);
2267 
2268 VkResult
2269 anv_reloc_list_add_bo_impl(struct anv_reloc_list *list, struct anv_bo *target_bo);
2270 
2271 static inline VkResult
2272 anv_reloc_list_add_bo(struct anv_reloc_list *list, struct anv_bo *target_bo)
2273 {
2274    return list->uses_relocs ? anv_reloc_list_add_bo_impl(list, target_bo) : VK_SUCCESS;
2275 }
2276 
2277 VkResult anv_reloc_list_append(struct anv_reloc_list *list,
2278                                struct anv_reloc_list *other);
2279 
2280 struct anv_batch_bo {
2281    /* Link in the anv_cmd_buffer.owned_batch_bos list */
2282    struct list_head                             link;
2283 
2284    struct anv_bo *                              bo;
2285 
2286    /* Bytes actually consumed in this batch BO */
2287    uint32_t                                     length;
2288 
2289    /* When this batch BO is used as part of a primary batch buffer, this
2290     * tracks whether it is chained to another primary batch buffer.
2291     *
2292     * If this is the case, the relocation list's last entry points to the
2293     * location of the MI_BATCH_BUFFER_START chaining to the next batch.
2294     */
2295    bool                                         chained;
2296 
2297    struct anv_reloc_list                        relocs;
2298 };
2299 
2300 struct anv_batch {
2301    const VkAllocationCallbacks *                alloc;
2302 
2303    /**
2304     * Sum of all the anv_batch_bo sizes allocated for this command buffer.
2305     * Used to increase allocation size for long command buffers.
2306     */
2307    size_t                                       allocated_batch_size;
2308 
2309    struct anv_address                           start_addr;
2310 
2311    void *                                       start;
2312    void *                                       end;
2313    void *                                       next;
2314 
2315    struct anv_reloc_list *                      relocs;
2316 
2317    /* This callback is called (with the associated user data) in the event
2318     * that the batch runs out of space.
2319     */
2320    VkResult (*extend_cb)(struct anv_batch *, uint32_t, void *);
2321    void *                                       user_data;
2322 
2323    /**
2324     * Current error status of the command buffer. Used to track inconsistent
2325     * or incomplete command buffer states that are the consequence of run-time
2326     * errors such as out of memory scenarios. We want to track this in the
2327     * batch because the command buffer object is not visible to some parts
2328     * of the driver.
2329     */
2330    VkResult                                     status;
2331 
2332    enum intel_engine_class                      engine_class;
2333 
2334    /**
2335     * Write fencing status for mi_builder.
2336     */
2337    bool write_fence_status;
2338 
2339    /**
2340     * Number of 3DPRIMITIVE's emitted for WA 16014538804
2341     */
2342    uint8_t num_3d_primitives_emitted;
2343 
2344    struct u_trace * trace;
2345    const char * pc_reasons[4];
2346    uint32_t pc_reasons_count;
2347 
2348 };
2349 
2350 void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords);
2351 VkResult anv_batch_emit_ensure_space(struct anv_batch *batch, uint32_t size);
2352 void anv_batch_advance(struct anv_batch *batch, uint32_t size);
2353 void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other);
2354 struct anv_address anv_batch_address(struct anv_batch *batch, void *batch_location);
2355 
2356 static inline struct anv_address
2357 anv_batch_current_address(struct anv_batch *batch)
2358 {
2359    return anv_batch_address(batch, batch->next);
2360 }
2361 
2362 static inline void
2363 anv_batch_set_storage(struct anv_batch *batch, struct anv_address addr,
2364                       void *map, size_t size)
2365 {
2366    batch->start_addr = addr;
2367    batch->next = batch->start = map;
2368    batch->end = map + size;
2369 }
2370 
2371 static inline VkResult
2372 anv_batch_set_error(struct anv_batch *batch, VkResult error)
2373 {
2374    assert(error != VK_SUCCESS);
2375    if (batch->status == VK_SUCCESS)
2376       batch->status = error;
2377    return batch->status;
2378 }
2379 
2380 static inline bool
2381 anv_batch_has_error(struct anv_batch *batch)
2382 {
2383    return batch->status != VK_SUCCESS;
2384 }
2385 
2386 static inline uint64_t
2387 _anv_combine_address(struct anv_batch *batch, void *location,
2388                      const struct anv_address address, uint32_t delta)
2389 {
2390    if (address.bo == NULL)
2391       return address.offset + delta;
2392 
2393    if (batch)
2394       anv_reloc_list_add_bo(batch->relocs, address.bo);
2395 
2396    return anv_address_physical(anv_address_add(address, delta));
2397 }
2398 
2399 #define __gen_address_type struct anv_address
2400 #define __gen_user_data struct anv_batch
2401 #define __gen_combine_address _anv_combine_address
2402 
2403 /* Wrapper macros needed to work around preprocessor argument issues.  In
2404  * particular, arguments don't get pre-evaluated if they are concatenated.
2405  * This means that, if you pass GENX(3DSTATE_PS) into the emit macro, the
2406  * GENX macro won't get evaluated if the emit macro contains "cmd ## foo".
2407  * We can work around this easily enough with these helpers.
2408  */
2409 #define __anv_cmd_length(cmd) cmd ## _length
2410 #define __anv_cmd_length_bias(cmd) cmd ## _length_bias
2411 #define __anv_cmd_header(cmd) cmd ## _header
2412 #define __anv_cmd_pack(cmd) cmd ## _pack
2413 #define __anv_reg_num(reg) reg ## _num
2414 
2415 #define anv_pack_struct(dst, struc, ...) do {                              \
2416       struct struc __template = {                                          \
2417          __VA_ARGS__                                                       \
2418       };                                                                   \
2419       __anv_cmd_pack(struc)(NULL, dst, &__template);                       \
2420       VG(VALGRIND_CHECK_MEM_IS_DEFINED(dst, __anv_cmd_length(struc) * 4)); \
2421    } while (0)
2422 
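/* Illustrative sketch (not part of the driver): anv_pack_struct packs a
 * genxml template into an existing dword array without emitting it into a
 * batch. The command choice and register values below are placeholders
 * assumed only for this example.
 *
 *    uint32_t dwords[GENX(MI_LOAD_REGISTER_IMM_length)];
 *    anv_pack_struct(dwords, GENX(MI_LOAD_REGISTER_IMM),
 *                    .RegisterOffset = 0x2000,
 *                    .DataDWord      = 1);
 */
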
2423 #define anv_batch_emitn(batch, n, cmd, ...) ({             \
2424       void *__dst = anv_batch_emit_dwords(batch, n);       \
2425       if (__dst) {                                         \
2426          struct cmd __template = {                         \
2427             __anv_cmd_header(cmd),                         \
2428            .DWordLength = n - __anv_cmd_length_bias(cmd),  \
2429             __VA_ARGS__                                    \
2430          };                                                \
2431          __anv_cmd_pack(cmd)(batch, __dst, &__template);   \
2432       }                                                    \
2433       __dst;                                               \
2434    })
2435 
2436 #define anv_batch_emit_merge(batch, cmd, pipeline, state, name)         \
2437    for (struct cmd name = { 0 },                                        \
2438         *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd));    \
2439         __builtin_expect(_dst != NULL, 1);                              \
2440         ({ uint32_t _partial[__anv_cmd_length(cmd)];                    \
2441            assert((pipeline)->state.len == __anv_cmd_length(cmd));      \
2442            __anv_cmd_pack(cmd)(batch, _partial, &name);                 \
2443            for (uint32_t i = 0; i < __anv_cmd_length(cmd); i++) {       \
2444               assert((_partial[i] &                                     \
2445                       (pipeline)->batch_data[                           \
2446                          (pipeline)->state.offset + i]) == 0);          \
2447               ((uint32_t *)_dst)[i] = _partial[i] |                     \
2448                  (pipeline)->batch_data[(pipeline)->state.offset + i];  \
2449            }                                                            \
2450            VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \
2451            _dst = NULL;                                                 \
2452          }))
2453 
2454 #define anv_batch_emit_merge_protected(batch, cmd, pipeline, state,     \
2455                                        name, protected)                 \
2456    for (struct cmd name = { 0 },                                        \
2457         *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd));    \
2458         __builtin_expect(_dst != NULL, 1);                              \
2459         ({ struct anv_gfx_state_ptr *_cmd_state = protected ?           \
2460               &(pipeline)->state##_protected :                          \
2461               &(pipeline)->state;                                       \
2462            uint32_t _partial[__anv_cmd_length(cmd)];                    \
2463            assert(_cmd_state->len == __anv_cmd_length(cmd));            \
2464            __anv_cmd_pack(cmd)(batch, _partial, &name);                 \
2465            for (uint32_t i = 0; i < __anv_cmd_length(cmd); i++) {       \
2466               assert((_partial[i] &                                     \
2467                       (pipeline)->batch_data[                           \
2468                          (pipeline)->state.offset + i]) == 0);          \
2469               ((uint32_t *)_dst)[i] = _partial[i] |                     \
2470                  (pipeline)->batch_data[_cmd_state->offset + i];        \
2471            }                                                            \
2472            VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \
2473            _dst = NULL;                                                 \
2474          }))
2475 
2476 #define anv_batch_emit(batch, cmd, name)                            \
2477    for (struct cmd name = { __anv_cmd_header(cmd) },                    \
2478         *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd));    \
2479         __builtin_expect(_dst != NULL, 1);                              \
2480         ({ __anv_cmd_pack(cmd)(batch, _dst, &name);                     \
2481            VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \
2482            _dst = NULL;                                                 \
2483          }))
2484 
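/* Illustrative sketch (not part of the driver): anv_batch_emit allocates the
 * dwords for the command, hands the body a mutable template to fill out, and
 * packs it once the body completes. For example, a stalling PIPE_CONTROL
 * (the cmd_buffer name and field choice are assumptions for this example):
 *
 *    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
 *       pc.CommandStreamerStallEnable = true;
 *    }
 */
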
2485 #define anv_batch_write_reg(batch, reg, name)                           \
2486    for (struct reg name = {}, *_cont = (struct reg *)1; _cont != NULL;  \
2487         ({                                                              \
2488             uint32_t _dw[__anv_cmd_length(reg)];                        \
2489             __anv_cmd_pack(reg)(NULL, _dw, &name);                      \
2490             for (unsigned i = 0; i < __anv_cmd_length(reg); i++) {      \
2491                anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { \
2492                   lri.RegisterOffset   = __anv_reg_num(reg);            \
2493                   lri.DataDWord        = _dw[i];                        \
2494                }                                                        \
2495             }                                                           \
2496            _cont = NULL;                                                \
2497          }))
2498 
2499 /* #define __gen_get_batch_dwords anv_batch_emit_dwords */
2500 /* #define __gen_get_batch_address anv_batch_address */
2501 /* #define __gen_address_value anv_address_physical */
2502 /* #define __gen_address_offset anv_address_add */
2503 
2504 /* Base structure used to track a submission that needs some cleanup operations
2505  * upon completion. Should be embedded into a larger structure.
2506  */
2507 struct anv_async_submit {
2508    struct anv_queue *queue;
2509 
2510    struct anv_bo_pool *bo_pool;
2511 
2512    bool use_companion_rcs;
2513 
2514    bool owns_sync;
2515    struct vk_sync_signal signal;
2516 
2517    struct anv_reloc_list relocs;
2518    struct anv_batch batch;
2519    struct util_dynarray batch_bos;
2520 };
2521 
2522 VkResult
2523 anv_async_submit_init(struct anv_async_submit *submit,
2524                       struct anv_queue *queue,
2525                       struct anv_bo_pool *bo_pool,
2526                       bool use_companion_rcs,
2527                       bool create_signal_sync);
2528 
2529 void
2530 anv_async_submit_fini(struct anv_async_submit *submit);
2531 
2532 VkResult
2533 anv_async_submit_create(struct anv_queue *queue,
2534                         struct anv_bo_pool *bo_pool,
2535                         bool use_companion_rcs,
2536                         bool create_signal_sync,
2537                         struct anv_async_submit **out_submit);
2538 
2539 void
2540 anv_async_submit_destroy(struct anv_async_submit *submit);
2541 
2542 bool
2543 anv_async_submit_done(struct anv_async_submit *submit);
2544 
2545 bool
2546 anv_async_submit_wait(struct anv_async_submit *submit);
2547 
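/* Illustrative sketch (not part of the driver): typical standalone use of the
 * helpers above, creating a submission, building its batch, then waiting on
 * and destroying it. Error handling and the actual submission step are
 * omitted, and the argument choices are assumptions for this example only.
 *
 *    struct anv_async_submit *submit;
 *    anv_async_submit_create(queue, &device->batch_bo_pool,
 *                            false, true, &submit);
 *    ... emit commands into submit->batch and submit it ...
 *    anv_async_submit_wait(submit);
 *    anv_async_submit_destroy(submit);
 */
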
2548 struct anv_sparse_submission {
2549    struct anv_queue *queue;
2550 
2551    struct anv_vm_bind *binds;
2552    int binds_len;
2553    int binds_capacity;
2554 
2555    uint32_t wait_count;
2556    uint32_t signal_count;
2557 
2558    struct vk_sync_wait *waits;
2559    struct vk_sync_signal *signals;
2560 };
2561 
2562 struct anv_trtt_bind {
2563    uint64_t pte_addr;
2564    uint64_t entry_addr;
2565 };
2566 
2567 struct anv_trtt_submission {
2568    struct anv_async_submit base;
2569 
2570    struct anv_sparse_submission *sparse;
2571 
2572    struct list_head link;
2573 };
2574 
2575 struct anv_device_memory {
2576    struct vk_device_memory                      vk;
2577 
2578    struct list_head                             link;
2579 
2580    struct anv_bo *                              bo;
2581    const struct anv_memory_type *               type;
2582 
2583    void *                                       map;
2584    size_t                                       map_size;
2585 
2586    /* The map, from the user PoV is map + map_delta */
2587    uint64_t                                     map_delta;
2588 };
2589 
2590 /**
2591  * Header for Vertex URB Entry (VUE)
2592  */
2593 struct anv_vue_header {
2594    uint32_t Reserved;
2595    uint32_t RTAIndex; /* RenderTargetArrayIndex */
2596    uint32_t ViewportIndex;
2597    float PointWidth;
2598 };
2599 
2600 /** Struct representing a sampled image descriptor
2601  *
2602  * This descriptor layout is used for sampled images, bare samplers, and
2603  * combined image/sampler descriptors.
2604  */
2605 struct anv_sampled_image_descriptor {
2606    /** Bindless image handle
2607     *
2608     * This is expected to already be shifted such that the 20-bit
2609     * SURFACE_STATE table index is in the top 20 bits.
2610     */
2611    uint32_t image;
2612 
2613    /** Bindless sampler handle
2614     *
2615     * This is assumed to be a 32B-aligned SAMPLER_STATE pointer relative
2616     * to the dynamic state base address.
2617     */
2618    uint32_t sampler;
2619 };
2620 
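/* Illustrative note (an assumption for the example, following the layout
 * described above): with a 32-bit handle whose top 20 bits hold the
 * SURFACE_STATE table index, an index i is stored pre-shifted:
 *
 *    uint32_t image = i << 12;
 */
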
2621 /** Struct representing a storage image descriptor */
2622 struct anv_storage_image_descriptor {
2623    /** Bindless image handles
2624     *
2625     * These are expected to already be shifted such that the 20-bit
2626     * SURFACE_STATE table index is in the top 20 bits.
2627     */
2628    uint32_t vanilla;
2629 
2630    /** Image depth
2631     *
2632     * By default the HW RESINFO message allows us to query the depth of an image:
2633     *
2634     * From the Kaby Lake docs for the RESINFO message:
2635     *
2636     *    "Surface Type | ... | Blue
2637     *    --------------+-----+----------------
2638     *    SURFTYPE_3D  | ... | (Depth+1)>>LOD"
2639     *
2640     * With VK_EXT_sliced_view_of_3d, we have to support a slice of a 3D image,
2641     * meaning at a depth offset with a new depth value potentially reduced
2642     * from the original image. Unfortunately if we change the Depth value of
2643     * the image, we then run into issues with Yf/Ys tilings where the HW fetches
2644     * data at incorrect locations.
2645     *
2646     * To solve this, we put the slice depth in the descriptor and recompose
2647     * the vec3 (width, height, depth) using this field for z and xy using the
2648     * RESINFO result.
2649     */
2650    uint32_t image_depth;
2651 };
2652 
2653 /** Struct representing an address/range descriptor
2654  *
2655  * The fields of this struct correspond directly to the data layout of
2656  * nir_address_format_64bit_bounded_global addresses.  The last field is the
2657  * offset in the NIR address so it must be zero so that when you load the
2658  * descriptor you get a pointer to the start of the range.
2659  */
2660 struct anv_address_range_descriptor {
2661    uint64_t address;
2662    uint32_t range;
2663    uint32_t zero;
2664 };
2665 
2666 enum anv_descriptor_data {
2667    /** The descriptor contains a BTI reference to a surface state */
2668    ANV_DESCRIPTOR_BTI_SURFACE_STATE       = BITFIELD_BIT(0),
2669    /** The descriptor contains a BTI reference to a sampler state */
2670    ANV_DESCRIPTOR_BTI_SAMPLER_STATE       = BITFIELD_BIT(1),
2671    /** The descriptor contains an actual buffer view */
2672    ANV_DESCRIPTOR_BUFFER_VIEW             = BITFIELD_BIT(2),
2673    /** The descriptor contains inline uniform data */
2674    ANV_DESCRIPTOR_INLINE_UNIFORM          = BITFIELD_BIT(3),
2675    /** anv_address_range_descriptor with a buffer address and range */
2676    ANV_DESCRIPTOR_INDIRECT_ADDRESS_RANGE  = BITFIELD_BIT(4),
2677    /** Bindless surface handle (through anv_sampled_image_descriptor) */
2678    ANV_DESCRIPTOR_INDIRECT_SAMPLED_IMAGE  = BITFIELD_BIT(5),
2679    /** Storage image handles (through anv_storage_image_descriptor) */
2680    ANV_DESCRIPTOR_INDIRECT_STORAGE_IMAGE  = BITFIELD_BIT(6),
2681    /** The descriptor contains a single RENDER_SURFACE_STATE */
2682    ANV_DESCRIPTOR_SURFACE                 = BITFIELD_BIT(7),
2683    /** The descriptor contains a SAMPLER_STATE */
2684    ANV_DESCRIPTOR_SAMPLER                 = BITFIELD_BIT(8),
2685    /** A tuple of RENDER_SURFACE_STATE & SAMPLER_STATE */
2686    ANV_DESCRIPTOR_SURFACE_SAMPLER         = BITFIELD_BIT(9),
2687 };
2688 
2689 struct anv_descriptor_set_binding_layout {
2690    /* The type of the descriptors in this binding */
2691    VkDescriptorType type;
2692 
2693    /* Flags provided when this binding was created */
2694    VkDescriptorBindingFlags flags;
2695 
2696    /* Bitfield representing the type of data this descriptor contains */
2697    enum anv_descriptor_data data;
2698 
2699    /* Maximum number of YCbCr texture/sampler planes */
2700    uint8_t max_plane_count;
2701 
2702    /* Number of array elements in this binding (or size in bytes for inline
2703     * uniform data)
2704     */
2705    uint32_t array_size;
2706 
2707    /* Index into the flattened descriptor set */
2708    uint32_t descriptor_index;
2709 
2710    /* Index into the dynamic state array for a dynamic buffer, relative to the
2711     * set.
2712     */
2713    int16_t dynamic_offset_index;
2714 
2715    /* Computed surface size from data (for one plane) */
2716    uint16_t descriptor_data_surface_size;
2717 
2718    /* Computed sampler size from data (for one plane) */
2719    uint16_t descriptor_data_sampler_size;
2720 
2721    /* Index into the descriptor set buffer views */
2722    int32_t buffer_view_index;
2723 
2724    /* Offset into the descriptor buffer where the surface descriptor lives */
2725    uint32_t descriptor_surface_offset;
2726 
2727    /* Offset into the descriptor buffer where the sampler descriptor lives */
2728    uint16_t descriptor_sampler_offset;
2729 
2730    /* Precomputed surface stride (with a multiplane descriptor, the descriptor
2731     * includes all the planes)
2732     */
2733    uint16_t descriptor_surface_stride;
2734 
2735    /* Precomputed sampler stride (with a multiplane descriptor, the descriptor
2736     * includes all the planes)
2737     */
2738    uint16_t descriptor_sampler_stride;
2739 
2740    /* Immutable samplers (or NULL if no immutable samplers) */
2741    struct anv_sampler **immutable_samplers;
2742 };
2743 
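/* Illustrative sketch (not part of the driver): code interpreting a binding
 * typically tests the data bitfield to decide how the descriptor is stored,
 * e.g. inline uniform bindings keep their size in bytes in array_size:
 *
 *    if (binding->data & ANV_DESCRIPTOR_INLINE_UNIFORM) {
 *       ... array_size is a byte size, not an element count ...
 *    }
 */
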
2744 enum anv_descriptor_set_layout_type {
2745    ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_UNKNOWN,
2746    ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT,
2747    ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT,
2748    ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER,
2749 };
2750 
2751 struct anv_descriptor_set_layout {
2752    struct vk_object_base base;
2753 
2754    VkDescriptorSetLayoutCreateFlags flags;
2755 
2756    /* Type of descriptor set layout */
2757    enum anv_descriptor_set_layout_type type;
2758 
2759    /* Descriptor set layouts can be destroyed at almost any time */
2760    uint32_t ref_cnt;
2761 
2762    /* Number of bindings in this descriptor set */
2763    uint32_t binding_count;
2764 
2765    /* Total number of descriptors */
2766    uint32_t descriptor_count;
2767 
2768    /* Shader stages affected by this descriptor set */
2769    uint16_t shader_stages;
2770 
2771    /* Number of buffer views in this descriptor set */
2772    uint32_t buffer_view_count;
2773 
2774    /* Number of dynamic offsets used by this descriptor set */
2775    uint16_t dynamic_offset_count;
2776 
2777    /* For each dynamic buffer, which VkShaderStageFlagBits stages are using
2778     * this buffer
2779     */
2780    VkShaderStageFlags dynamic_offset_stages[MAX_DYNAMIC_BUFFERS];
2781 
2782    /* Size of the descriptor buffer dedicated to surface states for this
2783     * descriptor set
2784     */
2785    uint32_t descriptor_buffer_surface_size;
2786 
2787    /* Size of the descriptor buffer dedicated to sampler states for this
2788     * descriptor set
2789     */
2790    uint32_t descriptor_buffer_sampler_size;
2791 
2792    /* Number of embedded samplers */
2793    uint32_t embedded_sampler_count;
2794 
2795    /* Bindings in this descriptor set */
2796    struct anv_descriptor_set_binding_layout binding[0];
2797 };
2798 
2799 bool anv_descriptor_supports_bindless(const struct anv_physical_device *pdevice,
2800                                       const struct anv_descriptor_set_layout *set,
2801                                       const struct anv_descriptor_set_binding_layout *binding);
2802 
2803 bool anv_descriptor_requires_bindless(const struct anv_physical_device *pdevice,
2804                                       const struct anv_descriptor_set_layout *set,
2805                                       const struct anv_descriptor_set_binding_layout *binding);
2806 
2807 void anv_descriptor_set_layout_destroy(struct anv_device *device,
2808                                        struct anv_descriptor_set_layout *layout);
2809 
2810 void anv_descriptor_set_layout_print(const struct anv_descriptor_set_layout *layout);
2811 
2812 static inline struct anv_descriptor_set_layout *
2813 anv_descriptor_set_layout_ref(struct anv_descriptor_set_layout *layout)
2814 {
2815    assert(layout && layout->ref_cnt >= 1);
2816    p_atomic_inc(&layout->ref_cnt);
2817 
2818    return layout;
2819 }
2820 
2821 static inline void
2822 anv_descriptor_set_layout_unref(struct anv_device *device,
2823                                 struct anv_descriptor_set_layout *layout)
2824 {
2825    assert(layout && layout->ref_cnt >= 1);
2826    if (p_atomic_dec_zero(&layout->ref_cnt))
2827       anv_descriptor_set_layout_destroy(device, layout);
2828 }
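/* Illustrative sketch (not part of the driver API): any holder that wants to
 * keep a set layout alive beyond its creation scope takes a reference and
 * releases it when done.  Only the functions declared above are used;
 * `holder` is a hypothetical struct for the example.
 *
 *    holder->layout = anv_descriptor_set_layout_ref(set_layout);
 *    ...
 *    anv_descriptor_set_layout_unref(device, holder->layout);
 *
 * The unref that drops ref_cnt to zero ends up calling
 * anv_descriptor_set_layout_destroy().
 */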
2829 
2830 struct anv_descriptor {
2831    VkDescriptorType type;
2832 
2833    union {
2834       struct {
2835          VkImageLayout layout;
2836          struct anv_image_view *image_view;
2837          struct anv_sampler *sampler;
2838       };
2839 
2840       struct {
2841          struct anv_buffer_view *set_buffer_view;
2842          struct anv_buffer *buffer;
2843          uint64_t offset;
2844          uint64_t range;
2845          uint64_t bind_range;
2846       };
2847 
2848       struct anv_buffer_view *buffer_view;
2849 
2850       struct vk_acceleration_structure *accel_struct;
2851    };
2852 };
2853 
2854 struct anv_descriptor_set {
2855    struct vk_object_base base;
2856 
2857    struct anv_descriptor_pool *pool;
2858    struct anv_descriptor_set_layout *layout;
2859 
2860    /* Amount of space occupied in the pool by this descriptor set. It can
2861     * be larger than the size of the descriptor set.
2862     */
2863    uint32_t size;
2864 
2865    /* Is this descriptor set a push descriptor */
2866    bool is_push;
2867 
2868    /* Bitfield of descriptors for which we need to generate surface states.
2869     * Only valid for push descriptors
2870     */
2871    uint32_t generate_surface_states;
2872 
2873    /* State relative to anv_descriptor_pool::surface_bo */
2874    struct anv_state desc_surface_mem;
2875    /* State relative to anv_descriptor_pool::sampler_bo */
2876    struct anv_state desc_sampler_mem;
2877    /* Surface state for the descriptor buffer */
2878    struct anv_state desc_surface_state;
2879 
2880    /* Descriptor set address pointing to desc_surface_mem (we don't need one
2881     * for samplers because they're never accessed other than by the HW through
2882     * the shader sampler handle).
2883     */
2884    struct anv_address desc_surface_addr;
2885 
2886    struct anv_address desc_sampler_addr;
2887 
2888    /* Descriptor offset from the
2889     * device->va.internal_surface_state_pool.addr
2890     *
2891     * It just needs to be added to the binding table offset to be put into the
2892     * HW BTI entry.
2893     */
2894    uint32_t desc_offset;
2895 
2896    uint32_t buffer_view_count;
2897    struct anv_buffer_view *buffer_views;
2898 
2899    /* Link to descriptor pool's desc_sets list. */
2900    struct list_head pool_link;
2901 
2902    uint32_t descriptor_count;
2903    struct anv_descriptor descriptors[0];
2904 };
2905 
2906 static inline bool
2907 anv_descriptor_set_is_push(struct anv_descriptor_set *set)
2908 {
2909    return set->pool == NULL;
2910 }
2911 
2912 struct anv_surface_state_data {
2913    uint8_t data[ANV_SURFACE_STATE_SIZE];
2914 };
2915 
2916 struct anv_buffer_state {
2917    /** Surface state allocated from the bindless heap
2918     *
2919     * Only valid if anv_physical_device::indirect_descriptors is true
2920     */
2921    struct anv_state state;
2922 
2923    /** Surface state after genxml packing
2924     *
2925     * Only valid if anv_physical_device::indirect_descriptors is false
2926     */
2927    struct anv_surface_state_data state_data;
2928 };
2929 
2930 struct anv_buffer_view {
2931    struct vk_buffer_view vk;
2932 
2933    struct anv_address address;
2934 
2935    struct anv_buffer_state general;
2936    struct anv_buffer_state storage;
2937 };
2938 
2939 struct anv_push_descriptor_set {
2940    struct anv_descriptor_set set;
2941 
2942    /* Put this field right behind anv_descriptor_set so it fills up the
2943     * descriptors[0] field. */
2944    struct anv_descriptor descriptors[MAX_PUSH_DESCRIPTORS];
2945 
2946    /** True if the descriptor set buffer has been referenced by a draw or
2947     * dispatch command.
2948     */
2949    bool set_used_on_gpu;
2950 
2951    struct anv_buffer_view buffer_views[MAX_PUSH_DESCRIPTORS];
2952 };
2953 
2954 static inline struct anv_address
2955 anv_descriptor_set_address(struct anv_descriptor_set *set)
2956 {
2957    if (anv_descriptor_set_is_push(set)) {
2958       /* We have to flag the push descriptor set as used on the GPU
2959        * so that the next time we push descriptors, we grab new memory.
2960        */
2961       struct anv_push_descriptor_set *push_set =
2962          (struct anv_push_descriptor_set *)set;
2963       push_set->set_used_on_gpu = true;
2964    }
2965 
2966    return set->desc_surface_addr;
2967 }
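/* Illustrative sketch: callers that need the descriptor buffer address of a
 * set (e.g. to program a binding table entry) should go through
 * anv_descriptor_set_address() rather than reading desc_surface_addr
 * directly, so that push descriptor sets get flagged as used on the GPU:
 *
 *    struct anv_address addr = anv_descriptor_set_address(set);
 *
 * After this call, a push descriptor set will allocate fresh memory the next
 * time descriptors are pushed into it.
 */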
2968 
2969 struct anv_descriptor_pool_heap {
2970    /* BO allocated to back the pool (unused for host pools) */
2971    struct anv_bo        *bo;
2972 
2973    /* Host memory allocated to back a host pool */
2974    void                 *host_mem;
2975 
2976    /* Heap tracking allocations in bo/host_mem */
2977    struct util_vma_heap  heap;
2978 
2979    /* Size of the heap */
2980    uint32_t              size;
2981 };
2982 
2983 struct anv_descriptor_pool {
2984    struct vk_object_base base;
2985 
2986    struct anv_descriptor_pool_heap surfaces;
2987    struct anv_descriptor_pool_heap samplers;
2988 
2989    struct anv_state_stream surface_state_stream;
2990    void *surface_state_free_list;
2991 
2992    /** List of anv_descriptor_set. */
2993    struct list_head desc_sets;
2994 
2995    /** Heap over host_mem */
2996    struct util_vma_heap host_heap;
2997 
2998    /** Allocated size of host_mem */
2999    uint32_t host_mem_size;
3000 
3001    /**
3002     * VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_EXT. If set, then
3003     * surface_state_stream is unused.
3004     */
3005    bool host_only;
3006 
3007    char host_mem[0];
3008 };
3009 
3010 bool
3011 anv_push_descriptor_set_init(struct anv_cmd_buffer *cmd_buffer,
3012                              struct anv_push_descriptor_set *push_set,
3013                              struct anv_descriptor_set_layout *layout);
3014 
3015 void
3016 anv_push_descriptor_set_finish(struct anv_push_descriptor_set *push_set);
3017 
3018 void
3019 anv_descriptor_set_write_image_view(struct anv_device *device,
3020                                     struct anv_descriptor_set *set,
3021                                     const VkDescriptorImageInfo * const info,
3022                                     VkDescriptorType type,
3023                                     uint32_t binding,
3024                                     uint32_t element);
3025 
3026 void
3027 anv_descriptor_set_write_buffer_view(struct anv_device *device,
3028                                      struct anv_descriptor_set *set,
3029                                      VkDescriptorType type,
3030                                      struct anv_buffer_view *buffer_view,
3031                                      uint32_t binding,
3032                                      uint32_t element);
3033 
3034 void
3035 anv_descriptor_set_write_buffer(struct anv_device *device,
3036                                 struct anv_descriptor_set *set,
3037                                 VkDescriptorType type,
3038                                 struct anv_buffer *buffer,
3039                                 uint32_t binding,
3040                                 uint32_t element,
3041                                 VkDeviceSize offset,
3042                                 VkDeviceSize range);
3043 
3044 void
3045 anv_descriptor_write_surface_state(struct anv_device *device,
3046                                    struct anv_descriptor *desc,
3047                                    struct anv_state surface_state);
3048 
3049 void
3050 anv_descriptor_set_write_acceleration_structure(struct anv_device *device,
3051                                                 struct anv_descriptor_set *set,
3052                                                 struct vk_acceleration_structure *accel,
3053                                                 uint32_t binding,
3054                                                 uint32_t element);
3055 
3056 void
3057 anv_descriptor_set_write_inline_uniform_data(struct anv_device *device,
3058                                              struct anv_descriptor_set *set,
3059                                              uint32_t binding,
3060                                              const void *data,
3061                                              size_t offset,
3062                                              size_t size);
3063 
3064 void
3065 anv_descriptor_set_write(struct anv_device *device,
3066                          struct anv_descriptor_set *set_override,
3067                          uint32_t write_count,
3068                          const VkWriteDescriptorSet *writes);
3069 
3070 void
3071 anv_descriptor_set_write_template(struct anv_device *device,
3072                                   struct anv_descriptor_set *set,
3073                                   const struct vk_descriptor_update_template *template,
3074                                   const void *data);
3075 
3076 #define ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER (UINT8_MAX - 5)
3077 #define ANV_DESCRIPTOR_SET_NULL               (UINT8_MAX - 4)
3078 #define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS     (UINT8_MAX - 3)
3079 #define ANV_DESCRIPTOR_SET_DESCRIPTORS        (UINT8_MAX - 2)
3080 #define ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS    (UINT8_MAX - 1)
3081 #define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS   UINT8_MAX
3082 
3083 struct anv_pipeline_binding {
3084    /** Index in the descriptor set
3085     *
3086     * This is a flattened index; the descriptor set layout is already taken
3087     * into account.
3088     */
3089    uint32_t index;
3090 
3091    /** Binding in the descriptor set. Not valid for any of the
3092     * ANV_DESCRIPTOR_SET_*
3093     */
3094    uint32_t binding;
3095 
3096    /** Offset in the descriptor buffer
3097     *
3098     * Relative to anv_descriptor_set::desc_addr. This is useful for
3099     * ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT, to generate the binding
3100     * table entry.
3101     */
3102    uint32_t set_offset;
3103 
3104    /** The descriptor set this surface corresponds to.
3105     *
3106     * The special ANV_DESCRIPTOR_SET_* values above indicate that this
3107     * binding is not a normal descriptor set but something else.
3108     */
3109    uint8_t set;
3110 
3111    union {
3112       /** Plane in the binding index for images */
3113       uint8_t plane;
3114 
3115       /** Input attachment index (relative to the subpass) */
3116       uint8_t input_attachment_index;
3117 
3118       /** Dynamic offset index
3119        *
3120        * For dynamic UBOs and SSBOs, relative to set.
3121        */
3122       uint8_t dynamic_offset_index;
3123    };
3124 };
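/* Illustrative sketch: consumers of anv_pipeline_binding first check whether
 * the `set` field holds one of the special ANV_DESCRIPTOR_SET_* values before
 * treating it as a regular descriptor set index.  The surrounding code is
 * hypothetical:
 *
 *    if (binding->set == ANV_DESCRIPTOR_SET_NULL) {
 *       // emit a null surface
 *    } else if (binding->set < MAX_SETS) {
 *       // look up the bound set and use binding->index within it
 *    }
 */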
3125 
3126 struct anv_embedded_sampler_key {
3127    /** No need to track binding elements for embedded samplers as:
3128     *
3129     *    VUID-VkDescriptorSetLayoutBinding-flags-08006:
3130     *
3131     *       "If VkDescriptorSetLayoutCreateInfo::flags contains
3132     *        VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT,
3133     *        descriptorCount must: be less than or equal to 1"
3134     *
3135     * The following struct can be safely hashed as it doesn't include any
3136     * address/offset.
3137     */
3138    uint32_t sampler[4];
3139    uint32_t color[4];
3140 };
3141 
3142 struct anv_pipeline_embedded_sampler_binding {
3143    /** The descriptor set this sampler belongs to */
3144    uint8_t set;
3145 
3146    /** The binding in the set this sampler belongs to */
3147    uint32_t binding;
3148 
3149    /** The data configuring the sampler */
3150    struct anv_embedded_sampler_key key;
3151 };
3152 
3153 struct anv_push_range {
3154    /** Index in the descriptor set */
3155    uint32_t index;
3156 
3157    /** Descriptor set index */
3158    uint8_t set;
3159 
3160    /** Dynamic offset index (for dynamic UBOs), relative to set. */
3161    uint8_t dynamic_offset_index;
3162 
3163    /** Start offset in units of 32B */
3164    uint8_t start;
3165 
3166    /** Range in units of 32B */
3167    uint8_t length;
3168 };
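/* Illustrative sketch: start and length are expressed in 32-byte units, so
 * the byte offset and size covered by a push range are obtained by scaling.
 * `range` is a hypothetical pointer for the example:
 *
 *    uint32_t byte_offset = (uint32_t)range->start * 32;
 *    uint32_t byte_size   = (uint32_t)range->length * 32;
 */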
3169 
3170 struct anv_pipeline_sets_layout {
3171    struct anv_device *device;
3172 
3173    struct {
3174       struct anv_descriptor_set_layout *layout;
3175       uint32_t dynamic_offset_start;
3176    } set[MAX_SETS];
3177 
3178    enum anv_descriptor_set_layout_type type;
3179 
3180    uint32_t num_sets;
3181    uint32_t num_dynamic_buffers;
3182    int push_descriptor_set_index;
3183 
3184    bool independent_sets;
3185 
3186    unsigned char sha1[20];
3187 };
3188 
3189 void anv_pipeline_sets_layout_init(struct anv_pipeline_sets_layout *layout,
3190                                    struct anv_device *device,
3191                                    bool independent_sets);
3192 
3193 void anv_pipeline_sets_layout_fini(struct anv_pipeline_sets_layout *layout);
3194 
3195 void anv_pipeline_sets_layout_add(struct anv_pipeline_sets_layout *layout,
3196                                   uint32_t set_idx,
3197                                   struct anv_descriptor_set_layout *set_layout);
3198 
3199 uint32_t
3200 anv_pipeline_sets_layout_embedded_sampler_count(const struct anv_pipeline_sets_layout *layout);
3201 
3202 void anv_pipeline_sets_layout_hash(struct anv_pipeline_sets_layout *layout);
3203 
3204 void anv_pipeline_sets_layout_print(const struct anv_pipeline_sets_layout *layout);
3205 
3206 struct anv_pipeline_layout {
3207    struct vk_object_base base;
3208 
3209    struct anv_pipeline_sets_layout sets_layout;
3210 };
3211 
3212 const struct anv_descriptor_set_layout *
3213 anv_pipeline_layout_get_push_set(const struct anv_pipeline_sets_layout *layout,
3214                                  uint8_t *desc_idx);
3215 
3216 struct anv_sparse_binding_data {
3217    uint64_t address;
3218    uint64_t size;
3219 
3220    /* This is kept only because it's given to us by vma_alloc() and needs to
3221     * be passed back to vma_free(); we have no other particular use for it.
3222     */
3223    struct util_vma_heap *vma_heap;
3224 };
3225 
3226 #define ANV_SPARSE_BLOCK_SIZE (64 * 1024)
3227 
3228 static inline bool
3229 anv_sparse_binding_is_enabled(struct anv_device *device)
3230 {
3231    return device->vk.enabled_features.sparseBinding;
3232 }
3233 
3234 static inline bool
3235 anv_sparse_residency_is_enabled(struct anv_device *device)
3236 {
3237    return device->vk.enabled_features.sparseResidencyBuffer ||
3238           device->vk.enabled_features.sparseResidencyImage2D ||
3239           device->vk.enabled_features.sparseResidencyImage3D ||
3240           device->vk.enabled_features.sparseResidency2Samples ||
3241           device->vk.enabled_features.sparseResidency4Samples ||
3242           device->vk.enabled_features.sparseResidency8Samples ||
3243           device->vk.enabled_features.sparseResidency16Samples ||
3244           device->vk.enabled_features.sparseResidencyAliased;
3245 }
3246 
3247 VkResult anv_init_sparse_bindings(struct anv_device *device,
3248                                   uint64_t size,
3249                                   struct anv_sparse_binding_data *sparse,
3250                                   enum anv_bo_alloc_flags alloc_flags,
3251                                   uint64_t client_address,
3252                                   struct anv_address *out_address);
3253 void anv_free_sparse_bindings(struct anv_device *device,
3254                               struct anv_sparse_binding_data *sparse);
3255 VkResult anv_sparse_bind_buffer(struct anv_device *device,
3256                                 struct anv_buffer *buffer,
3257                                 const VkSparseMemoryBind *vk_bind,
3258                                 struct anv_sparse_submission *submit);
3259 VkResult anv_sparse_bind_image_opaque(struct anv_device *device,
3260                                       struct anv_image *image,
3261                                       const VkSparseMemoryBind *vk_bind,
3262                                       struct anv_sparse_submission *submit);
3263 VkResult anv_sparse_bind_image_memory(struct anv_queue *queue,
3264                                       struct anv_image *image,
3265                                       const VkSparseImageMemoryBind *bind,
3266                                       struct anv_sparse_submission *submit);
3267 VkResult anv_sparse_bind(struct anv_device *device,
3268                          struct anv_sparse_submission *sparse_submit);
3269 
3270 VkResult anv_sparse_trtt_garbage_collect_batches(struct anv_device *device,
3271                                                  bool wait_completion);
3272 
3273 VkSparseImageFormatProperties
3274 anv_sparse_calc_image_format_properties(struct anv_physical_device *pdevice,
3275                                         VkImageAspectFlags aspect,
3276                                         VkImageType vk_image_type,
3277                                         VkSampleCountFlagBits vk_samples,
3278                                         struct isl_surf *surf);
3279 void anv_sparse_calc_miptail_properties(struct anv_device *device,
3280                                         struct anv_image *image,
3281                                         VkImageAspectFlags vk_aspect,
3282                                         uint32_t *imageMipTailFirstLod,
3283                                         VkDeviceSize *imageMipTailSize,
3284                                         VkDeviceSize *imageMipTailOffset,
3285                                         VkDeviceSize *imageMipTailStride);
3286 VkResult anv_sparse_image_check_support(struct anv_physical_device *pdevice,
3287                                         VkImageCreateFlags flags,
3288                                         VkImageTiling tiling,
3289                                         VkSampleCountFlagBits samples,
3290                                         VkImageType type,
3291                                         VkFormat format);
3292 
3293 struct anv_buffer {
3294    struct vk_buffer vk;
3295 
3296    /* Set when bound */
3297    struct anv_address address;
3298 
3299    struct anv_sparse_binding_data sparse_data;
3300 };
3301 
3302 static inline bool
3303 anv_buffer_is_protected(const struct anv_buffer *buffer)
3304 {
3305    return buffer->vk.create_flags & VK_BUFFER_CREATE_PROTECTED_BIT;
3306 }
3307 
3308 static inline bool
3309 anv_buffer_is_sparse(const struct anv_buffer *buffer)
3310 {
3311    return buffer->vk.create_flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT;
3312 }
3313 
3314 enum anv_cmd_dirty_bits {
3315    ANV_CMD_DIRTY_PIPELINE                            = 1 << 0,
3316    ANV_CMD_DIRTY_INDEX_BUFFER                        = 1 << 1,
3317    ANV_CMD_DIRTY_RENDER_AREA                         = 1 << 2,
3318    ANV_CMD_DIRTY_RENDER_TARGETS                      = 1 << 3,
3319    ANV_CMD_DIRTY_XFB_ENABLE                          = 1 << 4,
3320    ANV_CMD_DIRTY_RESTART_INDEX                       = 1 << 5,
3321    ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE              = 1 << 6,
3322    ANV_CMD_DIRTY_FS_MSAA_FLAGS                       = 1 << 7,
3323    ANV_CMD_DIRTY_COARSE_PIXEL_ACTIVE                 = 1 << 8,
3324    ANV_CMD_DIRTY_INDIRECT_DATA_STRIDE                = 1 << 9,
3325 };
3326 typedef enum anv_cmd_dirty_bits anv_cmd_dirty_mask_t;
3327 
3328 enum anv_pipe_bits {
3329    ANV_PIPE_DEPTH_CACHE_FLUSH_BIT            = (1 << 0),
3330    ANV_PIPE_STALL_AT_SCOREBOARD_BIT          = (1 << 1),
3331    ANV_PIPE_STATE_CACHE_INVALIDATE_BIT       = (1 << 2),
3332    ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT    = (1 << 3),
3333    ANV_PIPE_VF_CACHE_INVALIDATE_BIT          = (1 << 4),
3334    ANV_PIPE_DATA_CACHE_FLUSH_BIT             = (1 << 5),
3335    ANV_PIPE_TILE_CACHE_FLUSH_BIT             = (1 << 6),
3336    ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT     = (1 << 10),
3337    ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT = (1 << 11),
3338    ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT    = (1 << 12),
3339    ANV_PIPE_DEPTH_STALL_BIT                  = (1 << 13),
3340 
3341    /* ANV_PIPE_HDC_PIPELINE_FLUSH_BIT is a precise way to ensure prior data
3342     * cache work has completed.  Available on Gfx12+.  For earlier Gfx we
3343     * must reinterpret this flush as ANV_PIPE_DATA_CACHE_FLUSH_BIT.
3344     */
3345    ANV_PIPE_HDC_PIPELINE_FLUSH_BIT           = (1 << 14),
3346    ANV_PIPE_PSS_STALL_SYNC_BIT               = (1 << 15),
3347 
3348    /*
3349     * This bit flushes the data-port's untyped L1 data cache (LSC L1).
3350     */
3351    ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT = (1 << 16),
3352 
3353    /* This bit controls the flushing of the engine (Render, Compute) specific
3354     * entries from the compression cache.
3355     */
3356    ANV_PIPE_CCS_CACHE_FLUSH_BIT              = (1 << 17),
3357 
3358    ANV_PIPE_TLB_INVALIDATE_BIT               = (1 << 18),
3359 
3360    ANV_PIPE_CS_STALL_BIT                     = (1 << 20),
3361    ANV_PIPE_END_OF_PIPE_SYNC_BIT             = (1 << 21),
3362 
3363    /* This bit does not exist directly in PIPE_CONTROL.  Instead it means that
3364     * a flush has happened but not a CS stall.  The next time we do any sort
3365     * of invalidation we need to insert a CS stall at that time.  Otherwise,
3366     * we would have to CS stall on every flush which could be bad.
3367     */
3368    ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT       = (1 << 22),
3369 
3370    /* This bit does not exist directly in PIPE_CONTROL. It means that Gfx12
3371     * AUX-TT data has changed and we need to invalidate AUX-TT data.  This is
3372     * done by writing the AUX-TT register.
3373     */
3374    ANV_PIPE_AUX_TABLE_INVALIDATE_BIT         = (1 << 23),
3375 
3376    /* This bit does not exist directly in PIPE_CONTROL. It means that a
3377     * PIPE_CONTROL with a post-sync operation will follow. This is used to
3378     * implement a workaround for Gfx9.
3379     */
3380    ANV_PIPE_POST_SYNC_BIT                    = (1 << 24),
3381 };
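/* Illustrative sketch of how some of the bits above that have no direct
 * PIPE_CONTROL equivalent are consumed; the real logic lives in
 * genX(emit_apply_pipe_flushes) and may differ in detail.  `bits` and
 * `devinfo` are hypothetical locals for the example:
 *
 *    if (devinfo->ver < 12 && (bits & ANV_PIPE_HDC_PIPELINE_FLUSH_BIT)) {
 *       // HDC flush only exists on Gfx12+, reinterpret as a data cache flush
 *       bits &= ~ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
 *       bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
 *    }
 *    if ((bits & ANV_PIPE_INVALIDATE_BITS) &&
 *        (bits & ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT)) {
 *       // a prior flush happened without a CS stall, stall before invalidating
 *       bits |= ANV_PIPE_CS_STALL_BIT;
 *       bits &= ~ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT;
 *    }
 */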
3382 
3383 /* These bits track the state of buffer writes for queries. They get cleared
3384  * based on PIPE_CONTROL emissions.
3385  */
3386 enum anv_query_bits {
3387    ANV_QUERY_WRITES_RT_FLUSH      = (1 << 0),
3388 
3389    ANV_QUERY_WRITES_TILE_FLUSH    = (1 << 1),
3390 
3391    ANV_QUERY_WRITES_CS_STALL      = (1 << 2),
3392 
3393    ANV_QUERY_WRITES_DATA_FLUSH    = (1 << 3),
3394 };
3395 
3396 /* It's not clear why DG2 doesn't have issues with L3/CS coherency. But it's
3397  * likely related to performance workaround 14015868140.
3398  *
3399  * For now we enable this only on DG2 and platforms prior to Gfx12 where there
3400  * is no tile cache.
3401  */
3402 #define ANV_DEVINFO_HAS_COHERENT_L3_CS(devinfo) \
3403    (intel_device_info_is_dg2(devinfo))
3404 
3405 /* Things we need to flush before accessing query data using the command
3406  * streamer.
3407  *
3408  * Prior to DG2, experiments show that the command streamer is not coherent
3409  * with the tile cache so we need to flush it to make any data visible to CS.
3410  *
3411  * Otherwise we want to flush the RT cache which is where blorp writes, either
3412  * for clearing the query buffer or for clearing the destination buffer in
3413  * vkCmdCopyQueryPoolResults().
3414  */
3415 #define ANV_QUERY_RENDER_TARGET_WRITES_PENDING_BITS(devinfo) \
3416    (((!ANV_DEVINFO_HAS_COHERENT_L3_CS(devinfo) && \
3417       (devinfo)->ver >= 12) ? \
3418      ANV_QUERY_WRITES_TILE_FLUSH : 0) | \
3419     ANV_QUERY_WRITES_RT_FLUSH | \
3420     ANV_QUERY_WRITES_CS_STALL)
3421 #define ANV_QUERY_COMPUTE_WRITES_PENDING_BITS \
3422    (ANV_QUERY_WRITES_DATA_FLUSH | \
3423     ANV_QUERY_WRITES_CS_STALL)
3424 
3425 #define ANV_PIPE_QUERY_BITS(pending_query_bits) ( \
3426    ((pending_query_bits & ANV_QUERY_WRITES_RT_FLUSH) ?   \
3427     ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT : 0) | \
3428    ((pending_query_bits & ANV_QUERY_WRITES_TILE_FLUSH) ?   \
3429     ANV_PIPE_TILE_CACHE_FLUSH_BIT : 0) | \
3430    ((pending_query_bits & ANV_QUERY_WRITES_CS_STALL) ?   \
3431     ANV_PIPE_CS_STALL_BIT : 0) | \
3432    ((pending_query_bits & ANV_QUERY_WRITES_DATA_FLUSH) ?  \
3433     (ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
3434      ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \
3435      ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT) : 0))
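/* Illustrative sketch: before reading or copying query data with the command
 * streamer, the pending query write bits are translated into PIPE_CONTROL
 * bits and added to the command buffer's pending flushes.  `cmd_buffer` is a
 * hypothetical pointer for the example:
 *
 *    cmd_buffer->state.pending_pipe_bits |=
 *       ANV_PIPE_QUERY_BITS(cmd_buffer->state.queries.buffer_write_bits);
 */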
3436 
3437 #define ANV_PIPE_FLUSH_BITS ( \
3438    ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \
3439    ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
3440    ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \
3441    ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT | \
3442    ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | \
3443    ANV_PIPE_TILE_CACHE_FLUSH_BIT)
3444 
3445 #define ANV_PIPE_STALL_BITS ( \
3446    ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \
3447    ANV_PIPE_DEPTH_STALL_BIT | \
3448    ANV_PIPE_CS_STALL_BIT | \
3449    ANV_PIPE_PSS_STALL_SYNC_BIT)
3450 
3451 #define ANV_PIPE_INVALIDATE_BITS ( \
3452    ANV_PIPE_STATE_CACHE_INVALIDATE_BIT | \
3453    ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | \
3454    ANV_PIPE_VF_CACHE_INVALIDATE_BIT | \
3455    ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | \
3456    ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT | \
3457    ANV_PIPE_AUX_TABLE_INVALIDATE_BIT)
3458 
3459 /* PIPE_CONTROL bits that should be set only in 3D RCS mode.
3460  * For more details see genX(emit_apply_pipe_flushes).
3461  */
3462 #define ANV_PIPE_GFX_BITS ( \
3463    ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | \
3464    ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \
3465    ANV_PIPE_TILE_CACHE_FLUSH_BIT | \
3466    ANV_PIPE_DEPTH_STALL_BIT | \
3467    ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \
3468    (GFX_VERx10 >= 125 ? ANV_PIPE_PSS_STALL_SYNC_BIT : 0) | \
3469    ANV_PIPE_VF_CACHE_INVALIDATE_BIT)
3470 
3471 /* PIPE_CONTROL bits that should be set only in Media/GPGPU RCS mode.
3472  * For more details see genX(emit_apply_pipe_flushes).
3473  *
3474  * Documentation says that untyped L1 dataport cache flush is controlled by
3475  * HDC pipeline flush in 3D mode according to HDC_CHICKEN0 register:
3476  *
3477  * BSpec 47112: PIPE_CONTROL::HDC Pipeline Flush:
3478  *
3479  *    "When the "Pipeline Select" mode in PIPELINE_SELECT command is set to
3480  *     "3D", HDC Pipeline Flush can also flush/invalidate the LSC Untyped L1
3481  *     cache based on the programming of HDC_Chicken0 register bits 13:11."
3482  *
3483  *    "When the 'Pipeline Select' mode is set to 'GPGPU', the LSC Untyped L1
3484  *     cache flush is controlled by 'Untyped Data-Port Cache Flush' bit in the
3485  *     PIPE_CONTROL command."
3486  *
3487  *    As part of Wa_22010960976 & Wa_14013347512, i915 is programming
3488  *    HDC_CHICKEN0[11:13] = 0 ("Untyped L1 is flushed, for both 3D Pipecontrol
3489  *    Dataport flush, and UAV coherency barrier event"). So there is no need
3490  *    to set "Untyped Data-Port Cache" in 3D mode.
3491  *
3492  * On MTL the HDC_CHICKEN0 default values changed to match what was programmed
3493  * by Wa_22010960976 & Wa_14013347512 on DG2, but experiments show that the
3494  * change runs a bit deeper. Even manually writing to the HDC_CHICKEN0
3495  * register to force L1 untyped flush with HDC pipeline flush has no effect on
3496  * MTL.
3497  *
3498  * It seems like the HW change completely disconnected L1 untyped flush from
3499  * HDC pipeline flush with no way to bring that behavior back. So leave the L1
3500  * untyped flush active in 3D mode on all platforms since it doesn't seem to
3501  * cause issues there either.
3502  *
3503  * Maybe we'll have some GPGPU only bits here at some point.
3504  */
3505 #define ANV_PIPE_GPGPU_BITS (0)
3506 
3507 enum intel_ds_stall_flag
3508 anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits);
3509 
3510 #define VK_IMAGE_ASPECT_PLANES_BITS_ANV ( \
3511    VK_IMAGE_ASPECT_PLANE_0_BIT | \
3512    VK_IMAGE_ASPECT_PLANE_1_BIT | \
3513    VK_IMAGE_ASPECT_PLANE_2_BIT)
3514 
3515 #define VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV (         \
3516    VK_IMAGE_ASPECT_COLOR_BIT | \
3517    VK_IMAGE_ASPECT_PLANES_BITS_ANV)
3518 
3519 struct anv_vertex_binding {
3520    struct anv_buffer *                          buffer;
3521    VkDeviceSize                                 offset;
3522    VkDeviceSize                                 size;
3523 };
3524 
3525 struct anv_xfb_binding {
3526    struct anv_buffer *                          buffer;
3527    VkDeviceSize                                 offset;
3528    VkDeviceSize                                 size;
3529 };
3530 
3531 struct anv_push_constants {
3532    /** Push constant data provided by the client through vkPushConstants */
3533    uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE];
3534 
3535 #define ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK ((uint32_t)ANV_UBO_ALIGNMENT - 1)
3536 #define ANV_DESCRIPTOR_SET_OFFSET_MASK        (~(uint32_t)(ANV_UBO_ALIGNMENT - 1))
3537 
3538    /**
3539     * Base surface offsets for descriptor sets.
3540     *
3541     * The offset has a different meaning depending on a number of factors:
3542     *
3543     *    - with descriptor sets (direct or indirect), it is relative to
3544     *      pdevice->va.descriptor_pool
3545     *
3546     *    - with descriptor buffers on DG2+, it is relative to
3547     *      device->va.descriptor_buffer_pool
3548     *
3549     *    - with descriptor buffers prior to DG2, it is relative to the value
3550     *      programmed in STATE_BASE_ADDRESS::BindlessSurfaceStateBaseAddress
3551     */
3552    uint32_t desc_surface_offsets[MAX_SETS];
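   /* Illustrative sketch (an assumption based on the two masks defined above,
    * not a statement about the exact encoding): a packed entry of
    * desc_surface_offsets[] can be split into the descriptor buffer offset
    * and the dynamic offset start index.  `push` and `set_idx` are
    * hypothetical names for the example:
    *
    *    uint32_t packed    = push->desc_surface_offsets[set_idx];
    *    uint32_t offset    = packed & ANV_DESCRIPTOR_SET_OFFSET_MASK;
    *    uint32_t dyn_index = packed & ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK;
    */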
3553 
3554    /**
3555     * Base sampler offsets for descriptor sets.
3556     */
3557    uint32_t desc_sampler_offsets[MAX_SETS];
3558 
3559    /** Dynamic offsets for dynamic UBOs and SSBOs */
3560    uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
3561 
3562    /** Surface buffer base offset
3563     *
3564     * Only used prior to DG2 with descriptor buffers.
3565     *
3566     * (surfaces_base_offset + desc_surface_offsets[set_index]) is relative to
3567     * device->va.descriptor_buffer_pool and can be used to compute a 64-bit
3568     * address to the descriptor buffer (using load_desc_set_address_intel).
3569     */
3570    uint32_t surfaces_base_offset;
3571 
3572    /* Robust access pushed registers. */
3573    uint64_t push_reg_mask[MESA_SHADER_STAGES];
3574 
3575    /** Ray query globals (RT_DISPATCH_GLOBALS) */
3576    uint64_t ray_query_globals;
3577 
3578    union {
3579       struct {
3580          /** Dynamic MSAA value */
3581          uint32_t fs_msaa_flags;
3582 
3583          /** Dynamic TCS input vertices */
3584          uint32_t tcs_input_vertices;
3585       } gfx;
3586 
3587       struct {
3588          /** Base workgroup ID
3589           *
3590           * Used for vkCmdDispatchBase.
3591           */
3592          uint32_t base_work_group_id[3];
3593 
3594          /** Subgroup ID
3595           *
3596           * This is never set by software but is implicitly filled out when
3597           * uploading the push constants for compute shaders.
3598           *
3599           * This *MUST* be the last field of the anv_push_constants structure.
3600           */
3601          uint32_t subgroup_id;
3602       } cs;
3603    };
3604 };
3605 
3606 struct anv_surface_state {
3607    /** Surface state allocated from the bindless heap
3608     *
3609     * Can be NULL if unused.
3610     */
3611    struct anv_state state;
3612 
3613    /** Surface state after genxml packing
3614     *
3615     * Same data as in state.
3616     */
3617    struct anv_surface_state_data state_data;
3618 
3619    /** Address of the surface referred to by this state
3620     *
3621     * This address is relative to the start of the BO.
3622     */
3623    struct anv_address address;
3624    /* Address of the aux surface, if any
3625     *
3626     * This field is ANV_NULL_ADDRESS if and only if no aux surface exists.
3627     *
3628     * With the exception of gfx8, the bottom 12 bits of this address' offset
3629     * include extra aux information.
3630     */
3631    struct anv_address aux_address;
3632    /* Address of the clear color, if any
3633     *
3634     * This address is relative to the start of the BO.
3635     */
3636    struct anv_address clear_address;
3637 };
3638 
3639 struct anv_attachment {
3640    VkFormat vk_format;
3641    const struct anv_image_view *iview;
3642    VkImageLayout layout;
3643    enum isl_aux_usage aux_usage;
3644    struct anv_surface_state surface_state;
3645 
3646    VkResolveModeFlagBits resolve_mode;
3647    const struct anv_image_view *resolve_iview;
3648    VkImageLayout resolve_layout;
3649 };
3650 
3651 /** State tracking for vertex buffer flushes
3652  *
3653  * On Gfx8-9, the VF cache only considers the bottom 32 bits of memory
3654  * addresses.  If you happen to have two vertex buffers which get placed
3655  * exactly 4 GiB apart and use them in back-to-back draw calls, you can get
3656  * collisions.  In order to solve this problem, we track vertex address ranges
3657  * which are live in the cache and invalidate the cache if one ever exceeds 32
3658  * bits.
3659  */
3660 struct anv_vb_cache_range {
3661    /* Virtual address at which the live vertex buffer cache range starts for
3662     * this vertex buffer index.
3663     */
3664    uint64_t start;
3665 
3666    /* Virtual address of the byte after where vertex buffer cache range ends.
3667     * This is exclusive such that end - start is the size of the range.
3668     */
3669    uint64_t end;
3670 };
3671 
3672 static inline void
3673 anv_merge_vb_cache_range(struct anv_vb_cache_range *dirty,
3674                          const struct anv_vb_cache_range *bound)
3675 {
3676    if (dirty->start == dirty->end) {
3677       *dirty = *bound;
3678    } else if (bound->start != bound->end) {
3679       dirty->start = MIN2(dirty->start, bound->start);
3680       dirty->end = MAX2(dirty->end, bound->end);
3681    }
3682 }
3683 
3684 /* Check whether we need to apply the Gfx8-9 vertex buffer workaround. */
3685 static inline bool
3686 anv_gfx8_9_vb_cache_range_needs_workaround(struct anv_vb_cache_range *bound,
3687                                            struct anv_vb_cache_range *dirty,
3688                                            struct anv_address vb_address,
3689                                            uint32_t vb_size)
3690 {
3691    if (vb_size == 0) {
3692       bound->start = 0;
3693       bound->end = 0;
3694       return false;
3695    }
3696 
3697    bound->start = intel_48b_address(anv_address_physical(vb_address));
3698    bound->end = bound->start + vb_size;
3699    assert(bound->end > bound->start); /* No overflow */
3700 
3701    /* Align everything to a cache line */
3702    bound->start &= ~(64ull - 1ull);
3703    bound->end = align64(bound->end, 64);
3704 
3705    anv_merge_vb_cache_range(dirty, bound);
3706 
3707    /* If our range is larger than 32 bits, we have to flush */
3708    assert(bound->end - bound->start <= (1ull << 32));
3709    return (dirty->end - dirty->start) > (1ull << 32);
3710 }
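/* Illustrative sketch: when binding vertex buffer `vb_idx`, the workaround
 * check is run against the tracked ranges and, if it returns true, the VF
 * cache must be invalidated before the next draw.  The field names come from
 * anv_cmd_graphics_state below; the surrounding code is hypothetical:
 *
 *    if (anv_gfx8_9_vb_cache_range_needs_workaround(
 *           &gfx->vb_bound_ranges[vb_idx],
 *           &gfx->vb_dirty_ranges[vb_idx],
 *           vb_address, vb_size)) {
 *       cmd_buffer->state.pending_pipe_bits |=
 *          ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
 *    }
 */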
3711 
3712 /**
3713  * State tracking for simple internal shaders
3714  */
3715 struct anv_simple_shader {
3716    /* The device associated with this emission */
3717    struct anv_device *device;
3718    /* The command buffer associated with this emission (can be NULL) */
3719    struct anv_cmd_buffer *cmd_buffer;
3720    /* State stream used for various internal allocations */
3721    struct anv_state_stream *dynamic_state_stream;
3722    struct anv_state_stream *general_state_stream;
3723    /* Where to emit the commands (can be different from cmd_buffer->batch) */
3724    struct anv_batch *batch;
3725    /* Shader to use */
3726    struct anv_shader_bin *kernel;
3727    /* L3 config used by the shader */
3728    const struct intel_l3_config *l3_config;
3729    /* Current URB config */
3730    const struct intel_urb_config *urb_cfg;
3731 
3732    /* Managed by the simple shader helper. */
3733    struct anv_state bt_state;
3734 };
3735 
3736 /** State tracking for particular pipeline bind point
3737  *
3738  * This struct is the base struct for anv_cmd_graphics_state and
3739  * anv_cmd_compute_state.  These are used to track state which is bound to a
3740  * particular type of pipeline.  Generic state that applies per-stage such as
3741  * binding table offsets and push constants is tracked generically with a
3742  * per-stage array in anv_cmd_state.
3743  */
3744 struct anv_cmd_pipeline_state {
3745    struct anv_descriptor_set *descriptors[MAX_SETS];
3746    struct {
3747       bool             bound;
3748       /**
3749        * Buffer index used by this descriptor set.
3750        */
3751       int32_t          buffer_index; /* -1 means push descriptor */
3752       /**
3753        * Offset of the descriptor set in the descriptor buffer.
3754        */
3755       uint32_t         buffer_offset;
3756       /**
3757        * Final computed address to be emitted in the descriptor set surface
3758        * state.
3759        */
3760       uint64_t         address;
3761       /**
3762        * The descriptor set surface state.
3763        */
3764       struct anv_state state;
3765    } descriptor_buffers[MAX_SETS];
3766    struct anv_push_descriptor_set push_descriptor;
3767 
3768    struct anv_push_constants push_constants;
3769 
3770    /** Tracks whether the push constant data has changed and needs to be reemitted */
3771    bool                                         push_constants_data_dirty;
3772 
3773    /* Push constant state allocated when flushing push constants. */
3774    struct anv_state          push_constants_state;
3775 
3776    /**
3777     * Dynamic buffer offsets.
3778     *
3779     * We have a maximum of MAX_DYNAMIC_BUFFERS per pipeline, but with
3780     * independent sets we cannot know how much in total is going to be
3781     * used. As a result we need to store the maximum possible number per set.
3782     *
3783     * Those values are written into anv_push_constants::dynamic_offsets at
3784     * flush time when we have the pipeline with the final
3785     * anv_pipeline_sets_layout.
3786     */
3787    struct {
3788       uint32_t                                  offsets[MAX_DYNAMIC_BUFFERS];
3789    }                                            dynamic_offsets[MAX_SETS];
3790 
3791    /**
3792     * The current bound pipeline.
3793     */
3794    struct anv_pipeline      *pipeline;
3795 };
3796 
3797 enum anv_coarse_pixel_state {
3798    ANV_COARSE_PIXEL_STATE_UNKNOWN,
3799    ANV_COARSE_PIXEL_STATE_DISABLED,
3800    ANV_COARSE_PIXEL_STATE_ENABLED,
3801 };
3802 
3803 /** State tracking for graphics pipeline
3804  *
3805  * This has anv_cmd_pipeline_state as a base struct to track things which get
3806  * bound to a graphics pipeline.  Along with general pipeline bind point state
3807  * which is in the anv_cmd_pipeline_state base struct, it also contains other
3808  * state which is graphics-specific.
3809  */
3810 struct anv_cmd_graphics_state {
3811    struct anv_cmd_pipeline_state base;
3812 
3813    VkRenderingFlags rendering_flags;
3814    VkRect2D render_area;
3815    uint32_t layer_count;
3816    uint32_t samples;
3817    uint32_t view_mask;
3818    uint32_t color_att_count;
3819    struct anv_state att_states;
3820    struct anv_attachment color_att[MAX_RTS];
3821    struct anv_attachment depth_att;
3822    struct anv_attachment stencil_att;
3823    struct anv_state null_surface_state;
3824 
3825    anv_cmd_dirty_mask_t dirty;
3826    uint32_t vb_dirty;
3827 
3828    struct anv_vb_cache_range ib_bound_range;
3829    struct anv_vb_cache_range ib_dirty_range;
3830    struct anv_vb_cache_range vb_bound_ranges[33];
3831    struct anv_vb_cache_range vb_dirty_ranges[33];
3832 
3833    uint32_t restart_index;
3834 
3835    VkShaderStageFlags push_constant_stages;
3836 
3837    uint32_t primitive_topology;
3838    bool used_task_shader;
3839 
3840    struct anv_buffer *index_buffer;
3841    uint32_t index_type; /**< 3DSTATE_INDEX_BUFFER.IndexFormat */
3842    uint32_t index_offset;
3843    uint32_t index_size;
3844 
3845    uint32_t indirect_data_stride;
3846    bool indirect_data_stride_aligned;
3847 
3848    struct vk_vertex_input_state vertex_input;
3849    struct vk_sample_locations_state sample_locations;
3850 
3851    /* Dynamic MSAA flags; this value can be different from
3852     * anv_push_constants::gfx::fs_msaa_flags, as the push constant value only
3853     * needs to be updated for fragment shaders dynamically checking the value.
3854     */
3855    enum intel_msaa_flags fs_msaa_flags;
3856 
3857    bool object_preemption;
3858    bool has_uint_rt;
3859 
3860    /* State tracking for Wa_14018912822. */
3861    bool color_blend_zero;
3862    bool alpha_blend_zero;
3863 
3864    /**
3865     * State tracking for Wa_18020335297.
3866     */
3867    bool                                         viewport_set;
3868 
3869    /**
3870     * State tracking for Wa_18038825448.
3871     */
3872    enum anv_coarse_pixel_state coarse_pixel_active;
3873 
3874    struct intel_urb_config urb_cfg;
3875 
3876    uint32_t n_occlusion_queries;
3877 
3878    struct anv_gfx_dynamic_state dyn_state;
3879 };
3880 
3881 enum anv_depth_reg_mode {
3882    ANV_DEPTH_REG_MODE_UNKNOWN = 0,
3883    ANV_DEPTH_REG_MODE_HW_DEFAULT,
3884    ANV_DEPTH_REG_MODE_D16_1X_MSAA,
3885 };
3886 
3887 /** State tracking for compute pipeline
3888  *
3889  * This has anv_cmd_pipeline_state as a base struct to track things which get
3890  * bound to a compute pipeline.  Along with general pipeline bind point state
3891  * which is in the anv_cmd_pipeline_state base struct, it also contains other
3892  * state which is compute-specific.
3893  */
3894 struct anv_cmd_compute_state {
3895    struct anv_cmd_pipeline_state base;
3896 
3897    bool pipeline_dirty;
3898 
3899    struct anv_address num_workgroups;
3900 
3901    uint32_t scratch_size;
3902 };
3903 
3904 struct anv_cmd_ray_tracing_state {
3905    struct anv_cmd_pipeline_state base;
3906 
3907    bool pipeline_dirty;
3908 
3909    struct {
3910       struct anv_bo *bo;
3911       struct brw_rt_scratch_layout layout;
3912    } scratch;
3913 
3914    struct anv_address build_priv_mem_addr;
3915    size_t             build_priv_mem_size;
3916 };
3917 
3918 enum anv_cmd_descriptor_buffer_mode {
3919    ANV_CMD_DESCRIPTOR_BUFFER_MODE_UNKNOWN,
3920    ANV_CMD_DESCRIPTOR_BUFFER_MODE_LEGACY,
3921    ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER,
3922 };
3923 
3924 /** State required while building cmd buffer */
3925 struct anv_cmd_state {
3926    /* PIPELINE_SELECT.PipelineSelection */
3927    uint32_t                                     current_pipeline;
3928    const struct intel_l3_config *               current_l3_config;
3929    uint32_t                                     last_aux_map_state;
3930 
3931    struct anv_cmd_graphics_state                gfx;
3932    struct anv_cmd_compute_state                 compute;
3933    struct anv_cmd_ray_tracing_state             rt;
3934 
3935    enum anv_pipe_bits                           pending_pipe_bits;
3936 
3937    /**
3938     * Whether the last programmed STATE_BASE_ADDRESS references
3939     * anv_device::dynamic_state_pool or anv_device::dynamic_state_pool_db for
3940     * the dynamic state heap.
3941     */
3942    enum anv_cmd_descriptor_buffer_mode          current_db_mode;
3943 
3944    /**
3945     * Whether the command buffer has pending descriptor buffers bound to it. This
3946     * variable changes before anv_device::current_db_mode.
3947     */
3948    enum anv_cmd_descriptor_buffer_mode          pending_db_mode;
3949 
3950    struct {
3951       /**
3952        * Tracks operations that may interfere with queries in the destination
3953        * buffer of vkCmdCopyQueryPoolResults; we need those operations to have
3954        * completed before we do the work of vkCmdCopyQueryPoolResults.
3955        */
3956       enum anv_query_bits                          buffer_write_bits;
3957 
3958       /**
3959        * Tracks clear operations of query buffers that can interact with
3960        * vkCmdBeginQuery*, vkCmdWriteTimestamp*,
3961        * vkCmdWriteAccelerationStructuresPropertiesKHR, etc...
3962        *
3963        * We need the clearing of the buffer to have completed before we write
3964        * data with the command streamer or a shader.
3965        */
3966       enum anv_query_bits                          clear_bits;
3967    } queries;
3968 
3969    VkShaderStageFlags                           descriptors_dirty;
3970    VkShaderStageFlags                           push_descriptors_dirty;
3971    /** Tracks the 3DSTATE_CONSTANT_* instruction that needs to be reemitted */
3972    VkShaderStageFlags                           push_constants_dirty;
3973 
3974    struct {
3975       uint64_t                                  surfaces_address;
3976       uint64_t                                  samplers_address;
3977       bool                                      dirty;
3978       VkShaderStageFlags                        offsets_dirty;
3979       uint64_t                                  address[MAX_SETS];
3980    }                                            descriptor_buffers;
3981 
3982    struct anv_vertex_binding                    vertex_bindings[MAX_VBS];
3983    bool                                         xfb_enabled;
3984    struct anv_xfb_binding                       xfb_bindings[MAX_XFB_BUFFERS];
3985    struct anv_state                             binding_tables[MESA_VULKAN_SHADER_STAGES];
3986    struct anv_state                             samplers[MESA_VULKAN_SHADER_STAGES];
3987 
3988    unsigned char                                sampler_sha1s[MESA_VULKAN_SHADER_STAGES][20];
3989    unsigned char                                surface_sha1s[MESA_VULKAN_SHADER_STAGES][20];
3990    unsigned char                                push_sha1s[MESA_VULKAN_SHADER_STAGES][20];
3991 
3992    /**
3993     * Whether or not the gfx8 PMA fix is enabled.  We ensure that, at the top
3994     * of any command buffer, it is disabled by disabling it in EndCommandBuffer
3995     * and before invoking the secondary in ExecuteCommands.
3996     */
3997    bool                                         pma_fix_enabled;
3998 
3999    /**
4000     * Whether or not we know for certain that HiZ is enabled for the current
4001     * subpass.  If, for whatever reason, we are unsure as to whether HiZ is
4002     * enabled or not, this will be false.
4003     */
4004    bool                                         hiz_enabled;
4005 
4006    /* We ensure the registers for the gfx12 D16 fix are initialized at the
4007     * first non-NULL depth stencil packet emission of every command buffer.
4008     * For secondary command buffer execution, we transfer the state from the
4009     * last command buffer to the primary (if known).
4010     */
4011    enum anv_depth_reg_mode                      depth_reg_mode;
4012 
4013    /* The last auxiliary surface operation (or equivalent operation) provided
4014     * to genX(cmd_buffer_update_color_aux_op).
4015     */
4016    enum isl_aux_op                              color_aux_op;
4017 
4018    /**
4019     * Whether RHWO optimization is enabled (Wa_1508744258).
4020     */
4021    bool                                         rhwo_optimization_enabled;
4022 
4023    /**
4024     * Pending state of the RHWO optimization, to be applied at the next
4025     * genX(cmd_buffer_apply_pipe_flushes).
4026     */
4027    bool                                         pending_rhwo_optimization_enabled;
4028 
4029    bool                                         conditional_render_enabled;
4030 
4031    /**
4032     * Last rendering scale argument provided to
4033     * genX(cmd_buffer_emit_hashing_mode)().
4034     */
4035    unsigned                                     current_hash_scale;
4036 
4037    /**
4038     * A buffer used for spill/fill of ray queries.
4039     */
4040    struct anv_bo *                              ray_query_shadow_bo;
4041 
4042    /** Pointer to the last emitted COMPUTE_WALKER.
4043     *
4044     * This is used to edit the instruction post emission to replace the "Post
4045     * Sync" field for utrace timestamp emission.
4046     */
4047    void                                        *last_compute_walker;
4048 
4049    /** Pointer to the last emitted EXECUTE_INDIRECT_DISPATCH.
4050     *
4051     * This is used to edit the instruction post emission to replace the "Post
4052     * Sync" field for utrace timestamp emission.
4053     */
4054    void                                        *last_indirect_dispatch;
4055 };
4056 
4057 #define ANV_MIN_CMD_BUFFER_BATCH_SIZE 8192
4058 #define ANV_MAX_CMD_BUFFER_BATCH_SIZE (16 * 1024 * 1024)
4059 
4060 enum anv_cmd_buffer_exec_mode {
4061    ANV_CMD_BUFFER_EXEC_MODE_PRIMARY,
4062    ANV_CMD_BUFFER_EXEC_MODE_EMIT,
4063    ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT,
4064    ANV_CMD_BUFFER_EXEC_MODE_CHAIN,
4065    ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN,
4066    ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN,
4067 };
4068 
4069 struct anv_measure_batch;
4070 
4071 struct anv_cmd_buffer {
4072    struct vk_command_buffer                     vk;
4073 
4074    struct anv_device *                          device;
4075    struct anv_queue_family *                    queue_family;
4076 
4077    /** Batch where the main commands live */
4078    struct anv_batch                             batch;
4079 
4080    /* Pointer to the location in the batch where MI_BATCH_BUFFER_END was
4081     * recorded upon calling vkEndCommandBuffer(). This is useful if we need to
4082     * rewrite the end to chain multiple batches together at vkQueueSubmit().
4083     */
4084    void *                                       batch_end;
4085 
4086    /* Fields required for the actual chain of anv_batch_bo's.
4087     *
4088     * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain().
4089     */
4090    struct list_head                             batch_bos;
4091    enum anv_cmd_buffer_exec_mode                exec_mode;
4092 
4093    /* A vector of anv_batch_bo pointers for every batch or surface buffer
4094     * referenced by this command buffer
4095     *
4096     * initialized by anv_cmd_buffer_init_batch_bo_chain()
4097     */
4098    struct u_vector                            seen_bbos;
4099 
4100    /* A vector of int32_t's for every block of binding tables.
4101     *
4102     * initialized by anv_cmd_buffer_init_batch_bo_chain()
4103     */
4104    struct u_vector                              bt_block_states;
4105    struct anv_state                             bt_next;
4106 
4107    struct anv_reloc_list                        surface_relocs;
4108 
4109    /* Serial for tracking buffer completion */
4110    uint32_t                                     serial;
4111 
4112    /* Stream objects for storing temporary data */
4113    struct anv_state_stream                      surface_state_stream;
4114    struct anv_state_stream                      dynamic_state_stream;
4115    struct anv_state_stream                      general_state_stream;
4116    struct anv_state_stream                      indirect_push_descriptor_stream;
4117    struct anv_state_stream                      push_descriptor_buffer_stream;
4118 
4119    VkCommandBufferUsageFlags                    usage_flags;
4120 
4121    struct anv_query_pool                       *perf_query_pool;
4122 
4123    struct anv_cmd_state                         state;
4124 
4125    struct anv_address                           return_addr;
4126 
4127    /* Set by SetPerformanceMarkerINTEL, written into queries by CmdBeginQuery */
4128    uint64_t                                     intel_perf_marker;
4129 
4130    struct anv_measure_batch *measure;
4131 
4132    /**
4133     * KHR_performance_query requires self-modifying command buffers, and this
4134     * array has the locations of the modifying commands for the query begin and
4135     * end instructions storing performance counters. The array length is
4136     * anv_physical_device::n_perf_query_commands.
4137     */
4138    struct mi_address_token                  *self_mod_locations;
4139 
4140    /**
4141     * Index tracking which of the self_mod_locations items have already been
4142     * used.
4143     */
4144    uint32_t                                      perf_reloc_idx;
4145 
4146    /**
4147     * Sum of all the anv_batch_bo written sizes for this command buffer
4148     * including any executed secondary command buffer.
4149     */
4150    uint32_t                                     total_batch_size;
4151 
4152    struct {
4153       /** Batch generating part of the anv_cmd_buffer::batch */
4154       struct anv_batch                          batch;
4155 
4156       /**
4157        * Location in anv_cmd_buffer::batch at which we left some space to
4158        * insert a MI_BATCH_BUFFER_START into the
4159        * anv_cmd_buffer::generation::batch if needed.
4160        */
4161       struct anv_address                        jump_addr;
4162 
4163       /**
4164        * Location in anv_cmd_buffer::batch at which the generation batch
4165        * should jump back to.
4166        */
4167       struct anv_address                        return_addr;
4168 
4169       /** List of anv_batch_bo used for generation
4170        *
4171        * We have to keep this separate from the anv_cmd_buffer::batch_bos that
4172        * is used for a chaining optimization.
4173        */
4174       struct list_head                          batch_bos;
4175 
4176       /** Ring buffer of generated commands
4177        *
4178        * When generating draws in ring mode, this buffer will hold generated
4179        * 3DPRIMITIVE commands.
4180        */
4181       struct anv_bo                            *ring_bo;
4182 
4183       /**
4184        * State tracking of the generation shader (only used for the non-ring
4185        * mode).
4186        */
4187       struct anv_simple_shader                  shader_state;
4188    } generation;
4189 
4190    /**
4191     * A vector of anv_bo pointers for chunks of memory used by the command
4192     * buffer that are too large to be allocated through dynamic_state_stream.
4193     * This is the case for large enough acceleration structures.
4194     *
4195     * initialized by anv_cmd_buffer_init_batch_bo_chain()
4196     */
4197    struct u_vector                              dynamic_bos;
4198 
4199    /**
4200     * Structure holding tracepoints recorded in the command buffer.
4201     */
4202    struct u_trace                               trace;
4203 
4204    struct {
4205       struct anv_video_session *vid;
4206       struct anv_video_session_params *params;
4207    } video;
4208 
4209    /**
4210     * Companion RCS command buffer to support the MSAA operations on the compute
4211     * queue.
4212     */
4213    struct anv_cmd_buffer                        *companion_rcs_cmd_buffer;
4214 
4215    /**
4216     * Whether this command buffer is a companion command buffer of a compute one.
4217     */
4218    bool                                         is_companion_rcs_cmd_buffer;
4219 
4220 };
4221 
4222 extern const struct vk_command_buffer_ops anv_cmd_buffer_ops;
4223 
4224 /* Determine whether we can chain a given cmd_buffer to another one. We need
4225  * to make sure that we can edit the end of the batch to point to the next one,
4226  * which requires the command buffer to not be used simultaneously.
4227  *
4228  * We could in theory also implement chaining with companion command buffers,
4229  * but let's spare ourselves some pain and misery. This optimization has no
4230  * benefit on the brand new Xe kernel driver.
4231  */
4232 static inline bool
4233 anv_cmd_buffer_is_chainable(struct anv_cmd_buffer *cmd_buffer)
4234 {
4235    return !(cmd_buffer->usage_flags &
4236             VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT) &&
4237           !(cmd_buffer->is_companion_rcs_cmd_buffer);
4238 }
4239 
4240 static inline bool
4241 anv_cmd_buffer_is_render_queue(const struct anv_cmd_buffer *cmd_buffer)
4242 {
4243    struct anv_queue_family *queue_family = cmd_buffer->queue_family;
4244    return (queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0;
4245 }
4246 
4247 static inline bool
4248 anv_cmd_buffer_is_video_queue(const struct anv_cmd_buffer *cmd_buffer)
4249 {
4250    struct anv_queue_family *queue_family = cmd_buffer->queue_family;
4251    return ((queue_family->queueFlags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) |
4252            (queue_family->queueFlags & VK_QUEUE_VIDEO_ENCODE_BIT_KHR)) != 0;
4253 }
4254 
4255 static inline bool
4256 anv_cmd_buffer_is_compute_queue(const struct anv_cmd_buffer *cmd_buffer)
4257 {
4258    struct anv_queue_family *queue_family = cmd_buffer->queue_family;
4259    return queue_family->engine_class == INTEL_ENGINE_CLASS_COMPUTE;
4260 }
4261 
4262 static inline bool
4263 anv_cmd_buffer_is_blitter_queue(const struct anv_cmd_buffer *cmd_buffer)
4264 {
4265    struct anv_queue_family *queue_family = cmd_buffer->queue_family;
4266    return queue_family->engine_class == INTEL_ENGINE_CLASS_COPY;
4267 }
4268 
4269 static inline bool
4270 anv_cmd_buffer_is_render_or_compute_queue(const struct anv_cmd_buffer *cmd_buffer)
4271 {
4272    return anv_cmd_buffer_is_render_queue(cmd_buffer) ||
4273           anv_cmd_buffer_is_compute_queue(cmd_buffer);
4274 }
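/*
 * Usage sketch (illustrative, not part of the original header): the queue
 * predicates above are typically used to gate engine-specific emission paths,
 * e.g.:
 *
 *    if (anv_cmd_buffer_is_blitter_queue(cmd_buffer)) {
 *       // copy-engine-only path
 *    } else if (anv_cmd_buffer_is_render_or_compute_queue(cmd_buffer)) {
 *       // paths that may emit 3D/compute packets
 *    }
 */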
4275 
4276 static inline struct anv_address
4277 anv_cmd_buffer_dynamic_state_address(struct anv_cmd_buffer *cmd_buffer,
4278                                      struct anv_state state)
4279 {
4280    return anv_state_pool_state_address(
4281       &cmd_buffer->device->dynamic_state_pool, state);
4282 }
4283 
4284 static inline uint64_t
4285 anv_cmd_buffer_descriptor_buffer_address(struct anv_cmd_buffer *cmd_buffer,
4286                                          int32_t buffer_index)
4287 {
4288    if (buffer_index == -1)
4289       return cmd_buffer->device->physical->va.push_descriptor_buffer_pool.addr;
4290 
4291    return cmd_buffer->state.descriptor_buffers.address[buffer_index];
4292 }
4293 
4294 VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
4295 void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
4296 void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
4297 void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer);
4298 void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
4299                                   struct anv_cmd_buffer *secondary);
4300 void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer);
4301 VkResult anv_cmd_buffer_execbuf(struct anv_queue *queue,
4302                                 struct anv_cmd_buffer *cmd_buffer,
4303                                 const VkSemaphore *in_semaphores,
4304                                 const uint64_t *in_wait_values,
4305                                 uint32_t num_in_semaphores,
4306                                 const VkSemaphore *out_semaphores,
4307                                 const uint64_t *out_signal_values,
4308                                 uint32_t num_out_semaphores,
4309                                 VkFence fence,
4310                                 int perf_query_pass);
4311 
4312 void anv_cmd_buffer_reset(struct vk_command_buffer *vk_cmd_buffer,
4313                           UNUSED VkCommandBufferResetFlags flags);
4314 
4315 struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
4316                                              const void *data, uint32_t size, uint32_t alignment);
4317 struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
4318                                               uint32_t *a, uint32_t *b,
4319                                               uint32_t dwords, uint32_t alignment);
4320 
4321 struct anv_address
4322 anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer);
4323 struct anv_state
4324 anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
4325                                    uint32_t entries, uint32_t *state_offset);
4326 struct anv_state
4327 anv_cmd_buffer_alloc_surface_states(struct anv_cmd_buffer *cmd_buffer,
4328                                     uint32_t count);
4329 struct anv_state
4330 anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer,
4331                                    uint32_t size, uint32_t alignment);
4332 struct anv_state
4333 anv_cmd_buffer_alloc_general_state(struct anv_cmd_buffer *cmd_buffer,
4334                                    uint32_t size, uint32_t alignment);
4335 static inline struct anv_state
4336 anv_cmd_buffer_alloc_temporary_state(struct anv_cmd_buffer *cmd_buffer,
4337                                      uint32_t size, uint32_t alignment)
4338 {
4339    struct anv_state state =
4340       anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream,
4341                              size, alignment);
4342    if (state.map == NULL)
4343       anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_DEVICE_MEMORY);
4344    return state;
4345 }
4346 static inline struct anv_address
4347 anv_cmd_buffer_temporary_state_address(struct anv_cmd_buffer *cmd_buffer,
4348                                        struct anv_state state)
4349 {
4350    return anv_state_pool_state_address(
4351       &cmd_buffer->device->dynamic_state_pool, state);
4352 }
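/*
 * Usage sketch (illustrative, not part of the original header): allocate a
 * small piece of temporary dynamic state, fill it through the CPU map, and
 * retrieve its GPU address. The 64-byte size/alignment values are arbitrary.
 *
 *    struct anv_state tmp =
 *       anv_cmd_buffer_alloc_temporary_state(cmd_buffer, 64, 64);
 *    if (tmp.map != NULL) {
 *       memcpy(tmp.map, payload, 64);
 *       struct anv_address gpu_addr =
 *          anv_cmd_buffer_temporary_state_address(cmd_buffer, tmp);
 *       // gpu_addr can now be referenced from GPU commands
 *    }
 */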
4353 
4354 void
4355 anv_cmd_buffer_chain_command_buffers(struct anv_cmd_buffer **cmd_buffers,
4356                                      uint32_t num_cmd_buffers);
4357 void
4358 anv_cmd_buffer_exec_batch_debug(struct anv_queue *queue,
4359                                 uint32_t cmd_buffer_count,
4360                                 struct anv_cmd_buffer **cmd_buffers,
4361                                 struct anv_query_pool *perf_query_pool,
4362                                 uint32_t perf_query_pass);
4363 void
4364 anv_cmd_buffer_clflush(struct anv_cmd_buffer **cmd_buffers,
4365                        uint32_t num_cmd_buffers);
4366 
4367 void
4368 anv_cmd_buffer_update_pending_query_bits(struct anv_cmd_buffer *cmd_buffer,
4369                                          enum anv_pipe_bits flushed_bits);
4370 
4371 /**
4372  * An allocation tied to a command buffer.
4373  *
4374  * Don't use anv_cmd_alloc::address::map to write memory from userspace, use
4375  * anv_cmd_alloc::map instead.
4376  */
4377 struct anv_cmd_alloc {
4378    struct anv_address  address;
4379    void               *map;
4380    size_t              size;
4381 };
4382 
4383 #define ANV_EMPTY_ALLOC ((struct anv_cmd_alloc) { .map = NULL, .size = 0 })
4384 
4385 static inline bool
4386 anv_cmd_alloc_is_empty(struct anv_cmd_alloc alloc)
4387 {
4388    return alloc.size == 0;
4389 }
4390 
4391 struct anv_cmd_alloc
4392 anv_cmd_buffer_alloc_space(struct anv_cmd_buffer *cmd_buffer,
4393                            size_t size, uint32_t alignment,
4394                            bool private);
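/*
 * Usage sketch (illustrative, not part of the original header): as noted on
 * struct anv_cmd_alloc, CPU writes must go through ::map, while ::address is
 * what gets handed to the GPU.
 *
 *    struct anv_cmd_alloc alloc =
 *       anv_cmd_buffer_alloc_space(cmd_buffer, 4096, 64, false);
 *    if (!anv_cmd_alloc_is_empty(alloc)) {
 *       memset(alloc.map, 0, alloc.size);  // CPU write through ::map
 *       // alloc.address is used when building GPU commands
 *    }
 */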
4395 
4396 VkResult
4397 anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer);
4398 
4399 void anv_cmd_buffer_emit_bt_pool_base_address(struct anv_cmd_buffer *cmd_buffer);
4400 
4401 struct anv_state
4402 anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer);
4403 struct anv_state
4404 anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer);
4405 
4406 VkResult
4407 anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
4408                                          uint32_t num_entries,
4409                                          uint32_t *state_offset,
4410                                          struct anv_state *bt_state);
4411 
4412 void anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer);
4413 
4414 static inline unsigned
4415 anv_cmd_buffer_get_view_count(struct anv_cmd_buffer *cmd_buffer)
4416 {
4417    struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
4418    return MAX2(1, util_bitcount(gfx->view_mask));
4419 }
4420 
4421 /* Save/restore cmd buffer states for meta operations */
4422 enum anv_cmd_saved_state_flags {
4423    ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE         = BITFIELD_BIT(0),
4424    ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_0         = BITFIELD_BIT(1),
4425    ANV_CMD_SAVED_STATE_PUSH_CONSTANTS           = BITFIELD_BIT(2),
4426 };
4427 
4428 struct anv_cmd_saved_state {
4429    uint32_t flags;
4430 
4431    struct anv_pipeline *pipeline;
4432    struct anv_descriptor_set *descriptor_set;
4433    uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
4434 };
4435 
4436 void anv_cmd_buffer_save_state(struct anv_cmd_buffer *cmd_buffer,
4437                                uint32_t flags,
4438                                struct anv_cmd_saved_state *state);
4439 
4440 void anv_cmd_buffer_restore_state(struct anv_cmd_buffer *cmd_buffer,
4441                                   struct anv_cmd_saved_state *state);
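/*
 * Usage sketch (illustrative, not part of the original header): a meta
 * operation that binds its own compute pipeline typically brackets its work
 * with save/restore so the application's state is preserved:
 *
 *    struct anv_cmd_saved_state saved;
 *    anv_cmd_buffer_save_state(cmd_buffer,
 *                              ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE |
 *                              ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_0 |
 *                              ANV_CMD_SAVED_STATE_PUSH_CONSTANTS,
 *                              &saved);
 *    // ... bind meta pipeline, push constants, dispatch ...
 *    anv_cmd_buffer_restore_state(cmd_buffer, &saved);
 */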
4442 
4443 enum anv_bo_sync_state {
4444    /** Indicates that this is a new (or newly reset) fence */
4445    ANV_BO_SYNC_STATE_RESET,
4446 
4447    /** Indicates that this fence has been submitted to the GPU but is still
4448     * (as far as we know) in use by the GPU.
4449     */
4450    ANV_BO_SYNC_STATE_SUBMITTED,
4451 
4452    ANV_BO_SYNC_STATE_SIGNALED,
4453 };
4454 
4455 struct anv_bo_sync {
4456    struct vk_sync sync;
4457 
4458    enum anv_bo_sync_state state;
4459    struct anv_bo *bo;
4460 };
4461 
4462 extern const struct vk_sync_type anv_bo_sync_type;
4463 
4464 static inline bool
4465 vk_sync_is_anv_bo_sync(const struct vk_sync *sync)
4466 {
4467    return sync->type == &anv_bo_sync_type;
4468 }
4469 
4470 VkResult anv_create_sync_for_memory(struct vk_device *device,
4471                                     VkDeviceMemory memory,
4472                                     bool signal_memory,
4473                                     struct vk_sync **sync_out);
4474 
4475 struct anv_event {
4476    struct vk_object_base                        base;
4477    uint64_t                                     semaphore;
4478    struct anv_state                             state;
4479 };
4480 
4481 #define ANV_STAGE_MASK ((1 << MESA_VULKAN_SHADER_STAGES) - 1)
4482 
4483 #define anv_foreach_stage(stage, stage_bits)                         \
4484    for (gl_shader_stage stage,                                       \
4485         __tmp = (gl_shader_stage)((stage_bits) & ANV_STAGE_MASK);    \
4486         stage = __builtin_ffs(__tmp) - 1, __tmp;                     \
4487         __tmp &= ~(1 << (stage)))
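/*
 * Usage sketch (illustrative, not part of the original header): iterate only
 * the shader stages whose bits are set in a stage mask; the loop variable
 * takes gl_shader_stage values such as MESA_SHADER_VERTEX.
 *
 *    anv_foreach_stage(s, pipeline->active_stages) {
 *       // handle stage s
 *    }
 */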
4488 
4489 struct anv_pipeline_bind_map {
4490    unsigned char                                surface_sha1[20];
4491    unsigned char                                sampler_sha1[20];
4492    unsigned char                                push_sha1[20];
4493 
4494    uint32_t surface_count;
4495    uint32_t sampler_count;
4496    uint32_t embedded_sampler_count;
4497    uint16_t kernel_args_size;
4498    uint16_t kernel_arg_count;
4499 
4500    struct anv_pipeline_binding *                surface_to_descriptor;
4501    struct anv_pipeline_binding *                sampler_to_descriptor;
4502    struct anv_pipeline_embedded_sampler_binding* embedded_sampler_to_binding;
4503    struct brw_kernel_arg_desc *                 kernel_args;
4504 
4505    struct anv_push_range                        push_ranges[4];
4506 };
4507 
4508 struct anv_push_descriptor_info {
4509    /* A bitfield of descriptors used. */
4510    uint32_t used_descriptors;
4511 
4512    /* A bitfield of UBOs bindings fully promoted to push constants. */
4513    uint32_t fully_promoted_ubo_descriptors;
4514 
4515    /* */
4516    uint8_t used_set_buffer;
4517 };
4518 
4519 /* A list of values we push to implement some of the dynamic states */
4520 enum anv_dynamic_push_bits {
4521    ANV_DYNAMIC_PUSH_INPUT_VERTICES = BITFIELD_BIT(0),
4522 };
4523 
4524 struct anv_shader_upload_params {
4525    gl_shader_stage stage;
4526 
4527    const void *key_data;
4528    uint32_t key_size;
4529 
4530    const void *kernel_data;
4531    uint32_t kernel_size;
4532 
4533    const struct brw_stage_prog_data *prog_data;
4534    uint32_t prog_data_size;
4535 
4536    const struct brw_compile_stats *stats;
4537    uint32_t num_stats;
4538 
4539    const struct nir_xfb_info *xfb_info;
4540 
4541    const struct anv_pipeline_bind_map *bind_map;
4542 
4543    const struct anv_push_descriptor_info *push_desc_info;
4544 
4545    enum anv_dynamic_push_bits dynamic_push_values;
4546 };
4547 
4548 struct anv_embedded_sampler {
4549    uint32_t ref_cnt;
4550 
4551    struct anv_embedded_sampler_key key;
4552 
4553    struct anv_state sampler_state;
4554    struct anv_state border_color_state;
4555 };
4556 
4557 struct anv_shader_bin {
4558    struct vk_pipeline_cache_object base;
4559 
4560    gl_shader_stage stage;
4561 
4562    struct anv_state kernel;
4563    uint32_t kernel_size;
4564 
4565    const struct brw_stage_prog_data *prog_data;
4566    uint32_t prog_data_size;
4567 
4568    struct brw_compile_stats stats[3];
4569    uint32_t num_stats;
4570 
4571    struct nir_xfb_info *xfb_info;
4572 
4573    struct anv_push_descriptor_info push_desc_info;
4574 
4575    struct anv_pipeline_bind_map bind_map;
4576 
4577    enum anv_dynamic_push_bits dynamic_push_values;
4578 
4579    /* Not saved in the pipeline cache.
4580     *
4581     * Array of pointers of length bind_map.embedded_sampler_count
4582     */
4583    struct anv_embedded_sampler **embedded_samplers;
4584 };
4585 
4586 static inline struct anv_shader_bin *
4587 anv_shader_bin_ref(struct anv_shader_bin *shader)
4588 {
4589    vk_pipeline_cache_object_ref(&shader->base);
4590 
4591    return shader;
4592 }
4593 
4594 static inline void
4595 anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader)
4596 {
4597    vk_pipeline_cache_object_unref(&device->vk, &shader->base);
4598 }
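/*
 * Usage sketch (illustrative, not part of the original header): anv_shader_bin
 * is reference counted through its vk_pipeline_cache_object base; a holder
 * takes a reference while it uses the shader and drops it when done.
 *
 *    struct anv_shader_bin *bin = anv_shader_bin_ref(shader);
 *    // ... use bin->kernel, bin->prog_data ...
 *    anv_shader_bin_unref(device, bin);
 */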
4599 
4600 struct anv_pipeline_executable {
4601    gl_shader_stage stage;
4602 
4603    struct brw_compile_stats stats;
4604 
4605    char *nir;
4606    char *disasm;
4607 };
4608 
4609 enum anv_pipeline_type {
4610    ANV_PIPELINE_GRAPHICS,
4611    ANV_PIPELINE_GRAPHICS_LIB,
4612    ANV_PIPELINE_COMPUTE,
4613    ANV_PIPELINE_RAY_TRACING,
4614 };
4615 
4616 struct anv_pipeline {
4617    struct vk_object_base                        base;
4618 
4619    struct anv_device *                          device;
4620 
4621    struct anv_batch                             batch;
4622    struct anv_reloc_list                        batch_relocs;
4623 
4624    void *                                       mem_ctx;
4625 
4626    enum anv_pipeline_type                       type;
4627    VkPipelineCreateFlags2KHR                    flags;
4628 
4629    VkShaderStageFlags                           active_stages;
4630 
4631    uint32_t                                     ray_queries;
4632 
4633    /**
4634     * Mask of stages that are accessing push descriptors.
4635     */
4636    VkShaderStageFlags                           use_push_descriptor;
4637 
4638    /**
4639     * Mask of stages that are accessing the push descriptors buffer.
4640     */
4641    VkShaderStageFlags                           use_push_descriptor_buffer;
4642 
4643    /**
4644     * Maximum scratch size for all shaders in this pipeline.
4645     */
4646    uint32_t                                     scratch_size;
4647 
4648    /* Layout of the sets used by the pipeline. */
4649    struct anv_pipeline_sets_layout              layout;
4650 
4651    struct util_dynarray                         executables;
4652 
4653    const struct intel_l3_config *               l3_config;
4654 };
4655 
4656 /* The base graphics pipeline object only holds shaders. */
4657 struct anv_graphics_base_pipeline {
4658    struct anv_pipeline                          base;
4659 
4660    struct vk_sample_locations_state             sample_locations;
4661 
4662    /* Shaders */
4663    struct anv_shader_bin *                      shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT];
4664 
4665    /* A small hash based on shader_info::source_sha1 for identifying
4666     * shaders in renderdoc/shader-db.
4667     */
4668    uint32_t                                     source_hashes[ANV_GRAPHICS_SHADER_STAGE_COUNT];
4669 
4670    /* Feedback index in
4671     * VkPipelineCreationFeedbackCreateInfo::pPipelineStageCreationFeedbacks
4672     *
4673     * For pipeline libraries, we need to remember the order at creation when
4674     * included into a linked pipeline.
4675     */
4676    uint32_t                                     feedback_index[ANV_GRAPHICS_SHADER_STAGE_COUNT];
4677 
4678    /* Robustness flags used by the shaders
4679     */
4680    enum brw_robustness_flags                    robust_flags[ANV_GRAPHICS_SHADER_STAGE_COUNT];
4681 
4682    /* True if at the time the fragment shader was compiled, it didn't have all
4683     * the information to avoid INTEL_MSAA_FLAG_ENABLE_DYNAMIC.
4684     */
4685    bool                                         fragment_dynamic;
4686 };
4687 
4688 /* The library graphics pipeline object has a partial graphics state and
4689  * possibly some shaders. If requested, shaders are also present in early NIR
4690  * form.
4691  */
4692 struct anv_graphics_lib_pipeline {
4693    struct anv_graphics_base_pipeline            base;
4694 
4695    VkGraphicsPipelineLibraryFlagsEXT            lib_flags;
4696 
4697    struct vk_graphics_pipeline_all_state        all_state;
4698    struct vk_graphics_pipeline_state            state;
4699 
4700    /* Retained shaders for link optimization. */
4701    struct {
4702       /* This hash is the same as computed in
4703        * anv_graphics_pipeline_gather_shaders().
4704        */
4705       unsigned char                             shader_sha1[20];
4706 
4707       enum gl_subgroup_size                     subgroup_size_type;
4708 
4709       /* Holds the value of VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT
4710        * from the library that introduces the stage, so it remains consistent.
4711        */
4712       bool                                      view_index_from_device_index;
4713 
4714       /* NIR captured in anv_pipeline_stage_get_nir(), includes specialization
4715        * constants.
4716        */
4717       nir_shader *                              nir;
4718    }                                            retained_shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT];
4719 
4720    /* Whether the shaders have been retained */
4721    bool                                         retain_shaders;
4722 };
4723 
4724 struct anv_gfx_state_ptr {
4725    /* Both in dwords */
4726    uint16_t  offset;
4727    uint16_t  len;
4728 };
4729 
4730 /* The final graphics pipeline object has all the graphics state ready to be
4731  * programmed into HW packets (dynamic_state field) or fully baked in its
4732  * batch.
4733  */
4734 struct anv_graphics_pipeline {
4735    struct anv_graphics_base_pipeline            base;
4736 
4737    struct vk_vertex_input_state                 vertex_input;
4738    struct vk_sample_locations_state             sample_locations;
4739    struct vk_dynamic_graphics_state             dynamic_state;
4740 
4741    /* If true, the patch control points are passed through push constants
4742     * (anv_push_constants::gfx::tcs_input_vertices)
4743     */
4744    bool                                         dynamic_patch_control_points;
4745 
4746    uint32_t                                     view_mask;
4747    uint32_t                                     instance_multiplier;
4748 
4749    bool                                         rp_has_ds_self_dep;
4750 
4751    bool                                         kill_pixel;
4752    bool                                         uses_xfb;
4753    bool                                         sample_shading_enable;
4754    float                                        min_sample_shading;
4755 
4756    /* Number of VERTEX_ELEMENT_STATE input elements used by the shader */
4757    uint32_t                                     vs_input_elements;
4758 
4759    /* Number of VERTEX_ELEMENT_STATE elements we need to implement some of the
4760     * draw parameters
4761     */
4762    uint32_t                                     svgs_count;
4763 
4764    /* Precomputed VERTEX_ELEMENT_STATE structures for the vertex input that
4765     * can be copied into the anv_cmd_buffer behind a 3DSTATE_VERTEX_ELEMENTS.
4766     *
4767     * When MESA_VK_DYNAMIC_VI is not dynamic
4768     *
4769     *     vertex_input_elems = vs_input_elements + svgs_count
4770     *
4771     * All the VERTEX_ELEMENT_STATE can be directly copied behind a
4772     * 3DSTATE_VERTEX_ELEMENTS instruction in the command buffer. Otherwise
4773     * this array only holds the svgs_count elements.
4774     */
4775    uint32_t                                     vertex_input_elems;
4776    uint32_t                                     vertex_input_data[2 * 31 /* MAX_VES + 2 internal */];
4777 
4778    /* Precomputed CS instructions that can be copied directly into the
4779     * anv_cmd_buffer.
4780     */
4781    uint32_t                                     batch_data[480];
4782 
4783    /* Urb setup utilized by this pipeline. */
4784    struct intel_urb_config urb_cfg;
4785 
4786    /* Fully baked instructions, ready to be emitted into the anv_cmd_buffer */
4787    struct {
4788       struct anv_gfx_state_ptr                  urb;
4789       struct anv_gfx_state_ptr                  vf_statistics;
4790       struct anv_gfx_state_ptr                  vf_sgvs;
4791       struct anv_gfx_state_ptr                  vf_sgvs_2;
4792       struct anv_gfx_state_ptr                  vf_sgvs_instancing;
4793       struct anv_gfx_state_ptr                  vf_instancing;
4794       struct anv_gfx_state_ptr                  primitive_replication;
4795       struct anv_gfx_state_ptr                  sbe;
4796       struct anv_gfx_state_ptr                  sbe_swiz;
4797       struct anv_gfx_state_ptr                  so_decl_list;
4798       struct anv_gfx_state_ptr                  vs;
4799       struct anv_gfx_state_ptr                  hs;
4800       struct anv_gfx_state_ptr                  ds;
4801       struct anv_gfx_state_ptr                  vs_protected;
4802       struct anv_gfx_state_ptr                  hs_protected;
4803       struct anv_gfx_state_ptr                  ds_protected;
4804 
4805       struct anv_gfx_state_ptr                  task_control;
4806       struct anv_gfx_state_ptr                  task_control_protected;
4807       struct anv_gfx_state_ptr                  task_shader;
4808       struct anv_gfx_state_ptr                  task_redistrib;
4809       struct anv_gfx_state_ptr                  clip_mesh;
4810       struct anv_gfx_state_ptr                  mesh_control;
4811       struct anv_gfx_state_ptr                  mesh_control_protected;
4812       struct anv_gfx_state_ptr                  mesh_shader;
4813       struct anv_gfx_state_ptr                  mesh_distrib;
4814       struct anv_gfx_state_ptr                  sbe_mesh;
4815    } final;
4816 
4817    /* Pre-packed CS instructions & structures that need to be merged later
4818     * with dynamic state.
4819     */
4820    struct {
4821       struct anv_gfx_state_ptr                  clip;
4822       struct anv_gfx_state_ptr                  sf;
4823       struct anv_gfx_state_ptr                  raster;
4824       struct anv_gfx_state_ptr                  ms;
4825       struct anv_gfx_state_ptr                  ps_extra;
4826       struct anv_gfx_state_ptr                  wm;
4827       struct anv_gfx_state_ptr                  so;
4828       struct anv_gfx_state_ptr                  gs;
4829       struct anv_gfx_state_ptr                  gs_protected;
4830       struct anv_gfx_state_ptr                  te;
4831       struct anv_gfx_state_ptr                  ps;
4832       struct anv_gfx_state_ptr                  ps_protected;
4833       struct anv_gfx_state_ptr                  vfg;
4834    } partial;
4835 };
4836 
4837 #define anv_batch_emit_pipeline_state(batch, pipeline, state)           \
4838    do {                                                                 \
4839       if ((pipeline)->state.len == 0)                                   \
4840          break;                                                         \
4841       uint32_t *dw;                                                     \
4842       dw = anv_batch_emit_dwords((batch), (pipeline)->state.len);       \
4843       if (!dw)                                                          \
4844          break;                                                         \
4845       memcpy(dw, &(pipeline)->batch_data[(pipeline)->state.offset],     \
4846              4 * (pipeline)->state.len);                                \
4847    } while (0)
4848 
4849 #define anv_batch_emit_pipeline_state_protected(batch, pipeline,        \
4850                                                 state, protected)       \
4851    do {                                                                 \
4852       struct anv_gfx_state_ptr *_cmd_state = protected ?                \
4853          &(pipeline)->state##_protected : &(pipeline)->state;           \
4854       if (_cmd_state->len == 0)                                         \
4855          break;                                                         \
4856       uint32_t *dw;                                                     \
4857       dw = anv_batch_emit_dwords((batch), _cmd_state->len);             \
4858       if (!dw)                                                          \
4859          break;                                                         \
4860       memcpy(dw, &(pipeline)->batch_data[_cmd_state->offset],           \
4861              4 * _cmd_state->len);                                      \
4862    } while (0)
4863 
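/*
 * Usage sketch (illustrative, not part of the original header): the macros
 * above copy pre-baked dwords from anv_graphics_pipeline::batch_data into a
 * batch, e.g. the "final" URB state or a protected/unprotected VS variant:
 *
 *    anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.urb);
 *    anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
 *                                            final.vs, is_protected);
 */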
4864 
4865 struct anv_compute_pipeline {
4866    struct anv_pipeline                          base;
4867 
4868    struct anv_shader_bin *                      cs;
4869    uint32_t                                     batch_data[9];
4870    uint32_t                                     interface_descriptor_data[8];
4871 
4872    /* A small hash based on shader_info::source_sha1 for identifying shaders
4873     * in renderdoc/shader-db.
4874     */
4875    uint32_t                                     source_hash;
4876 };
4877 
4878 struct anv_rt_shader_group {
4879    VkRayTracingShaderGroupTypeKHR type;
4880 
4881    /* Whether this group was imported from another pipeline */
4882    bool imported;
4883 
4884    struct anv_shader_bin *general;
4885    struct anv_shader_bin *closest_hit;
4886    struct anv_shader_bin *any_hit;
4887    struct anv_shader_bin *intersection;
4888 
4889    /* VK_KHR_ray_tracing requires shaderGroupHandleSize == 32 */
4890    uint32_t handle[8];
4891 };
4892 
4893 struct anv_ray_tracing_pipeline {
4894    struct anv_pipeline                          base;
4895 
4896    /* All shaders in the pipeline */
4897    struct util_dynarray                         shaders;
4898 
4899    uint32_t                                     group_count;
4900    struct anv_rt_shader_group *                 groups;
4901 
4902    /* If non-zero, this is the default computed stack size as per the stack
4903     * size computation in the Vulkan spec.  If zero, that indicates that the
4904     * client has requested a dynamic stack size.
4905     */
4906    uint32_t                                     stack_size;
4907 };
4908 
4909 #define ANV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum)             \
4910    static inline struct anv_##pipe_type##_pipeline *                 \
4911    anv_pipeline_to_##pipe_type(struct anv_pipeline *pipeline)      \
4912    {                                                                 \
4913       assert(pipeline->type == pipe_enum);                           \
4914       return (struct anv_##pipe_type##_pipeline *) pipeline;         \
4915    }
4916 
4917 ANV_DECL_PIPELINE_DOWNCAST(graphics, ANV_PIPELINE_GRAPHICS)
4918 ANV_DECL_PIPELINE_DOWNCAST(graphics_lib, ANV_PIPELINE_GRAPHICS_LIB)
4919 ANV_DECL_PIPELINE_DOWNCAST(compute, ANV_PIPELINE_COMPUTE)
4920 ANV_DECL_PIPELINE_DOWNCAST(ray_tracing, ANV_PIPELINE_RAY_TRACING)
4921 
4922 /* Can't use the macro because we need to handle both types. */
4923 static inline struct anv_graphics_base_pipeline *
4924 anv_pipeline_to_graphics_base(struct anv_pipeline *pipeline)
4925 {
4926    assert(pipeline->type == ANV_PIPELINE_GRAPHICS ||
4927           pipeline->type == ANV_PIPELINE_GRAPHICS_LIB);
4928    return (struct anv_graphics_base_pipeline *) pipeline;
4929 }
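/*
 * Usage sketch (illustrative, not part of the original header): the downcast
 * helpers generated above are used after checking anv_pipeline::type:
 *
 *    if (pipeline->type == ANV_PIPELINE_COMPUTE) {
 *       struct anv_compute_pipeline *cs_pipeline =
 *          anv_pipeline_to_compute(pipeline);
 *       // cs_pipeline->cs is the compute shader binary
 *    }
 */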
4930 
4931 static inline bool
4932 anv_pipeline_has_stage(const struct anv_graphics_pipeline *pipeline,
4933                        gl_shader_stage stage)
4934 {
4935    return (pipeline->base.base.active_stages & mesa_to_vk_shader_stage(stage)) != 0;
4936 }
4937 
4938 static inline bool
4939 anv_pipeline_base_has_stage(const struct anv_graphics_base_pipeline *pipeline,
4940                             gl_shader_stage stage)
4941 {
4942    return (pipeline->base.active_stages & mesa_to_vk_shader_stage(stage)) != 0;
4943 }
4944 
4945 static inline bool
4946 anv_pipeline_is_primitive(const struct anv_graphics_pipeline *pipeline)
4947 {
4948    return anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX);
4949 }
4950 
4951 static inline bool
4952 anv_pipeline_is_mesh(const struct anv_graphics_pipeline *pipeline)
4953 {
4954    return anv_pipeline_has_stage(pipeline, MESA_SHADER_MESH);
4955 }
4956 
4957 static inline bool
4958 anv_cmd_buffer_all_color_write_masked(const struct anv_cmd_buffer *cmd_buffer)
4959 {
4960    const struct anv_cmd_graphics_state *state = &cmd_buffer->state.gfx;
4961    const struct vk_dynamic_graphics_state *dyn =
4962       &cmd_buffer->vk.dynamic_graphics_state;
4963    uint8_t color_writes = dyn->cb.color_write_enables;
4964 
4965    /* All writes disabled through vkCmdSetColorWriteEnableEXT */
4966    if ((color_writes & ((1u << state->color_att_count) - 1)) == 0)
4967       return true;
4968 
4969    /* Or all write masks are empty */
4970    for (uint32_t i = 0; i < state->color_att_count; i++) {
4971       if (dyn->cb.attachments[i].write_mask != 0)
4972          return false;
4973    }
4974 
4975    return true;
4976 }
4977 
4978 static inline void
4979 anv_cmd_graphic_state_update_has_uint_rt(struct anv_cmd_graphics_state *state)
4980 {
4981    state->has_uint_rt = false;
4982    for (unsigned a = 0; a < state->color_att_count; a++) {
4983       if (vk_format_is_int(state->color_att[a].vk_format)) {
4984          state->has_uint_rt = true;
4985          break;
4986       }
4987    }
4988 }
4989 
4990 #define ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(prefix, stage)             \
4991 static inline const struct brw_##prefix##_prog_data *                   \
4992 get_##prefix##_prog_data(const struct anv_graphics_pipeline *pipeline)  \
4993 {                                                                       \
4994    if (anv_pipeline_has_stage(pipeline, stage)) {                       \
4995       return (const struct brw_##prefix##_prog_data *)                  \
4996          pipeline->base.shaders[stage]->prog_data;                      \
4997    } else {                                                             \
4998       return NULL;                                                      \
4999    }                                                                    \
5000 }
5001 
5002 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX)
5003 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tcs, MESA_SHADER_TESS_CTRL)
5004 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tes, MESA_SHADER_TESS_EVAL)
5005 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY)
5006 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)
5007 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(mesh, MESA_SHADER_MESH)
5008 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(task, MESA_SHADER_TASK)
5009 
5010 static inline const struct brw_cs_prog_data *
5011 get_cs_prog_data(const struct anv_compute_pipeline *pipeline)
5012 {
5013    assert(pipeline->cs);
5014    return (const struct brw_cs_prog_data *) pipeline->cs->prog_data;
5015 }
5016 
5017 static inline const struct brw_vue_prog_data *
5018 anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline *pipeline)
5019 {
5020    if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
5021       return &get_gs_prog_data(pipeline)->base;
5022    else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
5023       return &get_tes_prog_data(pipeline)->base;
5024    else
5025       return &get_vs_prog_data(pipeline)->base;
5026 }
5027 
5028 VkResult
5029 anv_device_init_rt_shaders(struct anv_device *device);
5030 
5031 void
5032 anv_device_finish_rt_shaders(struct anv_device *device);
5033 
5034 struct anv_kernel_arg {
5035    bool is_ptr;
5036    uint16_t size;
5037 
5038    union {
5039       uint64_t u64;
5040       void *ptr;
5041    };
5042 };
5043 
5044 struct anv_kernel {
5045 #ifndef NDEBUG
5046    const char *name;
5047 #endif
5048    struct anv_shader_bin *bin;
5049    const struct intel_l3_config *l3_config;
5050 };
5051 
5052 struct anv_format_plane {
5053    enum isl_format isl_format:16;
5054    struct isl_swizzle swizzle;
5055 
5056    /* What aspect is associated with this plane */
5057    VkImageAspectFlags aspect;
5058 };
5059 
5060 struct anv_format {
5061    struct anv_format_plane planes[3];
5062    VkFormat vk_format;
5063    uint8_t n_planes;
5064    bool can_ycbcr;
5065    bool can_video;
5066 };
5067 
5068 static inline void
5069 anv_assert_valid_aspect_set(VkImageAspectFlags aspects)
5070 {
5071    if (util_bitcount(aspects) == 1) {
5072       assert(aspects & (VK_IMAGE_ASPECT_COLOR_BIT |
5073                         VK_IMAGE_ASPECT_DEPTH_BIT |
5074                         VK_IMAGE_ASPECT_STENCIL_BIT |
5075                         VK_IMAGE_ASPECT_PLANE_0_BIT |
5076                         VK_IMAGE_ASPECT_PLANE_1_BIT |
5077                         VK_IMAGE_ASPECT_PLANE_2_BIT));
5078    } else if (aspects & VK_IMAGE_ASPECT_PLANES_BITS_ANV) {
5079       assert(aspects == VK_IMAGE_ASPECT_PLANE_0_BIT ||
5080              aspects == (VK_IMAGE_ASPECT_PLANE_0_BIT |
5081                          VK_IMAGE_ASPECT_PLANE_1_BIT) ||
5082              aspects == (VK_IMAGE_ASPECT_PLANE_0_BIT |
5083                          VK_IMAGE_ASPECT_PLANE_1_BIT |
5084                          VK_IMAGE_ASPECT_PLANE_2_BIT));
5085    } else {
5086       assert(aspects == (VK_IMAGE_ASPECT_DEPTH_BIT |
5087                          VK_IMAGE_ASPECT_STENCIL_BIT));
5088    }
5089 }
5090 
5091 /**
5092  * Return the aspect's plane relative to all_aspects.  For an image, for
5093  * instance, all_aspects would be the set of aspects in the image.  For
5094  * an image view, all_aspects would be the subset of aspects represented
5095  * by that particular view.
5096  */
5097 static inline uint32_t
5098 anv_aspect_to_plane(VkImageAspectFlags all_aspects,
5099                     VkImageAspectFlagBits aspect)
5100 {
5101    anv_assert_valid_aspect_set(all_aspects);
5102    assert(util_bitcount(aspect) == 1);
5103    assert(!(aspect & ~all_aspects));
5104 
5105    /* Because we always put image and view planes in aspect-bit-order, the
5106     * plane index is the number of bits in all_aspects before aspect.
5107     */
5108    return util_bitcount(all_aspects & (aspect - 1));
5109 }
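/*
 * Worked example (illustrative): for all_aspects = PLANE_0 | PLANE_1 | PLANE_2
 * and aspect = VK_IMAGE_ASPECT_PLANE_1_BIT, the only bit of all_aspects below
 * PLANE_1 is PLANE_0, so the plane index is 1. For a combined depth/stencil
 * image (all_aspects = DEPTH | STENCIL), DEPTH maps to plane 0 and STENCIL to
 * plane 1.
 */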
5110 
5111 #define anv_foreach_image_aspect_bit(b, image, aspects) \
5112    u_foreach_bit(b, vk_image_expand_aspect_mask(&(image)->vk, aspects))
5113 
5114 const struct anv_format *
5115 anv_get_format(VkFormat format);
5116 
5117 static inline uint32_t
5118 anv_get_format_planes(VkFormat vk_format)
5119 {
5120    const struct anv_format *format = anv_get_format(vk_format);
5121 
5122    return format != NULL ? format->n_planes : 0;
5123 }
5124 
5125 struct anv_format_plane
5126 anv_get_format_plane(const struct intel_device_info *devinfo,
5127                      VkFormat vk_format, uint32_t plane,
5128                      VkImageTiling tiling);
5129 
5130 struct anv_format_plane
5131 anv_get_format_aspect(const struct intel_device_info *devinfo,
5132                       VkFormat vk_format,
5133                       VkImageAspectFlagBits aspect, VkImageTiling tiling);
5134 
5135 static inline enum isl_format
5136 anv_get_isl_format(const struct intel_device_info *devinfo, VkFormat vk_format,
5137                    VkImageAspectFlags aspect, VkImageTiling tiling)
5138 {
5139    return anv_get_format_aspect(devinfo, vk_format, aspect, tiling).isl_format;
5140 }
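/*
 * Usage sketch (illustrative, not part of the original header): query the ISL
 * format backing the depth aspect of a combined depth/stencil VkFormat:
 *
 *    enum isl_format fmt =
 *       anv_get_isl_format(devinfo, VK_FORMAT_D24_UNORM_S8_UINT,
 *                          VK_IMAGE_ASPECT_DEPTH_BIT,
 *                          VK_IMAGE_TILING_OPTIMAL);
 */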
5141 
5142 bool anv_format_supports_ccs_e(const struct intel_device_info *devinfo,
5143                                const enum isl_format format);
5144 
5145 bool anv_formats_ccs_e_compatible(const struct intel_device_info *devinfo,
5146                                   VkImageCreateFlags create_flags,
5147                                   VkFormat vk_format, VkImageTiling vk_tiling,
5148                                   VkImageUsageFlags vk_usage,
5149                                   const VkImageFormatListCreateInfo *fmt_list);
5150 
5151 extern VkFormat
5152 vk_format_from_android(unsigned android_format, unsigned android_usage);
5153 
5154 static inline VkFormat
5155 anv_get_emulation_format(const struct anv_physical_device *pdevice, VkFormat format)
5156 {
5157    if (pdevice->flush_astc_ldr_void_extent_denorms) {
5158       const struct util_format_description *desc =
5159          vk_format_description(format);
5160       if (desc->layout == UTIL_FORMAT_LAYOUT_ASTC &&
5161           desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB)
5162          return format;
5163    }
5164 
5165    if (pdevice->emu_astc_ldr)
5166       return vk_texcompress_astc_emulation_format(format);
5167 
5168    return VK_FORMAT_UNDEFINED;
5169 }
5170 
5171 static inline bool
5172 anv_is_format_emulated(const struct anv_physical_device *pdevice, VkFormat format)
5173 {
5174    return anv_get_emulation_format(pdevice, format) != VK_FORMAT_UNDEFINED;
5175 }
5176 
5177 static inline struct isl_swizzle
5178 anv_swizzle_for_render(struct isl_swizzle swizzle)
5179 {
5180    /* Sometimes the swizzle will have alpha mapped to one.  We do this to fake
5181     * RGB as RGBA for texturing
5182     */
5183    assert(swizzle.a == ISL_CHANNEL_SELECT_ONE ||
5184           swizzle.a == ISL_CHANNEL_SELECT_ALPHA);
5185 
5186    /* But it doesn't matter what we render to that channel */
5187    swizzle.a = ISL_CHANNEL_SELECT_ALPHA;
5188 
5189    return swizzle;
5190 }
5191 
5192 void
5193 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm);
5194 
5195 /**
5196  * Describes how each part of anv_image will be bound to memory.
5197  */
5198 struct anv_image_memory_range {
5199    /**
5200     * Disjoint bindings into which each portion of the image will be bound.
5201     *
5202     * Binding images to memory can be complicated and involve binding different
5203     * portions of the image to different memory objects or regions.  For most
5204     * images, everything lives in the MAIN binding and gets bound by
5205     * vkBindImageMemory.  For disjoint multi-planar images, each plane has
5206     * a unique, disjoint binding and gets bound by vkBindImageMemory2 with
5207     * VkBindImagePlaneMemoryInfo.  There may also exist bits of memory which are
5208     * implicit or driver-managed and live in special-case bindings.
5209     */
5210    enum anv_image_memory_binding {
5211       /**
5212        * Used if and only if image is not multi-planar disjoint. Bound by
5213        * vkBindImageMemory2 without VkBindImagePlaneMemoryInfo.
5214        */
5215       ANV_IMAGE_MEMORY_BINDING_MAIN,
5216 
5217       /**
5218        * Used if and only if image is multi-planar disjoint.  Bound by
5219        * vkBindImageMemory2 with VkBindImagePlaneMemoryInfo.
5220        */
5221       ANV_IMAGE_MEMORY_BINDING_PLANE_0,
5222       ANV_IMAGE_MEMORY_BINDING_PLANE_1,
5223       ANV_IMAGE_MEMORY_BINDING_PLANE_2,
5224 
5225       /**
5226        * Driver-private bo. In special cases we may store the aux surface and/or
5227        * aux state in this binding.
5228        */
5229       ANV_IMAGE_MEMORY_BINDING_PRIVATE,
5230 
5231       /** Sentinel */
5232       ANV_IMAGE_MEMORY_BINDING_END,
5233    } binding;
5234 
5235    uint32_t alignment;
5236    uint64_t size;
5237 
5238    /**
5239     * Offset is relative to the start of the binding created by
5240     * vkBindImageMemory, not to the start of the bo.
5241     */
5242    uint64_t offset;
5243 };
5244 
5245 /**
5246  * Subsurface of an anv_image.
5247  */
5248 struct anv_surface {
5249    struct isl_surf isl;
5250    struct anv_image_memory_range memory_range;
5251 };
5252 
5253 static inline bool MUST_CHECK
5254 anv_surface_is_valid(const struct anv_surface *surface)
5255 {
5256    return surface->isl.size_B > 0 && surface->memory_range.size > 0;
5257 }
5258 
5259 struct anv_image {
5260    struct vk_image vk;
5261 
5262    uint32_t n_planes;
5263 
5264    /**
5265     * Image has multi-planar format and was created with
5266     * VK_IMAGE_CREATE_DISJOINT_BIT.
5267     */
5268    bool disjoint;
5269 
5270    /**
5271     * Image is a WSI image
5272     */
5273    bool from_wsi;
5274 
5275    /**
5276     * Image was imported from a struct AHardwareBuffer.  We have to delay
5277     * final image creation until bind time.
5278     */
5279    bool from_ahb;
5280 
5281    /**
5282     * Image was imported from gralloc with VkNativeBufferANDROID. The gralloc bo
5283     * must be released when the image is destroyed.
5284     */
5285    bool from_gralloc;
5286 
5287    /**
5288     * If not UNDEFINED, image has a hidden plane at planes[n_planes] for ASTC
5289     * LDR workaround or emulation.
5290     */
5291    VkFormat emu_plane_format;
5292 
5293    /**
5294     * The memory bindings created by vkCreateImage and vkBindImageMemory.
5295     *
5296     * For details on the image's memory layout, see check_memory_bindings().
5297     *
5298     * vkCreateImage constructs the `memory_range` for each
5299     * anv_image_memory_binding.  After vkCreateImage, each binding is valid if
5300     * and only if `memory_range::size > 0`.
5301     *
5302     * vkBindImageMemory binds each valid `memory_range` to an `address`.
5303     * Usually, the app will provide the address via the parameters of
5304     * vkBindImageMemory.  However, special-case bindings may be bound to
5305     * driver-private memory.
5306     */
5307    struct anv_image_binding {
5308       struct anv_image_memory_range memory_range;
5309       struct anv_address address;
5310       struct anv_sparse_binding_data sparse_data;
5311    } bindings[ANV_IMAGE_MEMORY_BINDING_END];
5312 
5313    /**
5314     * Image subsurfaces
5315     *
5316     * For each aspect, anv_image::planes[x].surface is valid if and only if
5317     * anv_image::aspects has an x aspect. Refer to anv_image_aspect_to_plane()
5318     * to figure out the plane number associated with a given aspect.
5319     *
5320     * The hardware requires that the depth buffer and stencil buffer be
5321     * separate surfaces.  From Vulkan's perspective, though, depth and stencil
5322     * reside in the same VkImage.  To satisfy both the hardware and Vulkan, we
5323     * allocate the depth and stencil buffers as separate surfaces in the same
5324     * bo.
5325     */
5326    struct anv_image_plane {
5327       struct anv_surface primary_surface;
5328 
5329       /**
5330        * The base aux usage for this image.  For color images, this can be
5331        * either CCS_E or CCS_D depending on whether or not we can reliably
5332        * leave CCS on all the time.
5333        */
5334       enum isl_aux_usage aux_usage;
5335 
5336       struct anv_surface aux_surface;
5337 
5338       /** Location of the compression control surface.  */
5339       struct anv_image_memory_range compr_ctrl_memory_range;
5340 
5341       /** Location of the fast clear state.  */
5342       struct anv_image_memory_range fast_clear_memory_range;
5343 
5344       /**
5345        * Whether this image can be fast cleared with non-zero clear colors.
5346        * This can happen with mutable images when formats with different bit
5347        * sizes per component are used.
5348        *
5349        * On Gfx9+, because the clear colors are stored as 4 32-bit component
5350        * values, we can clear in R16G16_UNORM (storing 2 16-bit values in
5351        * components 0 & 1 of the clear color) and then draw in R32_UINT, which
5352        * would interpret the clear color as a single component value, using
5353        * only the first 16-bit component of the previously written clear color.
5354        *
5355        * On Gfx7/7.5/8, only CC_ZERO/CC_ONE clear colors are supported, this
5356        * boolean will prevent the usage of CC_ONE.
5357        */
5358       bool can_non_zero_fast_clear;
5359 
5360       struct {
5361          /** Whether the image has CCS data mapped through AUX-TT. */
5362          bool mapped;
5363 
5364          /** Main address of the mapping. */
5365          uint64_t addr;
5366 
5367          /** Size of the mapping. */
5368          uint64_t size;
5369       } aux_tt;
5370    } planes[3];
5371 
5372    struct anv_image_memory_range vid_dmv_top_surface;
5373 
5374    /* Link in the anv_device.image_private_objects list */
5375    struct list_head link;
5376 };
5377 
5378 static inline bool
5379 anv_image_is_protected(const struct anv_image *image)
5380 {
5381    return image->vk.create_flags & VK_IMAGE_CREATE_PROTECTED_BIT;
5382 }
5383 
5384 static inline bool
5385 anv_image_is_sparse(const struct anv_image *image)
5386 {
5387    return image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT;
5388 }
5389 
5390 static inline bool
5391 anv_image_is_externally_shared(const struct anv_image *image)
5392 {
5393    return image->vk.drm_format_mod != DRM_FORMAT_MOD_INVALID ||
5394           image->vk.external_handle_types != 0;
5395 }
5396 
5397 static inline bool
5398 anv_image_has_private_binding(const struct anv_image *image)
5399 {
5400    const struct anv_image_binding private_binding =
5401       image->bindings[ANV_IMAGE_MEMORY_BINDING_PRIVATE];
5402    return private_binding.memory_range.size != 0;
5403 }
5404 
5405 static inline bool
5406 anv_image_format_is_d16_or_s8(const struct anv_image *image)
5407 {
5408    return image->vk.format == VK_FORMAT_D16_UNORM ||
5409       image->vk.format == VK_FORMAT_D16_UNORM_S8_UINT ||
5410       image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
5411       image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
5412       image->vk.format == VK_FORMAT_S8_UINT;
5413 }
5414 
5415 /* The ordering of this enum is important */
5416 enum anv_fast_clear_type {
5417    /** Image does not have/support any fast-clear blocks */
5418    ANV_FAST_CLEAR_NONE = 0,
5419    /** Image has/supports fast-clear but only to the default value */
5420    ANV_FAST_CLEAR_DEFAULT_VALUE = 1,
5421    /** Image has/supports fast-clear with an arbitrary fast-clear value */
5422    ANV_FAST_CLEAR_ANY = 2,
5423 };
5424 
5425 /**
5426  * Return the aspect's _format_ plane, not its _memory_ plane (using the
5427  * vocabulary of VK_EXT_image_drm_format_modifier). As a consequence, \a
5428  * aspect_mask may contain VK_IMAGE_ASPECT_PLANE_*, but must not contain
5429  * VK_IMAGE_ASPECT_MEMORY_PLANE_* .
5430  */
5431 static inline uint32_t
5432 anv_image_aspect_to_plane(const struct anv_image *image,
5433                           VkImageAspectFlagBits aspect)
5434 {
5435    return anv_aspect_to_plane(image->vk.aspects, aspect);
5436 }
5437 
5438 /* Returns the number of auxiliary buffer levels attached to an image. */
5439 static inline uint8_t
5440 anv_image_aux_levels(const struct anv_image * const image,
5441                      VkImageAspectFlagBits aspect)
5442 {
5443    uint32_t plane = anv_image_aspect_to_plane(image, aspect);
5444    if (image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE)
5445       return 0;
5446 
5447    return image->vk.mip_levels;
5448 }
5449 
5450 /* Returns the number of auxiliary buffer layers attached to an image. */
5451 static inline uint32_t
5452 anv_image_aux_layers(const struct anv_image * const image,
5453                      VkImageAspectFlagBits aspect,
5454                      const uint8_t miplevel)
5455 {
5456    assert(image);
5457 
5458    /* The miplevel must exist in the main buffer. */
5459    assert(miplevel < image->vk.mip_levels);
5460 
5461    if (miplevel >= anv_image_aux_levels(image, aspect)) {
5462       /* There are no layers with auxiliary data because the miplevel has no
5463        * auxiliary data.
5464        */
5465       return 0;
5466    }
5467 
5468    return MAX2(image->vk.array_layers, image->vk.extent.depth >> miplevel);
5469 }
5470 
5471 static inline struct anv_address MUST_CHECK
5472 anv_image_address(const struct anv_image *image,
5473                   const struct anv_image_memory_range *mem_range)
5474 {
5475    const struct anv_image_binding *binding = &image->bindings[mem_range->binding];
5476    assert(binding->memory_range.offset == 0);
5477 
5478    if (mem_range->size == 0)
5479       return ANV_NULL_ADDRESS;
5480 
5481    return anv_address_add(binding->address, mem_range->offset);
5482 }
5483 
5484 static inline struct anv_address
5485 anv_image_get_clear_color_addr(UNUSED const struct anv_device *device,
5486                                const struct anv_image *image,
5487                                VkImageAspectFlagBits aspect)
5488 {
5489    assert(image->vk.aspects & (VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV |
5490                                VK_IMAGE_ASPECT_DEPTH_BIT));
5491 
5492    uint32_t plane = anv_image_aspect_to_plane(image, aspect);
5493    const struct anv_image_memory_range *mem_range =
5494       &image->planes[plane].fast_clear_memory_range;
5495 
5496    return anv_image_address(image, mem_range);
5497 }
5498 
5499 static inline struct anv_address
5500 anv_image_get_fast_clear_type_addr(const struct anv_device *device,
5501                                    const struct anv_image *image,
5502                                    VkImageAspectFlagBits aspect)
5503 {
5504    /* Xe2+ platforms don't need fast clear type. We shouldn't get here. */
5505    assert(device->info->ver < 20);
5506    struct anv_address addr =
5507       anv_image_get_clear_color_addr(device, image, aspect);
5508 
5509    unsigned clear_color_state_size;
5510    if (device->info->ver >= 11) {
5511       /* The fast clear type and the first compression state are stored in the
5512        * last 2 dwords of the clear color struct. Refer to the comment in
5513        * add_aux_state_tracking_buffer().
5514        */
5515       assert(device->isl_dev.ss.clear_color_state_size >= 32);
5516       clear_color_state_size = device->isl_dev.ss.clear_color_state_size - 8;
5517    } else
5518       clear_color_state_size = device->isl_dev.ss.clear_value_size;
5519    return anv_address_add(addr, clear_color_state_size);
5520 }
5521 
5522 static inline struct anv_address
5523 anv_image_get_compression_state_addr(const struct anv_device *device,
5524                                      const struct anv_image *image,
5525                                      VkImageAspectFlagBits aspect,
5526                                      uint32_t level, uint32_t array_layer)
5527 {
5528    /* Xe2+ platforms don't use compression state. We shouldn't get here. */
5529    assert(device->info->ver < 20);
5530    assert(level < anv_image_aux_levels(image, aspect));
5531    assert(array_layer < anv_image_aux_layers(image, aspect, level));
5532    UNUSED uint32_t plane = anv_image_aspect_to_plane(image, aspect);
5533    assert(isl_aux_usage_has_ccs_e(image->planes[plane].aux_usage));
5534 
5535    /* Relative to start of the plane's fast clear type */
5536    uint32_t offset;
5537 
5538    offset = 4; /* Go past the fast clear type */
5539 
5540    if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
5541       for (uint32_t l = 0; l < level; l++)
5542          offset += u_minify(image->vk.extent.depth, l) * 4;
5543    } else {
5544       offset += level * image->vk.array_layers * 4;
5545    }
5546 
5547    offset += array_layer * 4;
5548 
5549    assert(offset < image->planes[plane].fast_clear_memory_range.size);
5550 
5551    return anv_address_add(
5552       anv_image_get_fast_clear_type_addr(device, image, aspect),
5553       offset);
5554 }
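/*
 * Worked example (illustrative) of the offset computation above: for a 2D
 * image with array_layers = 6, querying level = 2 and array_layer = 3 gives
 * offset = 4 (skip the fast clear type) + 2 * 6 * 4 + 3 * 4 = 64 bytes past
 * the plane's fast clear type address.
 */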
5555 
5556 static inline const struct anv_image_memory_range *
5557 anv_image_get_aux_memory_range(const struct anv_image *image,
5558                                uint32_t plane)
5559 {
5560    if (image->planes[plane].aux_surface.memory_range.size > 0)
5561      return &image->planes[plane].aux_surface.memory_range;
5562    else
5563      return &image->planes[plane].compr_ctrl_memory_range;
5564 }
5565 
5566 /* Returns true if a HiZ-enabled depth buffer can be sampled from. */
5567 static inline bool
5568 anv_can_sample_with_hiz(const struct intel_device_info * const devinfo,
5569                         const struct anv_image *image)
5570 {
5571    if (!(image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
5572       return false;
5573 
5574    /* For Gfx8-11, there are some restrictions around sampling from HiZ.
5575     * The Skylake PRM docs for RENDER_SURFACE_STATE::AuxiliarySurfaceMode
5576     * say:
5577     *
5578     *    "If this field is set to AUX_HIZ, Number of Multisamples must
5579     *    be MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D."
5580     */
5581    if (image->vk.image_type == VK_IMAGE_TYPE_3D)
5582       return false;
5583 
5584    if (!devinfo->has_sample_with_hiz)
5585       return false;
5586 
5587    return image->vk.samples == 1;
5588 }
5589 
5590 /* Returns true if an MCS-enabled buffer can be sampled from. */
5591 static inline bool
5592 anv_can_sample_mcs_with_clear(const struct intel_device_info * const devinfo,
5593                               const struct anv_image *image)
5594 {
5595    assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT);
5596    const uint32_t plane =
5597       anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_COLOR_BIT);
5598 
5599    assert(isl_aux_usage_has_mcs(image->planes[plane].aux_usage));
5600 
5601    const struct anv_surface *anv_surf = &image->planes[plane].primary_surface;
5602 
5603    /* On TGL, the sampler has an issue with some 8 and 16 bpp MSAA fast clears.
5604     * See HSD 1707282275, wa_14013111325. Because formats may be reinterpreted,
5605     * a simplified workaround covering all formats of 16 bpb or less is used.
5606     */
5607    if (intel_needs_workaround(devinfo, 14013111325) &&
5608        isl_format_get_layout(anv_surf->isl.format)->bpb <= 16) {
5609       return false;
5610    }
5611 
5612    return true;
5613 }
5614 
5615 static inline bool
5616 anv_image_plane_uses_aux_map(const struct anv_device *device,
5617                              const struct anv_image *image,
5618                              uint32_t plane)
5619 {
5620    return device->info->has_aux_map &&
5621       isl_aux_usage_has_ccs(image->planes[plane].aux_usage);
5622 }
5623 
5624 static inline bool
5625 anv_image_uses_aux_map(const struct anv_device *device,
5626                        const struct anv_image *image)
5627 {
5628    for (uint32_t p = 0; p < image->n_planes; ++p) {
5629       if (anv_image_plane_uses_aux_map(device, image, p))
5630          return true;
5631    }
5632 
5633    return false;
5634 }
5635 
5636 static inline bool
5637 anv_bo_allows_aux_map(const struct anv_device *device,
5638                       const struct anv_bo *bo)
5639 {
5640    if (device->aux_map_ctx == NULL)
5641       return false;
5642 
5643    return (bo->alloc_flags & ANV_BO_ALLOC_AUX_TT_ALIGNED) != 0;
5644 }
5645 
5646 static inline bool
5647 anv_address_allows_aux_map(const struct anv_device *device,
5648                            struct anv_address addr)
5649 {
5650    if (device->aux_map_ctx == NULL)
5651       return false;
5652 
5653    /* Technically, we really only care about what offset the image is bound
5654     * into on the BO, but we don't have that information here. As a heuristic,
5655     * rely on the BO offset instead.
5656     */
5657    if (anv_address_physical(addr) %
5658        intel_aux_map_get_alignment(device->aux_map_ctx) != 0)
5659       return false;
5660 
5661    return true;
5662 }
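
/*
 * Illustrative sketch (an assumption, not an existing driver helper):
 * callers that want to know whether an aux-map translation can be set up
 * for a binding would typically combine the BO-level and address-level
 * checks above.
 */
static inline bool
anv_example_binding_allows_aux_map(const struct anv_device *device,
                                   const struct anv_bo *bo,
                                   struct anv_address addr)
{
   /* Both the allocation flags and the bound address alignment must allow
    * the aux translation table to cover this range.
    */
   return anv_bo_allows_aux_map(device, bo) &&
          anv_address_allows_aux_map(device, addr);
}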
5663 
5664 void
5665 anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer,
5666                                   const struct anv_image *image,
5667                                   VkImageAspectFlagBits aspect,
5668                                   enum isl_aux_usage aux_usage,
5669                                   uint32_t level,
5670                                   uint32_t base_layer,
5671                                   uint32_t layer_count);
5672 
5673 void
5674 anv_cmd_buffer_mark_image_fast_cleared(struct anv_cmd_buffer *cmd_buffer,
5675                                        const struct anv_image *image,
5676                                        const enum isl_format format,
5677                                        union isl_color_value clear_color);
5678 
5679 void
5680 anv_cmd_buffer_load_clear_color_from_image(struct anv_cmd_buffer *cmd_buffer,
5681                                            struct anv_state state,
5682                                            const struct anv_image *image);
5683 
5684 struct anv_image_binding *
5685 anv_image_aspect_to_binding(struct anv_image *image,
5686                             VkImageAspectFlags aspect);
5687 
5688 void
5689 anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer,
5690                       const struct anv_image *image,
5691                       VkImageAspectFlagBits aspect,
5692                       enum isl_aux_usage aux_usage,
5693                       enum isl_format format, struct isl_swizzle swizzle,
5694                       uint32_t level, uint32_t base_layer, uint32_t layer_count,
5695                       VkRect2D area, union isl_color_value clear_color);
5696 void
5697 anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
5698                               const struct anv_image *image,
5699                               VkImageAspectFlags aspects,
5700                               enum isl_aux_usage depth_aux_usage,
5701                               uint32_t level,
5702                               uint32_t base_layer, uint32_t layer_count,
5703                               VkRect2D area,
5704                               const VkClearDepthStencilValue *clear_value);
5705 void
5706 anv_attachment_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
5707                             const struct anv_attachment *att,
5708                             VkImageLayout layout,
5709                             VkImageAspectFlagBits aspect);
5710 
5711 static inline union isl_color_value
5712 anv_image_hiz_clear_value(const struct anv_image *image)
5713 {
5714    /* The benchmarks we're tracking tend to prefer clearing depth buffers to
5715     * 0.0f when the depth buffers are part of images with multiple aspects.
5716     * Otherwise, they tend to prefer clearing depth buffers to 1.0f.
5717     */
5718    if (image->n_planes == 2)
5719       return (union isl_color_value) { .f32 = { 0.0f, } };
5720    else
5721       return (union isl_color_value) { .f32 = { 1.0f, } };
5722 }
5723 
5724 void
5725 anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
5726                  const struct anv_image *image,
5727                  VkImageAspectFlagBits aspect, uint32_t level,
5728                  uint32_t base_layer, uint32_t layer_count,
5729                  enum isl_aux_op hiz_op);
5730 void
5731 anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
5732                     const struct anv_image *image,
5733                     VkImageAspectFlags aspects,
5734                     uint32_t level,
5735                     uint32_t base_layer, uint32_t layer_count,
5736                     VkRect2D area,
5737                     const VkClearDepthStencilValue *clear_value);
5738 void
5739 anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
5740                  const struct anv_image *image,
5741                  enum isl_format format, struct isl_swizzle swizzle,
5742                  VkImageAspectFlagBits aspect,
5743                  uint32_t base_layer, uint32_t layer_count,
5744                  enum isl_aux_op mcs_op, union isl_color_value *clear_value,
5745                  bool predicate);
5746 void
5747 anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
5748                  const struct anv_image *image,
5749                  enum isl_format format, struct isl_swizzle swizzle,
5750                  VkImageAspectFlagBits aspect, uint32_t level,
5751                  uint32_t base_layer, uint32_t layer_count,
5752                  enum isl_aux_op ccs_op, union isl_color_value *clear_value,
5753                  bool predicate);
5754 
5755 isl_surf_usage_flags_t
5756 anv_image_choose_isl_surf_usage(struct anv_physical_device *device,
5757                                 VkImageCreateFlags vk_create_flags,
5758                                 VkImageUsageFlags vk_usage,
5759                                 isl_surf_usage_flags_t isl_extra_usage,
5760                                 VkImageAspectFlagBits aspect,
5761                                 VkImageCompressionFlagsEXT comp_flags);
5762 
5763 void
5764 anv_cmd_buffer_fill_area(struct anv_cmd_buffer *cmd_buffer,
5765                          struct anv_address address,
5766                          VkDeviceSize size,
5767                          uint32_t data,
5768                          bool protected);
5769 
5770 VkResult
5771 anv_cmd_buffer_ensure_rcs_companion(struct anv_cmd_buffer *cmd_buffer);
5772 
5773 bool
5774 anv_can_hiz_clear_ds_view(struct anv_device *device,
5775                           const struct anv_image_view *iview,
5776                           VkImageLayout layout,
5777                           VkImageAspectFlags clear_aspects,
5778                           float depth_clear_value,
5779                           VkRect2D render_area,
5780                           const VkQueueFlagBits queue_flags);
5781 
5782 bool
5783 anv_can_fast_clear_color_view(struct anv_device *device,
5784                               struct anv_image_view *iview,
5785                               VkImageLayout layout,
5786                               union isl_color_value clear_color,
5787                               uint32_t num_layers,
5788                               VkRect2D render_area,
5789                               const VkQueueFlagBits queue_flags);
5790 
5791 enum isl_aux_state ATTRIBUTE_PURE
5792 anv_layout_to_aux_state(const struct intel_device_info * const devinfo,
5793                         const struct anv_image *image,
5794                         const VkImageAspectFlagBits aspect,
5795                         const VkImageLayout layout,
5796                         const VkQueueFlagBits queue_flags);
5797 
5798 enum isl_aux_usage ATTRIBUTE_PURE
5799 anv_layout_to_aux_usage(const struct intel_device_info * const devinfo,
5800                         const struct anv_image *image,
5801                         const VkImageAspectFlagBits aspect,
5802                         const VkImageUsageFlagBits usage,
5803                         const VkImageLayout layout,
5804                         const VkQueueFlagBits queue_flags);
5805 
5806 enum anv_fast_clear_type ATTRIBUTE_PURE
5807 anv_layout_to_fast_clear_type(const struct intel_device_info * const devinfo,
5808                               const struct anv_image * const image,
5809                               const VkImageAspectFlagBits aspect,
5810                               const VkImageLayout layout,
5811                               const VkQueueFlagBits queue_flags);
5812 
5813 bool ATTRIBUTE_PURE
5814 anv_layout_has_untracked_aux_writes(const struct intel_device_info * const devinfo,
5815                                     const struct anv_image * const image,
5816                                     const VkImageAspectFlagBits aspect,
5817                                     const VkImageLayout layout,
5818                                     const VkQueueFlagBits queue_flags);
5819 
5820 static inline bool
5821 anv_image_aspects_compatible(VkImageAspectFlags aspects1,
5822                              VkImageAspectFlags aspects2)
5823 {
5824    if (aspects1 == aspects2)
5825       return true;
5826 
5827    /* Only color aspects with matching counts are compatible. */
5828    if ((aspects1 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
5829        (aspects2 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
5830        util_bitcount(aspects1) == util_bitcount(aspects2))
5831       return true;
5832 
5833    return false;
5834 }
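
/*
 * Illustrative example (assuming VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV,
 * defined earlier in this header, covers the per-plane color aspects):
 * VK_IMAGE_ASPECT_COLOR_BIT and VK_IMAGE_ASPECT_PLANE_0_BIT each contain
 * a single color aspect, so the helper above reports them as compatible.
 */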
5835 
5836 struct anv_image_view {
5837    struct vk_image_view vk;
5838 
5839    const struct anv_image *image; /**< VkImageViewCreateInfo::image */
5840 
5841    unsigned n_planes;
5842 
5843    /**
5844     * True if the surface states (if any) are owned by some anv_state_stream
5845     * from internal_surface_state_pool.
5846     */
5847    bool use_surface_state_stream;
5848 
5849    struct {
5850       struct isl_view isl;
5851 
5852       /**
5853        * A version of the image view for storage usage (can apply 3D image
5854        * slicing).
5855        */
5856       struct isl_view isl_storage;
5857 
5858       /**
5859        * RENDER_SURFACE_STATE when using image as a sampler surface with an
5860        * image layout of SHADER_READ_ONLY_OPTIMAL or
5861        * DEPTH_STENCIL_READ_ONLY_OPTIMAL.
5862        */
5863       struct anv_surface_state optimal_sampler;
5864 
5865       /**
5866        * RENDER_SURFACE_STATE when using image as a sampler surface with an
5867        * image layout of GENERAL.
5868        */
5869       struct anv_surface_state general_sampler;
5870 
5871       /**
5872        * RENDER_SURFACE_STATE when using image as a storage image.
5873        */
5874       struct anv_surface_state storage;
5875    } planes[3];
5876 };
5877 
5878 enum anv_image_view_state_flags {
5879    ANV_IMAGE_VIEW_STATE_TEXTURE_OPTIMAL      = (1 << 0),
5880 };
5881 
5882 void anv_image_fill_surface_state(struct anv_device *device,
5883                                   const struct anv_image *image,
5884                                   VkImageAspectFlagBits aspect,
5885                                   const struct isl_view *view,
5886                                   isl_surf_usage_flags_t view_usage,
5887                                   enum isl_aux_usage aux_usage,
5888                                   const union isl_color_value *clear_color,
5889                                   enum anv_image_view_state_flags flags,
5890                                   struct anv_surface_state *state_inout);
5891 
5892 
5893 static inline const struct anv_surface_state *
5894 anv_image_view_texture_surface_state(const struct anv_image_view *iview,
5895                                      uint32_t plane, VkImageLayout layout)
5896 {
5897    return layout == VK_IMAGE_LAYOUT_GENERAL ?
5898           &iview->planes[plane].general_sampler :
5899           &iview->planes[plane].optimal_sampler;
5900 }
5901 
5902 static inline const struct anv_surface_state *
5903 anv_image_view_storage_surface_state(const struct anv_image_view *iview)
5904 {
5905    return &iview->planes[0].storage;
5906 }
5907 
5908 static inline bool
5909 anv_cmd_graphics_state_has_image_as_attachment(const struct anv_cmd_graphics_state *state,
5910                                                const struct anv_image *image)
5911 {
5912    for (unsigned a = 0; a < state->color_att_count; a++) {
5913       if (state->color_att[a].iview &&
5914           state->color_att[a].iview->image == image)
5915          return true;
5916    }
5917 
5918    if (state->depth_att.iview && state->depth_att.iview->image == image)
5919       return true;
5920    if (state->stencil_att.iview && state->stencil_att.iview->image == image)
5921       return true;
5922 
5923    return false;
5924 }
5925 
5926 struct anv_image_create_info {
5927    const VkImageCreateInfo *vk_info;
5928 
5929    /** An opt-in bitmask which filters an ISL-mapping of the Vulkan tiling. */
5930    isl_tiling_flags_t isl_tiling_flags;
5931 
5932    /** These flags will be added to any derived from VkImageCreateInfo. */
5933    isl_surf_usage_flags_t isl_extra_usage_flags;
5934 
5935    /** An opt-in stride in pixels, should be 0 for implicit layouts */
5936    uint32_t stride;
5937 
5938    /** Whether to skip allocating the private binding */
5939    bool no_private_binding_alloc;
5940 };
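
/*
 * Illustrative sketch (hypothetical values, not taken from this header):
 * filling anv_image_create_info to request a linear image with an extra
 * ISL render-target usage bit on top of what VkImageCreateInfo implies.
 */
static inline struct anv_image_create_info
anv_example_linear_image_create_info(const VkImageCreateInfo *vk_info)
{
   return (struct anv_image_create_info) {
      .vk_info = vk_info,
      .isl_tiling_flags = ISL_TILING_LINEAR_BIT,
      .isl_extra_usage_flags = ISL_SURF_USAGE_RENDER_TARGET_BIT,
      .stride = 0, /* implicit row pitch */
   };
}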
5941 
5942 VkResult anv_image_init(struct anv_device *device, struct anv_image *image,
5943                         const struct anv_image_create_info *create_info);
5944 
5945 void anv_image_finish(struct anv_image *image);
5946 
5947 void anv_image_get_memory_requirements(struct anv_device *device,
5948                                        struct anv_image *image,
5949                                        VkImageAspectFlags aspects,
5950                                        VkMemoryRequirements2 *pMemoryRequirements);
5951 
5952 void anv_image_view_init(struct anv_device *device,
5953                          struct anv_image_view *iview,
5954                          const VkImageViewCreateInfo *pCreateInfo,
5955                          struct anv_state_stream *state_stream);
5956 
5957 void anv_image_view_finish(struct anv_image_view *iview);
5958 
5959 enum isl_format
5960 anv_isl_format_for_descriptor_type(const struct anv_device *device,
5961                                    VkDescriptorType type);
5962 
5963 static inline isl_surf_usage_flags_t
5964 anv_isl_usage_for_descriptor_type(const VkDescriptorType type)
5965 {
5966    switch (type) {
5967       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
5968       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
5969          return ISL_SURF_USAGE_CONSTANT_BUFFER_BIT;
5970       default:
5971          return ISL_SURF_USAGE_STORAGE_BIT;
5972    }
5973 }
5974 
5975 static inline uint32_t
5976 anv_rasterization_aa_mode(VkPolygonMode raster_mode,
5977                           VkLineRasterizationModeKHR line_mode)
5978 {
5979    if (raster_mode == VK_POLYGON_MODE_LINE &&
5980        line_mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR)
5981       return true;
5982    return false;
5983 }
5984 
5985 static inline VkLineRasterizationModeKHR
5986 anv_line_rasterization_mode(VkLineRasterizationModeKHR line_mode,
5987                             unsigned rasterization_samples)
5988 {
5989    if (line_mode == VK_LINE_RASTERIZATION_MODE_DEFAULT_KHR) {
5990       if (rasterization_samples > 1) {
5991          return VK_LINE_RASTERIZATION_MODE_RECTANGULAR_KHR;
5992       } else {
5993          return VK_LINE_RASTERIZATION_MODE_BRESENHAM_KHR;
5994       }
5995    }
5996    return line_mode;
5997 }
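
/*
 * Illustrative sketch (an assumption, not an existing helper): resolve the
 * effective line mode first and then ask whether antialiasing applies,
 * combining the two helpers above.
 */
static inline bool
anv_example_line_aa_enabled(VkPolygonMode raster_mode,
                            VkLineRasterizationModeKHR line_mode,
                            unsigned rasterization_samples)
{
   /* DEFAULT resolves to RECTANGULAR (MSAA) or BRESENHAM (single sampled),
    * neither of which enables smooth-line antialiasing.
    */
   return anv_rasterization_aa_mode(
             raster_mode,
             anv_line_rasterization_mode(line_mode,
                                         rasterization_samples)) != 0;
}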
5998 
5999 static inline bool
6000 anv_is_dual_src_blend_factor(VkBlendFactor factor)
6001 {
6002    return factor == VK_BLEND_FACTOR_SRC1_COLOR ||
6003           factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR ||
6004           factor == VK_BLEND_FACTOR_SRC1_ALPHA ||
6005           factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA;
6006 }
6007 
6008 static inline bool
6009 anv_is_dual_src_blend_equation(const struct vk_color_blend_attachment_state *cb)
6010 {
6011    return anv_is_dual_src_blend_factor(cb->src_color_blend_factor) &&
6012           anv_is_dual_src_blend_factor(cb->dst_color_blend_factor) &&
6013           anv_is_dual_src_blend_factor(cb->src_alpha_blend_factor) &&
6014           anv_is_dual_src_blend_factor(cb->dst_alpha_blend_factor);
6015 }
6016 
6017 VkFormatFeatureFlags2
6018 anv_get_image_format_features2(const struct anv_physical_device *physical_device,
6019                                VkFormat vk_format,
6020                                const struct anv_format *anv_format,
6021                                VkImageTiling vk_tiling,
6022                                const struct isl_drm_modifier_info *isl_mod_info);
6023 
6024 void anv_fill_buffer_surface_state(struct anv_device *device,
6025                                    void *surface_state_ptr,
6026                                    enum isl_format format,
6027                                    struct isl_swizzle swizzle,
6028                                    isl_surf_usage_flags_t usage,
6029                                    struct anv_address address,
6030                                    uint32_t range, uint32_t stride);
6031 
6032 
6033 struct gfx8_border_color {
6034    union {
6035       float float32[4];
6036       uint32_t uint32[4];
6037    };
6038    /* Pad out to 64 bytes */
6039    uint32_t _pad[12];
6040 };
6041 
6042 struct anv_sampler {
6043    struct vk_sampler            vk;
6044 
6045    /* Hash of the sampler state + border color, useful for embedded samplers
6046     * and included in the descriptor layout hash.
6047     */
6048    unsigned char                sha1[20];
6049 
6050    uint32_t                     state[3][4];
6051    /* Packed SAMPLER_STATE without the border color pointer. */
6052    uint32_t                     state_no_bc[3][4];
6053    uint32_t                     n_planes;
6054 
6055    /* Blob of sampler state data which is guaranteed to be 32-byte aligned
6056     * and with a 32-byte stride for use as bindless samplers.
6057     */
6058    struct anv_state             bindless_state;
6059 
6060    struct anv_state             custom_border_color;
6061 };
6062 
6063 
6064 struct anv_query_pool {
6065    struct vk_query_pool                         vk;
6066 
6067    /** Stride between queries, in bytes */
6068    uint32_t                                     stride;
6069    /** Buffer object backing this query pool's storage */
6070    struct anv_bo *                              bo;
6071 
6072    /** Location for the KHR_performance_query small batch updating
6073     *  ANV_PERF_QUERY_OFFSET_REG
6074     */
6075    uint32_t                                     khr_perf_preambles_offset;
6076 
6077    /** Size of each small batch */
6078    uint32_t                                     khr_perf_preamble_stride;
6079 
6080    /* KHR perf queries : */
6081    /** Query pass size in bytes (availability + padding + query data) */
6082    uint32_t                                     pass_size;
6083    /** Offset of the query data within a pass */
6084    uint32_t                                     data_offset;
6085    /** query data / 2 */
6086    uint32_t                                     snapshot_size;
6087    uint32_t                                     n_counters;
6088    struct intel_perf_counter_pass                *counter_pass;
6089    uint32_t                                     n_passes;
6090    struct intel_perf_query_info                 **pass_query;
6091 
6092    /* Video encoding queries */
6093    VkVideoCodecOperationFlagsKHR                codec;
6094 };
6095 
6096 static inline uint32_t khr_perf_query_preamble_offset(const struct anv_query_pool *pool,
6097                                                       uint32_t pass)
6098 {
6099    return pool->khr_perf_preambles_offset +
6100           pool->khr_perf_preamble_stride * pass;
6101 }
6102 
6103 struct anv_vid_mem {
6104    struct anv_device_memory *mem;
6105    VkDeviceSize       offset;
6106    VkDeviceSize       size;
6107 };
6108 
6109 #define ANV_MB_WIDTH 16
6110 #define ANV_MB_HEIGHT 16
6111 #define ANV_VIDEO_H264_MAX_NUM_REF_FRAME 16
6112 #define ANV_VIDEO_H265_MAX_NUM_REF_FRAME 16
6113 #define ANV_VIDEO_H265_HCP_NUM_REF_FRAME 8
6114 #define ANV_MAX_H265_CTB_SIZE 64
6115 
6116 enum anv_vid_mem_h264_types {
6117    ANV_VID_MEM_H264_INTRA_ROW_STORE,
6118    ANV_VID_MEM_H264_DEBLOCK_FILTER_ROW_STORE,
6119    ANV_VID_MEM_H264_BSD_MPC_ROW_SCRATCH,
6120    ANV_VID_MEM_H264_MPR_ROW_SCRATCH,
6121    ANV_VID_MEM_H264_MAX,
6122 };
6123 
6124 enum anv_vid_mem_h265_types {
6125    ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_LINE,
6126    ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_LINE,
6127    ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_COLUMN,
6128    ANV_VID_MEM_H265_METADATA_LINE,
6129    ANV_VID_MEM_H265_METADATA_TILE_LINE,
6130    ANV_VID_MEM_H265_METADATA_TILE_COLUMN,
6131    ANV_VID_MEM_H265_SAO_LINE,
6132    ANV_VID_MEM_H265_SAO_TILE_LINE,
6133    ANV_VID_MEM_H265_SAO_TILE_COLUMN,
6134    ANV_VID_MEM_H265_DEC_MAX,
6135    ANV_VID_MEM_H265_SSE_SRC_PIX_ROW_STORE = ANV_VID_MEM_H265_DEC_MAX,
6136    ANV_VID_MEM_H265_ENC_MAX,
6137 };
6138 
6139 struct anv_video_session {
6140    struct vk_video_session vk;
6141 
6142    /* the decoder needs some private memory allocations */
6143    struct anv_vid_mem vid_mem[ANV_VID_MEM_H265_ENC_MAX];
6144 };
6145 
6146 struct anv_video_session_params {
6147    struct vk_video_session_parameters vk;
6148    VkVideoEncodeRateControlModeFlagBitsKHR rc_mode;
6149 };
6150 
6151 void
6152 anv_dump_pipe_bits(enum anv_pipe_bits bits, FILE *f);
6153 
6154 static inline void
6155 anv_add_pending_pipe_bits(struct anv_cmd_buffer *cmd_buffer,
6156                           enum anv_pipe_bits bits,
6157                           const char *reason)
6158 {
6159    cmd_buffer->state.pending_pipe_bits |= bits;
6160    if (INTEL_DEBUG(DEBUG_PIPE_CONTROL) && bits) {
6161       fputs("pc: add ", stdout);
6162       anv_dump_pipe_bits(bits, stdout);
6163       fprintf(stdout, "reason: %s\n", reason);
6164    }
6165    if (cmd_buffer->batch.pc_reasons_count < ARRAY_SIZE(cmd_buffer->batch.pc_reasons)) {
6166       cmd_buffer->batch.pc_reasons[cmd_buffer->batch.pc_reasons_count++] = reason;
6167    }
6168 }
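
/*
 * Illustrative sketch (the specific bit below is an assumption about enum
 * anv_pipe_bits defined earlier in this header): accumulate a flush with a
 * human-readable reason. The bits are only turned into an actual
 * PIPE_CONTROL by a later "apply pipe flushes" step.
 */
static inline void
anv_example_flush_render_target_cache(struct anv_cmd_buffer *cmd_buffer)
{
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT,
                             "example: flush RT cache before sampling");
}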
6169 
6170 struct anv_performance_configuration_intel {
6171    struct vk_object_base      base;
6172 
6173    struct intel_perf_registers *register_config;
6174 
6175    uint64_t                   config_id;
6176 };
6177 
6178 void anv_physical_device_init_va_ranges(struct anv_physical_device *device);
6179 void anv_physical_device_init_perf(struct anv_physical_device *device, int fd);
6180 void anv_device_perf_init(struct anv_device *device);
6181 void anv_device_perf_close(struct anv_device *device);
6182 void anv_perf_write_pass_results(struct intel_perf_config *perf,
6183                                  struct anv_query_pool *pool, uint32_t pass,
6184                                  const struct intel_perf_query_result *accumulated_results,
6185                                  union VkPerformanceCounterResultKHR *results);
6186 
6187 void anv_apply_per_prim_attr_wa(struct nir_shader *ms_nir,
6188                                 struct nir_shader *fs_nir,
6189                                 struct anv_device *device,
6190                                 const VkGraphicsPipelineCreateInfo *info);
6191 
6192 /* Used to emit a series of memcpy operations */
6193 struct anv_memcpy_state {
6194    struct anv_device *device;
6195    struct anv_cmd_buffer *cmd_buffer;
6196    struct anv_batch *batch;
6197 
6198    /* Configuration programmed by the memcpy operation */
6199    struct intel_urb_config urb_cfg;
6200 
6201    struct anv_vb_cache_range vb_bound;
6202    struct anv_vb_cache_range vb_dirty;
6203 };
6204 
6205 VkResult anv_device_init_internal_kernels(struct anv_device *device);
6206 void anv_device_finish_internal_kernels(struct anv_device *device);
6207 VkResult anv_device_get_internal_shader(struct anv_device *device,
6208                                         enum anv_internal_kernel_name name,
6209                                         struct anv_shader_bin **out_bin);
6210 
6211 VkResult anv_device_init_astc_emu(struct anv_device *device);
6212 void anv_device_finish_astc_emu(struct anv_device *device);
6213 void anv_astc_emu_process(struct anv_cmd_buffer *cmd_buffer,
6214                           struct anv_image *image,
6215                           VkImageLayout layout,
6216                           const VkImageSubresourceLayers *subresource,
6217                           VkOffset3D block_offset,
6218                           VkExtent3D block_extent);
6219 
6220 /* This structure is used in 2 scenarios :
6221  *
6222  *    - copy utrace timestamps from a command buffer so that the command
6223  *      buffer can be resubmitted multiple times without the recorded
6224  *      timestamps being overwritten before they're read back
6225  *
6226  *    - emit trace points for queue debug tagging
6227  *      (vkQueueBeginDebugUtilsLabelEXT/vkQueueEndDebugUtilsLabelEXT)
6228  */
6229 struct anv_utrace_submit {
6230    struct anv_async_submit base;
6231 
6232    /* structure used by the perfetto glue */
6233    struct intel_ds_flush_data ds;
6234 
6235    /* Stream for temporary allocations */
6236    struct anv_state_stream dynamic_state_stream;
6237    struct anv_state_stream general_state_stream;
6238 
6239    /* Last fully read 64bit timestamp (used to rebuild the upper bits of 32bit
6240     * timestamps)
6241     */
6242    uint64_t last_full_timestamp;
6243 
6244    /* Memcpy state tracking (only used for timestamp copies on render engine) */
6245    struct anv_memcpy_state memcpy_state;
6246 
6247    /* Memcpy state tracking (only used for timestamp copies on compute engine) */
6248    struct anv_simple_shader simple_state;
6249 };
6250 
6251 void anv_device_utrace_init(struct anv_device *device);
6252 void anv_device_utrace_finish(struct anv_device *device);
6253 VkResult
6254 anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
6255                                     uint32_t cmd_buffer_count,
6256                                     struct anv_cmd_buffer **cmd_buffers,
6257                                     struct anv_utrace_submit **out_submit);
6258 
6259 void
6260 anv_device_utrace_emit_gfx_copy_buffer(struct u_trace_context *utctx,
6261                                        void *cmdstream,
6262                                        void *ts_from, uint64_t from_offset_B,
6263                                        void *ts_to, uint64_t to_offset_B,
6264                                        uint64_t size_B);
6265 
6266 static inline bool
6267 anv_has_cooperative_matrix(const struct anv_physical_device *device)
6268 {
6269    return device->has_cooperative_matrix;
6270 }
6271 
6272 #define ANV_FROM_HANDLE(__anv_type, __name, __handle) \
6273    VK_FROM_HANDLE(__anv_type, __name, __handle)
6274 
6275 VK_DEFINE_HANDLE_CASTS(anv_cmd_buffer, vk.base, VkCommandBuffer,
6276                        VK_OBJECT_TYPE_COMMAND_BUFFER)
6277 VK_DEFINE_HANDLE_CASTS(anv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
6278 VK_DEFINE_HANDLE_CASTS(anv_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE)
6279 VK_DEFINE_HANDLE_CASTS(anv_physical_device, vk.base, VkPhysicalDevice,
6280                        VK_OBJECT_TYPE_PHYSICAL_DEVICE)
6281 VK_DEFINE_HANDLE_CASTS(anv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
6282 
6283 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, vk.base, VkBuffer,
6284                                VK_OBJECT_TYPE_BUFFER)
6285 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, vk.base, VkBufferView,
6286                                VK_OBJECT_TYPE_BUFFER_VIEW)
6287 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_pool, base, VkDescriptorPool,
6288                                VK_OBJECT_TYPE_DESCRIPTOR_POOL)
6289 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, base, VkDescriptorSet,
6290                                VK_OBJECT_TYPE_DESCRIPTOR_SET)
6291 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, base,
6292                                VkDescriptorSetLayout,
6293                                VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
6294 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, vk.base, VkDeviceMemory,
6295                                VK_OBJECT_TYPE_DEVICE_MEMORY)
6296 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
6297 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)
6298 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, vk.base, VkImageView,
6299                                VK_OBJECT_TYPE_IMAGE_VIEW);
6300 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, base, VkPipeline,
6301                                VK_OBJECT_TYPE_PIPELINE)
6302 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, base, VkPipelineLayout,
6303                                VK_OBJECT_TYPE_PIPELINE_LAYOUT)
6304 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, vk.base, VkQueryPool,
6305                                VK_OBJECT_TYPE_QUERY_POOL)
6306 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, vk.base, VkSampler,
6307                                VK_OBJECT_TYPE_SAMPLER)
6308 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_performance_configuration_intel, base,
6309                                VkPerformanceConfigurationINTEL,
6310                                VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL)
6311 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_video_session, vk.base,
6312                                VkVideoSessionKHR,
6313                                VK_OBJECT_TYPE_VIDEO_SESSION_KHR)
6314 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_video_session_params, vk.base,
6315                                VkVideoSessionParametersKHR,
6316                                VK_OBJECT_TYPE_VIDEO_SESSION_PARAMETERS_KHR)
6317 
6318 #define anv_genX(devinfo, thing) ({             \
6319    __typeof(&gfx9_##thing) genX_thing;          \
6320    switch ((devinfo)->verx10) {                 \
6321    case 90:                                     \
6322       genX_thing = &gfx9_##thing;               \
6323       break;                                    \
6324    case 110:                                    \
6325       genX_thing = &gfx11_##thing;              \
6326       break;                                    \
6327    case 120:                                    \
6328       genX_thing = &gfx12_##thing;              \
6329       break;                                    \
6330    case 125:                                    \
6331       genX_thing = &gfx125_##thing;             \
6332       break;                                    \
6333    case 200:                                    \
6334       genX_thing = &gfx20_##thing;              \
6335       break;                                    \
6336    default:                                     \
6337       unreachable("Unknown hardware generation"); \
6338    }                                            \
6339    genX_thing;                                  \
6340 })
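
/*
 * Illustrative usage sketch for anv_genX() (the callee name below is
 * hypothetical): the macro picks the per-generation symbol at runtime
 * from devinfo->verx10, e.g.
 *
 *    anv_genX(device->info, emit_example_state)(cmd_buffer);
 *
 * resolves to the matching gfx9/gfx11/gfx12/gfx125/gfx20 function and
 * calls it through a pointer, or hits unreachable() on an unknown
 * generation.
 */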
6341 
6342 /* Gen-specific function declarations */
6343 #ifdef genX
6344 #  include "anv_genX.h"
6345 #else
6346 #  define genX(x) gfx9_##x
6347 #  include "anv_genX.h"
6348 #  undef genX
6349 #  define genX(x) gfx11_##x
6350 #  include "anv_genX.h"
6351 #  undef genX
6352 #  define genX(x) gfx12_##x
6353 #  include "anv_genX.h"
6354 #  undef genX
6355 #  define genX(x) gfx125_##x
6356 #  include "anv_genX.h"
6357 #  undef genX
6358 #  define genX(x) gfx20_##x
6359 #  include "anv_genX.h"
6360 #  undef genX
6361 #endif
6362 
6363 #ifdef __cplusplus
6364 }
6365 #endif
6366 
6367 #endif /* ANV_PRIVATE_H */
6368