/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef ANV_PRIVATE_H
#define ANV_PRIVATE_H

#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <pthread.h>
#include <assert.h>
#include <stdint.h>
#include "drm-uapi/i915_drm.h"
#include "drm-uapi/drm_fourcc.h"

#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x) ((void)0)
#endif

#include "common/intel_engine.h"
#include "common/intel_gem.h"
#include "common/intel_l3_config.h"
#include "common/intel_measure.h"
#include "common/intel_mem.h"
#include "common/intel_sample_positions.h"
#include "decoder/intel_decoder.h"
#include "dev/intel_device_info.h"
#include "blorp/blorp.h"
#include "compiler/elk/elk_compiler.h"
#include "ds/intel_driver_ds.h"
#include "util/bitset.h"
#include "util/bitscan.h"
#include "util/detect_os.h"
#include "util/macros.h"
#include "util/hash_table.h"
#include "util/list.h"
#include "util/perf/u_trace.h"
#include "util/set.h"
#include "util/sparse_array.h"
#include "util/u_atomic.h"
#include "util/u_vector.h"
#include "util/u_math.h"
#include "util/vma.h"
#include "util/xmlconfig.h"
#include "vk_alloc.h"
#include "vk_buffer.h"
#include "vk_command_buffer.h"
#include "vk_command_pool.h"
#include "vk_debug_report.h"
#include "vk_descriptor_update_template.h"
#include "vk_device.h"
#include "vk_drm_syncobj.h"
#include "vk_enum_defines.h"
#include "vk_format.h"
#include "vk_framebuffer.h"
#include "vk_graphics_state.h"
#include "vk_image.h"
#include "vk_instance.h"
#include "vk_pipeline_cache.h"
#include "vk_physical_device.h"
#include "vk_shader_module.h"
#include "vk_sync.h"
#include "vk_sync_timeline.h"
#include "vk_util.h"
#include "vk_queue.h"
#include "vk_log.h"
#include "vk_ycbcr_conversion.h"

/* Pre-declarations needed for WSI entrypoints */
struct wl_surface;
struct wl_display;
typedef struct xcb_connection_t xcb_connection_t;
typedef uint32_t xcb_visualid_t;
typedef uint32_t xcb_window_t;

struct anv_batch;
struct anv_buffer;
struct anv_buffer_view;
struct anv_image_view;
struct anv_instance;

struct intel_perf_config;
struct intel_perf_counter_pass;
struct intel_perf_query_result;

#include <vulkan/vulkan.h>
#include <vulkan/vk_icd.h>

#include "anv_android.h"
#include "anv_entrypoints.h"
#include "isl/isl.h"

#include "dev/intel_debug.h"
#undef MESA_LOG_TAG
#define MESA_LOG_TAG "MESA-INTEL"
#include "util/log.h"
#include "wsi_common.h"

#define NSEC_PER_SEC 1000000000ull

/* anv Virtual Memory Layout
 * =========================
 *
 * When the anv driver is determining the virtual graphics addresses of memory
 * objects itself using the softpin mechanism, the following memory ranges
 * will be used.
 *
 * Three special considerations to notice:
 *
 * (1) the dynamic state pool is located within the same 4 GiB as the low
 * heap. This is to work around a VF cache issue described in a comment in
 * anv_physical_device_init_heaps.
 *
 * (2) the binding table pool is located at lower addresses than the surface
 * state pool, within a 4 GiB range. This allows surface state base addresses
 * to cover both binding tables (16 bit offsets) and surface states (32 bit
 * offsets).
 *
 * (3) the last 4 GiB of the address space is withheld from the high
 * heap. Various hardware units will read past the end of an object for
 * various reasons. This healthy margin prevents reads from wrapping around
 * 48-bit addresses.
 */
#define GENERAL_STATE_POOL_MIN_ADDRESS     0x000000200000ULL /* 2 MiB */
#define GENERAL_STATE_POOL_MAX_ADDRESS     0x00003fffffffULL
#define LOW_HEAP_MIN_ADDRESS               0x000040000000ULL /* 1 GiB */
#define LOW_HEAP_MAX_ADDRESS               0x00007fffffffULL
#define DYNAMIC_STATE_POOL_MIN_ADDRESS     0x0000c0000000ULL /* 3 GiB */
#define DYNAMIC_STATE_POOL_MAX_ADDRESS     0x0000ffffffffULL
#define BINDING_TABLE_POOL_MIN_ADDRESS     0x000100000000ULL /* 4 GiB */
#define BINDING_TABLE_POOL_MAX_ADDRESS     0x00013fffffffULL
#define SURFACE_STATE_POOL_MIN_ADDRESS     0x000140000000ULL /* 5 GiB */
#define SURFACE_STATE_POOL_MAX_ADDRESS     0x00017fffffffULL
#define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000180000000ULL /* 6 GiB */
#define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL
#define CLIENT_VISIBLE_HEAP_MIN_ADDRESS    0x0001c0000000ULL /* 7 GiB */
#define CLIENT_VISIBLE_HEAP_MAX_ADDRESS    0x0002bfffffffULL
#define HIGH_HEAP_MIN_ADDRESS              0x0002c0000000ULL /* 11 GiB */

#define GENERAL_STATE_POOL_SIZE     \
   (GENERAL_STATE_POOL_MAX_ADDRESS - GENERAL_STATE_POOL_MIN_ADDRESS + 1)
#define LOW_HEAP_SIZE               \
   (LOW_HEAP_MAX_ADDRESS - LOW_HEAP_MIN_ADDRESS + 1)
#define DYNAMIC_STATE_POOL_SIZE     \
   (DYNAMIC_STATE_POOL_MAX_ADDRESS - DYNAMIC_STATE_POOL_MIN_ADDRESS + 1)
#define BINDING_TABLE_POOL_SIZE     \
   (BINDING_TABLE_POOL_MAX_ADDRESS - BINDING_TABLE_POOL_MIN_ADDRESS + 1)
#define BINDING_TABLE_POOL_BLOCK_SIZE (65536)
#define SURFACE_STATE_POOL_SIZE     \
   (SURFACE_STATE_POOL_MAX_ADDRESS - SURFACE_STATE_POOL_MIN_ADDRESS + 1)
#define INSTRUCTION_STATE_POOL_SIZE \
   (INSTRUCTION_STATE_POOL_MAX_ADDRESS - INSTRUCTION_STATE_POOL_MIN_ADDRESS + 1)
#define CLIENT_VISIBLE_HEAP_SIZE               \
   (CLIENT_VISIBLE_HEAP_MAX_ADDRESS - CLIENT_VISIBLE_HEAP_MIN_ADDRESS + 1)

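/* A minimal compile-time sketch (added for illustration; not part of the
 * upstream header): the layout comment above promises that the binding
 * table pool sits below the surface state pool inside a single 4 GiB region,
 * so one surface state base address can reach both.  Assuming the C11
 * static_assert from <assert.h> (already included above), that invariant
 * can be spelled out directly:
 */
static_assert((BINDING_TABLE_POOL_MIN_ADDRESS >> 32) ==
              (SURFACE_STATE_POOL_MAX_ADDRESS >> 32),
              "binding tables and surface states must share a 4 GiB region");
static_assert(BINDING_TABLE_POOL_MAX_ADDRESS < SURFACE_STATE_POOL_MIN_ADDRESS,
              "binding tables must be placed below surface states");
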
/* Allowing different clear colors requires us to perform a depth resolve at
 * the end of certain render passes. This is because while slow clears store
 * the clear color in the HiZ buffer, fast clears (without a resolve) don't.
 * See the PRMs for examples describing when additional resolves would be
 * necessary. To enable fast clears without requiring extra resolves, we set
 * the clear value to a globally-defined one. We could allow different values
 * if the user doesn't expect coherent data during or after a render pass
 * (VK_ATTACHMENT_STORE_OP_DONT_CARE), but such users (aside from the CTS)
 * don't seem to exist yet. In almost all Vulkan applications tested thus far,
 * 1.0f seems to be the only value used. The only application that doesn't set
 * this value does so through the usage of a seemingly uninitialized clear
 * value.
 */
#define ANV_HZ_FC_VAL 1.0f

/* 3DSTATE_VERTEX_BUFFER supports 33 VBs, we use 2 for base & drawid SGVs */
#define MAX_VBS         (33 - 2)

/* 3DSTATE_VERTEX_ELEMENTS supports up to 34 VEs, but our backend compiler
 * only supports the push model of VS inputs, and we only have 128 GRFs,
 * minus the g0 and g1 payload, which gives us a maximum of 31 VEs.  Plus,
 * we use two of them for SGVs.
 */
#define MAX_VES         (31 - 2)

#define MAX_XFB_BUFFERS  4
#define MAX_XFB_STREAMS  4
#define MAX_SETS        32
#define MAX_RTS          8
#define MAX_VIEWPORTS   16
#define MAX_SCISSORS    16
#define MAX_PUSH_CONSTANTS_SIZE 128
#define MAX_DYNAMIC_BUFFERS 16
#define MAX_IMAGES 64
#define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
#define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096
#define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32
/* We need 16 for UBO block reads to work and 32 for push UBOs. However, we
 * use 64 here to avoid cache issues. We could most likely bring it back to
 * 32 if we had different virtual addresses for the different views on a given
 * GEM object.
 */
#define ANV_UBO_ALIGNMENT 64
#define ANV_SSBO_ALIGNMENT 4
#define ANV_SSBO_BOUNDS_CHECK_ALIGNMENT 4
#define MAX_VIEWS_FOR_PRIMITIVE_REPLICATION 16
#define MAX_SAMPLE_LOCATIONS 16

/* From the Skylake PRM Vol. 7 "Binding Table Surface State Model":
 *
 *    "The surface state model is used when a Binding Table Index (specified
 *    in the message descriptor) of less than 240 is specified. In this model,
 *    the Binding Table Index is used to index into the binding table, and the
 *    binding table entry contains a pointer to the SURFACE_STATE."
 *
 * Binding table values above 240 are used for various things in the hardware
 * such as stateless, stateless with incoherent cache, SLM, and bindless.
 */
#define MAX_BINDING_TABLE_SIZE 240

/* The kernel relocation API has a limitation of a 32-bit delta value
 * applied to the address before it is written which, in spite of it being
 * unsigned, is treated as signed.  Because of the way that this maps to
 * the Vulkan API, we cannot handle an offset into a buffer that does not
 * fit into a signed 32 bits.  The only mechanism we have for dealing with
 * this at the moment is to limit all VkDeviceMemory objects to a maximum
 * of 2GB each.  The Vulkan spec allows us to do this:
 *
 *    "Some platforms may have a limit on the maximum size of a single
 *    allocation. For example, certain systems may fail to create
 *    allocations with a size greater than or equal to 4GB. Such a limit is
 *    implementation-dependent, and if such a failure occurs then the error
 *    VK_ERROR_OUT_OF_DEVICE_MEMORY should be returned."
 */
#define MAX_MEMORY_ALLOCATION_SIZE (1ull << 31)

#define ANV_SVGS_VB_INDEX    MAX_VBS
#define ANV_DRAWID_VB_INDEX (MAX_VBS + 1)

/* We reserve this MI ALU register for the purpose of handling predication.
 * Other code which uses the MI ALU should leave it alone.
 */
#define ANV_PREDICATE_RESULT_REG 0x2678 /* MI_ALU_REG15 */

/* We reserve this MI ALU register to pass around an offset computed from
 * VkPerformanceQuerySubmitInfoKHR::counterPassIndex VK_KHR_performance_query.
 * Other code which uses the MI ALU should leave it alone.
 */
#define ANV_PERF_QUERY_OFFSET_REG 0x2670 /* MI_ALU_REG14 */

#define ANV_GRAPHICS_SHADER_STAGE_COUNT (MESA_SHADER_MESH + 1)

/* For gfx12 we set the streamout buffers using 4 separate commands
 * (3DSTATE_SO_BUFFER_INDEX_*) instead of 3DSTATE_SO_BUFFER. However the layout
 * of the 3DSTATE_SO_BUFFER_INDEX_* commands is identical to that of
 * 3DSTATE_SO_BUFFER apart from the SOBufferIndex field, so for now we use the
 * 3DSTATE_SO_BUFFER command, but change the 3DCommandSubOpcode.
 * SO_BUFFER_INDEX_0_CMD is actually the 3DCommandSubOpcode for
 * 3DSTATE_SO_BUFFER_INDEX_0.
 */
#define SO_BUFFER_INDEX_0_CMD 0x60
#define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))

static inline uint32_t
align_down_npot_u32(uint32_t v, uint32_t a)
{
   return v - (v % a);
}

/** Alignment must be a power of 2. */
static inline bool
anv_is_aligned(uintmax_t n, uintmax_t a)
{
   assert(a == (a & -a));
   return (n & (a - 1)) == 0;
}

static inline union isl_color_value
vk_to_isl_color(VkClearColorValue color)
{
   return (union isl_color_value) {
      .u32 = {
         color.uint32[0],
         color.uint32[1],
         color.uint32[2],
         color.uint32[3],
      },
   };
}

static inline union isl_color_value
vk_to_isl_color_with_format(VkClearColorValue color, enum isl_format format)
{
   const struct isl_format_layout *fmtl = isl_format_get_layout(format);
   union isl_color_value isl_color = { .u32 = {0, } };

#define COPY_COLOR_CHANNEL(c, i) \
   if (fmtl->channels.c.bits) \
      isl_color.u32[i] = color.uint32[i]

   COPY_COLOR_CHANNEL(r, 0);
   COPY_COLOR_CHANNEL(g, 1);
   COPY_COLOR_CHANNEL(b, 2);
   COPY_COLOR_CHANNEL(a, 3);

#undef COPY_COLOR_CHANNEL

   return isl_color;
}

void __anv_perf_warn(struct anv_device *device,
                     const struct vk_object_base *object,
                     const char *file, int line, const char *format, ...)
   anv_printflike(5, 6);

/**
 * Print a FINISHME message, including its source location.
 */
#define anv_finishme(format, ...) \
   do { \
      static bool reported = false; \
      if (!reported) { \
         mesa_logw("%s:%d: FINISHME: " format, __FILE__, __LINE__, \
                    ##__VA_ARGS__); \
         reported = true; \
      } \
   } while (0)

/**
 * Print a perf warning message.  Set INTEL_DEBUG=perf to see these.
 */
#define anv_perf_warn(objects_macro, format, ...)   \
   do { \
      static bool reported = false; \
      if (!reported && INTEL_DEBUG(DEBUG_PERF)) { \
         __vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT,      \
                  VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,      \
                  objects_macro, __FILE__, __LINE__,                    \
                  format, ## __VA_ARGS__);                              \
         reported = true; \
      } \
   } while (0)

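/* Illustrative usage sketch (the object argument and the message below are
 * made up): each anv_perf_warn() call site reports at most once, and only
 * when INTEL_DEBUG=perf is set, e.g.
 *
 *    anv_perf_warn(VK_LOG_OBJS(&image->vk.base),
 *                  "Falling back to a slow clear for this format.");
 */
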
/* A non-fatal assert.  Useful for debugging. */
#if MESA_DEBUG
#define anv_assert(x) ({ \
   if (unlikely(!(x))) \
      mesa_loge("%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \
})
#else
#define anv_assert(x)
#endif

struct anv_bo {
   const char *name;

   uint32_t gem_handle;

   uint32_t refcount;

   /* Index into the current validation list.  This is used by the
    * validation list building algorithm to track which buffers are already
    * in the validation list so that we can ensure uniqueness.
    */
   uint32_t exec_obj_index;

   /* Index for use with util_sparse_array_free_list */
   uint32_t free_index;

   /* Last known offset.  This value is provided by the kernel when we
    * execbuf and is used as the presumed offset for the next bunch of
    * relocations.
    */
   uint64_t offset;

   /** Size of the buffer not including implicit aux */
   uint64_t size;

   /* Map for internally mapped BOs.
    *
    * If ANV_BO_ALLOC_MAPPED is set in flags, this is the map for the whole
    * BO. If ANV_BO_WRAPPER is set in flags, map points to the wrapped BO.
    */
   void *map;

   /** Flags to pass to the kernel through drm_i915_exec_object2::flags */
   uint32_t flags;

   /** True if this BO may be shared with other processes */
   bool is_external:1;

   /** True if this BO is a wrapper
    *
    * When set to true, none of the fields in this BO are meaningful except
    * for anv_bo::is_wrapper and anv_bo::map which points to the actual BO.
    * See also anv_bo_unwrap().  Wrapper BOs are not allowed when use_softpin
    * is set in the physical device.
    */
   bool is_wrapper:1;

   /** See also ANV_BO_ALLOC_FIXED_ADDRESS */
   bool has_fixed_address:1;

   /** True if this BO wraps a host pointer */
   bool from_host_ptr:1;

   /** See also ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS */
   bool has_client_visible_address:1;
};

static inline struct anv_bo *
anv_bo_ref(struct anv_bo *bo)
{
   p_atomic_inc(&bo->refcount);
   return bo;
}

static inline struct anv_bo *
anv_bo_unwrap(struct anv_bo *bo)
{
   while (bo->is_wrapper)
      bo = bo->map;
   return bo;
}

static inline bool
anv_bo_is_pinned(struct anv_bo *bo)
{
#if defined(GFX_VERx10) && GFX_VERx10 < 80
   /* Haswell and earlier never use softpin */
   assert(!(bo->flags & EXEC_OBJECT_PINNED));
   assert(!bo->has_fixed_address);
   return false;
#else
   /* If we don't have a GFX_VERx10 #define, we need to look at the BO.  Also,
    * for GFX version 8, we need to look at the BO because Broadwell softpins
    * but Cherryview doesn't.
    */
   assert((bo->flags & EXEC_OBJECT_PINNED) || !bo->has_fixed_address);
   return (bo->flags & EXEC_OBJECT_PINNED) != 0;
#endif
}

struct anv_address {
   struct anv_bo *bo;
   int64_t offset;
};

#define ANV_NULL_ADDRESS ((struct anv_address) { NULL, 0 })

static inline struct anv_address
anv_address_from_u64(uint64_t addr_u64)
{
   assert(addr_u64 == intel_canonical_address(addr_u64));
   return (struct anv_address) {
      .bo = NULL,
      .offset = addr_u64,
   };
}

static inline bool
anv_address_is_null(struct anv_address addr)
{
   return addr.bo == NULL && addr.offset == 0;
}

static inline uint64_t
anv_address_physical(struct anv_address addr)
{
   if (addr.bo && anv_bo_is_pinned(addr.bo)) {
      return intel_canonical_address(addr.bo->offset + addr.offset);
   } else {
      return intel_canonical_address(addr.offset);
   }
}

static inline struct u_trace_address
anv_address_utrace(struct anv_address addr)
{
   return (struct u_trace_address) {
      .bo = addr.bo,
      .offset = addr.offset,
   };
}

static inline struct anv_address
anv_address_add(struct anv_address addr, uint64_t offset)
{
   addr.offset += offset;
   return addr;
}

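/* Illustrative sketch (the BO and offsets are hypothetical): anv_address_add()
 * only adjusts the CPU-side offset; the final canonical 48-bit GPU address is
 * produced by anv_address_physical() once the BO's placement is known, e.g.
 *
 *    struct anv_address addr = { .bo = some_bo, .offset = 0 };
 *    uint64_t gpu_addr =
 *       anv_address_physical(anv_address_add(addr, 256));
 */
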
/* Represents a lock-free linked list of "free" things.  This is used by
 * both the block pool and the state pools.  Unfortunately, in order to
 * solve the ABA problem, we can't use a single uint32_t head.
 */
union anv_free_list {
   struct {
      uint32_t offset;

      /* A simple count that is incremented every time the head changes. */
      uint32_t count;
   };
   /* Make sure it's aligned to 64 bits. This will make atomic operations
    * faster on 32 bit platforms.
    */
   alignas(8) uint64_t u64;
};

#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { UINT32_MAX, 0 } })

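/* A rough sketch of how the count field defeats the ABA problem (an
 * illustration of the idea, not the driver's actual pop implementation; the
 * next_offset_of() helper is hypothetical).  The 64-bit compare-and-swap
 * covers both halves of the union, so a head that was popped and re-pushed
 * at the same offset still fails the CAS because its count has advanced:
 *
 *    union anv_free_list current = { .u64 = p_atomic_read(&list->u64) };
 *    union anv_free_list old, new;
 *    do {
 *       old = current;
 *       new.offset = next_offset_of(old.offset);
 *       new.count = old.count + 1;
 *       current.u64 = p_atomic_cmpxchg(&list->u64, old.u64, new.u64);
 *    } while (current.u64 != old.u64);
 */
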
struct anv_block_state {
   union {
      struct {
         uint32_t next;
         uint32_t end;
      };
      /* Make sure it's aligned to 64 bits. This will make atomic operations
       * faster on 32 bit platforms.
       */
      alignas(8) uint64_t u64;
   };
};

#define anv_block_pool_foreach_bo(bo, pool)  \
   for (struct anv_bo **_pp_bo = (pool)->bos, *bo; \
        _pp_bo != &(pool)->bos[(pool)->nbos] && (bo = *_pp_bo, true); \
        _pp_bo++)

#define ANV_MAX_BLOCK_POOL_BOS 20

struct anv_block_pool {
   const char *name;

   struct anv_device *device;
   bool use_relocations;

   /* Wrapper BO for use in relocation lists.  This BO is simply a wrapper
    * around the actual BO so that we can grow the pool after the wrapper BO
    * has been put in a relocation list.  This is only used in the non-softpin
    * case.
    */
   struct anv_bo wrapper_bo;

   struct anv_bo *bos[ANV_MAX_BLOCK_POOL_BOS];
   struct anv_bo *bo;
   uint32_t nbos;

   uint64_t size;

   /* The address where the start of the pool is pinned. The various bos that
    * are created as the pool grows will have addresses in the range
    * [start_address, start_address + BLOCK_POOL_MEMFD_SIZE).
    */
   uint64_t start_address;

   /* The offset from the start of the bo to the "center" of the block
    * pool.  Pointers to allocated blocks are given by
    * bo.map + center_bo_offset + offsets.
    */
   uint32_t center_bo_offset;

   /* Current memory map of the block pool.  This pointer may or may not
    * point to the actual beginning of the block pool memory.  If
    * anv_block_pool_alloc_back has ever been called, then this pointer
    * will point to the "center" position of the buffer and all offsets
    * (negative or positive) given out by the block pool alloc functions
    * will be valid relative to this pointer.
    *
    * In particular, map == bo.map + center_offset
    *
    * DO NOT access this pointer directly. Use anv_block_pool_map() instead,
    * since it will handle the softpin case as well, where this points to NULL.
    */
   void *map;
   int fd;

   /**
    * Array of mmaps and gem handles owned by the block pool, reclaimed when
    * the block pool is destroyed.
    */
   struct u_vector mmap_cleanups;

   struct anv_block_state state;

   struct anv_block_state back_state;
};

/* Block pools are backed by a fixed-size 1GB memfd */
#define BLOCK_POOL_MEMFD_SIZE (1ul << 30)

/* The center of the block pool is also the middle of the memfd.  This may
 * change in the future if we decide differently for some reason.
 */
#define BLOCK_POOL_MEMFD_CENTER (BLOCK_POOL_MEMFD_SIZE / 2)

static inline uint32_t
anv_block_pool_size(struct anv_block_pool *pool)
{
   return pool->state.end + pool->back_state.end;
}

struct anv_state {
   int32_t offset;
   uint32_t alloc_size;
   void *map;
   uint32_t idx;
};

#define ANV_STATE_NULL ((struct anv_state) { .alloc_size = 0 })

struct anv_fixed_size_state_pool {
   union anv_free_list free_list;
   struct anv_block_state block;
};

#define ANV_MIN_STATE_SIZE_LOG2 6
#define ANV_MAX_STATE_SIZE_LOG2 21

#define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1)

struct anv_free_entry {
   uint32_t next;
   struct anv_state state;
};

struct anv_state_table {
   struct anv_device *device;
   int fd;
   struct anv_free_entry *map;
   uint32_t size;
   struct anv_block_state state;
   struct u_vector cleanups;
};

struct anv_state_pool {
   struct anv_block_pool block_pool;

   /* Offset into the relevant state base address where the state pool starts
    * allocating memory.
    */
   int32_t start_offset;

   struct anv_state_table table;

   /* The size of blocks which will be allocated from the block pool */
   uint32_t block_size;

   /** Free list for "back" allocations */
   union anv_free_list back_alloc_free_list;

   struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS];
};

struct anv_state_reserved_pool {
   struct anv_state_pool *pool;
   union anv_free_list reserved_blocks;
   uint32_t count;
};

struct anv_state_stream {
   struct anv_state_pool *state_pool;

   /* The size of blocks to allocate from the state pool */
   uint32_t block_size;

   /* Current block we're allocating from */
   struct anv_state block;

   /* Offset into the current block at which to allocate the next state */
   uint32_t next;

   /* List of all blocks allocated from this pool */
   struct util_dynarray all_blocks;
};

/* The block_pool functions are exported for testing only.  The block pool
 * should only be used via a state pool (see below).
 */
VkResult anv_block_pool_init(struct anv_block_pool *pool,
                             struct anv_device *device,
                             const char *name,
                             uint64_t start_address,
                             uint32_t initial_size);
void anv_block_pool_finish(struct anv_block_pool *pool);
int32_t anv_block_pool_alloc(struct anv_block_pool *pool,
                             uint32_t block_size, uint32_t *padding);
int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool,
                                  uint32_t block_size);
void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset,
                         uint32_t size);

VkResult anv_state_pool_init(struct anv_state_pool *pool,
                             struct anv_device *device,
                             const char *name,
                             uint64_t base_address,
                             int32_t start_offset,
                             uint32_t block_size);
void anv_state_pool_finish(struct anv_state_pool *pool);
struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool,
                                      uint32_t state_size, uint32_t alignment);
struct anv_state anv_state_pool_alloc_back(struct anv_state_pool *pool);
void anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state);
void anv_state_stream_init(struct anv_state_stream *stream,
                           struct anv_state_pool *state_pool,
                           uint32_t block_size);
void anv_state_stream_finish(struct anv_state_stream *stream);
struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream,
                                        uint32_t size, uint32_t alignment);

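/* Hedged usage sketch (the pool chosen and the sizes are only examples): a
 * state stream borrows blocks from a state pool, hands out sub-allocations,
 * and releases everything in one shot when finished:
 *
 *    struct anv_state_stream stream;
 *    anv_state_stream_init(&stream, &device->dynamic_state_pool, 16384);
 *    struct anv_state s = anv_state_stream_alloc(&stream, 64, 64);
 *    ... fill s.map, point the hardware at s.offset ...
 *    anv_state_stream_finish(&stream);
 */
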
void anv_state_reserved_pool_init(struct anv_state_reserved_pool *pool,
                                      struct anv_state_pool *parent,
                                      uint32_t count, uint32_t size,
                                      uint32_t alignment);
void anv_state_reserved_pool_finish(struct anv_state_reserved_pool *pool);
struct anv_state anv_state_reserved_pool_alloc(struct anv_state_reserved_pool *pool);
void anv_state_reserved_pool_free(struct anv_state_reserved_pool *pool,
                                  struct anv_state state);

VkResult anv_state_table_init(struct anv_state_table *table,
                             struct anv_device *device,
                             uint32_t initial_entries);
void anv_state_table_finish(struct anv_state_table *table);
VkResult anv_state_table_add(struct anv_state_table *table, uint32_t *idx,
                             uint32_t count);
void anv_free_list_push(union anv_free_list *list,
                        struct anv_state_table *table,
                        uint32_t idx, uint32_t count);
struct anv_state* anv_free_list_pop(union anv_free_list *list,
                                    struct anv_state_table *table);


static inline struct anv_state *
anv_state_table_get(struct anv_state_table *table, uint32_t idx)
{
   return &table->map[idx].state;
}

/**
 * Implements a pool of re-usable BOs.  The interface is identical to that
 * of block_pool except that each block is its own BO.
 */
struct anv_bo_pool {
   const char *name;

   struct anv_device *device;

   struct util_sparse_array_free_list free_list[16];
};

void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device,
                      const char *name);
void anv_bo_pool_finish(struct anv_bo_pool *pool);
VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size,
                           struct anv_bo **bo_out);
void anv_bo_pool_free(struct anv_bo_pool *pool, struct anv_bo *bo);

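/* Illustrative sketch (the batch_bo_pool member and the size are just
 * examples): unlike the state pools, the BO pool hands out whole buffer
 * objects, so usage is a plain alloc/free pair:
 *
 *    struct anv_bo *bo;
 *    VkResult result = anv_bo_pool_alloc(&device->batch_bo_pool, 4096, &bo);
 *    if (result == VK_SUCCESS) {
 *       ... write through bo->map ...
 *       anv_bo_pool_free(&device->batch_bo_pool, bo);
 *    }
 */
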
struct anv_scratch_pool {
   /* Indexed by Per-Thread Scratch Space number (the hardware value) and stage */
   struct anv_bo *bos[16][MESA_SHADER_STAGES];
};

void anv_scratch_pool_init(struct anv_device *device,
                           struct anv_scratch_pool *pool);
void anv_scratch_pool_finish(struct anv_device *device,
                             struct anv_scratch_pool *pool);
struct anv_bo *anv_scratch_pool_alloc(struct anv_device *device,
                                      struct anv_scratch_pool *pool,
                                      gl_shader_stage stage,
                                      unsigned per_thread_scratch);

/** Implements a BO cache that ensures a 1-1 mapping of GEM BOs to anv_bos */
struct anv_bo_cache {
   struct util_sparse_array bo_map;
   pthread_mutex_t mutex;
};

VkResult anv_bo_cache_init(struct anv_bo_cache *cache,
                           struct anv_device *device);
void anv_bo_cache_finish(struct anv_bo_cache *cache);

struct anv_queue_family {
   /* Standard bits passed on to the client */
   VkQueueFlags   queueFlags;
   uint32_t       queueCount;

   /* Driver internal information */
   enum intel_engine_class engine_class;
};

#define ANV_MAX_QUEUE_FAMILIES 3

struct anv_memory_type {
   /* Standard bits passed on to the client */
   VkMemoryPropertyFlags   propertyFlags;
   uint32_t                heapIndex;
};

struct anv_memory_heap {
   /* Standard bits passed on to the client */
   VkDeviceSize      size;
   VkMemoryHeapFlags flags;

   /** Driver-internal book-keeping.
    *
    * Align it to 64 bits to make atomic operations faster on 32 bit platforms.
    */
   alignas(8) VkDeviceSize used;
};

struct anv_memregion {
   uint64_t size;
   uint64_t available;
};

enum anv_timestamp_capture_type {
    ANV_TIMESTAMP_CAPTURE_TOP_OF_PIPE,
    ANV_TIMESTAMP_CAPTURE_END_OF_PIPE,
    ANV_TIMESTAMP_CAPTURE_AT_CS_STALL,
};

struct anv_physical_device {
    struct vk_physical_device                   vk;

    /* Link in anv_instance::physical_devices */
    struct list_head                            link;

    struct anv_instance *                       instance;
    char                                        path[20];
    struct intel_device_info                      info;
    bool                                        supports_48bit_addresses;
    struct elk_compiler *                       compiler;
    struct isl_device                           isl_dev;
    struct intel_perf_config *                    perf;
   /* True if hardware support is incomplete/alpha */
    bool                                        is_alpha;
    /*
     * Number of commands required to implement a performance query begin +
     * end.
     */
    uint32_t                                    n_perf_query_commands;
    int                                         cmd_parser_version;
    bool                                        has_exec_async;
    bool                                        has_exec_capture;
    int                                         max_context_priority;
    uint64_t                                    gtt_size;

    bool                                        use_relocations;
    bool                                        use_softpin;
    bool                                        always_use_bindless;
    bool                                        use_call_secondary;

    /** True if we can access buffers using A64 messages */
    bool                                        has_a64_buffer_access;
    /** True if we can use bindless access for samplers */
    bool                                        has_bindless_samplers;
    /** True if we can use timeline semaphores through execbuf */
    bool                                        has_exec_timeline;

    /** True if we can read the GPU timestamp register
     *
     * When running in a virtual context, the timestamp register is unreadable
     * on Gfx12+.
     */
    bool                                        has_reg_timestamp;

    bool                                        always_flush_cache;

    struct {
      uint32_t                                  family_count;
      struct anv_queue_family                   families[ANV_MAX_QUEUE_FAMILIES];
    } queue;

    struct {
      uint32_t                                  type_count;
      struct anv_memory_type                    types[VK_MAX_MEMORY_TYPES];
      uint32_t                                  heap_count;
      struct anv_memory_heap                    heaps[VK_MAX_MEMORY_HEAPS];
      bool                                      need_flush;
    } memory;

    struct anv_memregion                        sys;
    uint8_t                                     driver_build_sha1[20];
    uint8_t                                     pipeline_cache_uuid[VK_UUID_SIZE];
    uint8_t                                     driver_uuid[VK_UUID_SIZE];
    uint8_t                                     device_uuid[VK_UUID_SIZE];

    struct vk_sync_type                         sync_syncobj_type;
    struct vk_sync_timeline_type                sync_timeline_type;
    const struct vk_sync_type *                 sync_types[4];

    struct wsi_device                       wsi_device;
    int                                         local_fd;
    bool                                        has_local;
    int64_t                                     local_major;
    int64_t                                     local_minor;
    int                                         master_fd;
    bool                                        has_master;
    int64_t                                     master_major;
    int64_t                                     master_minor;
    struct intel_query_engine_info *            engine_info;

    void (*cmd_emit_timestamp)(struct anv_batch *, struct anv_device *,
                               struct anv_address, enum anv_timestamp_capture_type);
    void (*cmd_capture_data)(struct anv_batch *, struct anv_device *,
                             struct anv_address, struct anv_address,
                             uint32_t);
    struct intel_measure_device                 measure_device;
};

struct anv_instance {
    struct vk_instance                          vk;

    struct driOptionCache                       dri_options;
    struct driOptionCache                       available_dri_options;

    /**
     * Workarounds for game bugs.
     */
    uint8_t                                     assume_full_subgroups;
    bool                                        limit_trig_input_range;
    bool                                        sample_mask_out_opengl_behaviour;
    float                                       lower_depth_range_rate;
    bool                                        report_vk_1_3;

    /* HW workarounds */
    bool                                        no_16bit;
};

VkResult anv_init_wsi(struct anv_physical_device *physical_device);
void anv_finish_wsi(struct anv_physical_device *physical_device);

struct anv_queue {
   struct vk_queue                           vk;

   struct anv_device *                       device;

   const struct anv_queue_family *           family;

   uint32_t                                  exec_flags;

   /** Synchronization object for debug purposes (DEBUG_SYNC) */
   struct vk_sync                           *sync;

   struct intel_ds_queue                     ds;
};

struct nir_xfb_info;
struct anv_pipeline_bind_map;

extern const struct vk_pipeline_cache_object_ops *const anv_cache_import_ops[2];

struct anv_shader_bin *
anv_device_search_for_kernel(struct anv_device *device,
                             struct vk_pipeline_cache *cache,
                             const void *key_data, uint32_t key_size,
                             bool *user_cache_bit);

struct anv_shader_bin *
anv_device_upload_kernel(struct anv_device *device,
                         struct vk_pipeline_cache *cache,
                         gl_shader_stage stage,
                         const void *key_data, uint32_t key_size,
                         const void *kernel_data, uint32_t kernel_size,
                         const struct elk_stage_prog_data *prog_data,
                         uint32_t prog_data_size,
                         const struct elk_compile_stats *stats,
                         uint32_t num_stats,
                         const struct nir_xfb_info *xfb_info,
                         const struct anv_pipeline_bind_map *bind_map);

struct nir_shader;
struct nir_shader_compiler_options;

struct nir_shader *
anv_device_search_for_nir(struct anv_device *device,
                          struct vk_pipeline_cache *cache,
                          const struct nir_shader_compiler_options *nir_options,
                          unsigned char sha1_key[20],
                          void *mem_ctx);

void
anv_device_upload_nir(struct anv_device *device,
                      struct vk_pipeline_cache *cache,
                      const struct nir_shader *nir,
                      unsigned char sha1_key[20]);

struct anv_device {
    struct vk_device                            vk;

    struct anv_physical_device *                physical;
    const struct intel_device_info *            info;
    struct isl_device                           isl_dev;
    uint32_t                                    context_id;
    int                                         fd;
    bool                                        can_chain_batches;

    pthread_mutex_t                             vma_mutex;
    struct util_vma_heap                        vma_lo;
    struct util_vma_heap                        vma_cva;
    struct util_vma_heap                        vma_hi;

    /** List of all anv_device_memory objects */
    struct list_head                            memory_objects;

    struct anv_bo_pool                          batch_bo_pool;
    struct anv_bo_pool                          utrace_bo_pool;

    struct anv_bo_cache                         bo_cache;

    struct anv_state_pool                       general_state_pool;
    struct anv_state_pool                       dynamic_state_pool;
    struct anv_state_pool                       instruction_state_pool;
    struct anv_state_pool                       binding_table_pool;
    struct anv_state_pool                       surface_state_pool;

    struct anv_state_reserved_pool              custom_border_colors;

    /** BO used for various workarounds
     *
     * There are a number of workarounds on our hardware which require writing
     * data somewhere and it doesn't really matter where.  For that, we use
     * this BO and just write to the first dword or so.
     *
     * We also need to be able to handle NULL buffers bound as pushed UBOs.
     * For that, we use the high bytes (>= 1024) of the workaround BO.
     */
    struct anv_bo *                             workaround_bo;
    struct anv_address                          workaround_address;

    /**
     * Workarounds for game bugs.
     */
    struct {
       struct set *                             doom64_images;
    } workarounds;

    struct anv_bo *                             trivial_batch_bo;
    struct anv_state                            null_surface_state;

    struct vk_pipeline_cache *                  default_pipeline_cache;
    struct vk_pipeline_cache *                  internal_cache;
    struct blorp_context                        blorp;

    struct anv_state                            border_colors;

    struct anv_state                            slice_hash;

    uint32_t                                    queue_count;
    struct anv_queue  *                         queues;

    struct anv_scratch_pool                     scratch_pool;

    bool                                        robust_buffer_access;

    pthread_mutex_t                             mutex;
    pthread_cond_t                              queue_submit;

    struct intel_batch_decode_ctx               decoder_ctx;
    /*
     * When decoding an anv_cmd_buffer, we might need to search for BOs through
     * the cmd_buffer's list.
     */
    struct anv_cmd_buffer                      *cmd_buffer_being_decoded;

    int                                         perf_fd; /* -1 if not opened */
    uint64_t                                    perf_metric; /* 0 if unset */

    const struct intel_l3_config                *l3_config;

    struct intel_debug_block_frame              *debug_frame_desc;

    struct intel_ds_device                       ds;
};

static inline bool
anv_use_relocations(const struct anv_physical_device *pdevice)
{
#if defined(GFX_VERx10) && GFX_VERx10 < 80
   /* Haswell and earlier never use softpin */
   assert(pdevice->use_relocations);
   return true;
#else
   /* If we don't have a GFX_VERx10 #define, we need to look at the physical
    * device.  Also, for GFX version 8, we need to look at the physical
    * device because Broadwell softpins but Cherryview doesn't.
    */
   return pdevice->use_relocations;
#endif
}

static inline struct anv_state_pool *
anv_binding_table_pool(struct anv_device *device)
{
   if (anv_use_relocations(device->physical))
      return &device->surface_state_pool;
   else
      return &device->binding_table_pool;
}

static inline struct anv_state
anv_binding_table_pool_alloc(struct anv_device *device)
{
   if (anv_use_relocations(device->physical))
      return anv_state_pool_alloc_back(&device->surface_state_pool);
   else
      return anv_state_pool_alloc(&device->binding_table_pool,
                                  device->binding_table_pool.block_size, 0);
}

static inline void
anv_binding_table_pool_free(struct anv_device *device, struct anv_state state) {
   anv_state_pool_free(anv_binding_table_pool(device), state);
}

static inline uint32_t
anv_mocs(const struct anv_device *device,
         const struct anv_bo *bo,
         isl_surf_usage_flags_t usage)
{
   return isl_mocs(&device->isl_dev, usage, bo && bo->is_external);
}

void anv_device_init_blorp(struct anv_device *device);
void anv_device_finish_blorp(struct anv_device *device);

enum anv_bo_alloc_flags {
   /** Specifies that the BO must have a 32-bit address
    *
    * This is the opposite of EXEC_OBJECT_SUPPORTS_48B_ADDRESS.
    */
   ANV_BO_ALLOC_32BIT_ADDRESS =  (1 << 0),

   /** Specifies that the BO may be shared externally */
   ANV_BO_ALLOC_EXTERNAL =       (1 << 1),

   /** Specifies that the BO should be mapped */
   ANV_BO_ALLOC_MAPPED =         (1 << 2),

   /** Specifies that the BO should be snooped so we get coherency */
   ANV_BO_ALLOC_SNOOPED =        (1 << 3),

   /** Specifies that the BO should be captured in error states */
   ANV_BO_ALLOC_CAPTURE =        (1 << 4),

   /** Specifies that the BO will have an address assigned by the caller
    *
    * Such BOs do not exist in any VMA heap.
    */
   ANV_BO_ALLOC_FIXED_ADDRESS = (1 << 5),

   /** Enables implicit synchronization on the BO
    *
    * This is the opposite of EXEC_OBJECT_ASYNC.
    */
   ANV_BO_ALLOC_IMPLICIT_SYNC =  (1 << 6),

   /** Enables implicit synchronization on the BO
    *
    * This is equivalent to EXEC_OBJECT_WRITE.
    */
   ANV_BO_ALLOC_IMPLICIT_WRITE = (1 << 7),

   /** Has an address which is visible to the client */
   ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS = (1 << 8),
};

VkResult anv_device_alloc_bo(struct anv_device *device,
                             const char *name, uint64_t size,
                             enum anv_bo_alloc_flags alloc_flags,
                             uint64_t explicit_address,
                             struct anv_bo **bo);
VkResult anv_device_map_bo(struct anv_device *device,
                           struct anv_bo *bo,
                           uint64_t offset,
                           size_t size,
                           uint32_t gem_flags,
                           void **map_out);
void anv_device_unmap_bo(struct anv_device *device,
                         struct anv_bo *bo,
                         void *map, size_t map_size);
VkResult anv_device_import_bo_from_host_ptr(struct anv_device *device,
                                            void *host_ptr, uint32_t size,
                                            enum anv_bo_alloc_flags alloc_flags,
                                            uint64_t client_address,
                                            struct anv_bo **bo_out);
VkResult anv_device_import_bo(struct anv_device *device, int fd,
                              enum anv_bo_alloc_flags alloc_flags,
                              uint64_t client_address,
                              struct anv_bo **bo);
VkResult anv_device_export_bo(struct anv_device *device,
                              struct anv_bo *bo, int *fd_out);
VkResult anv_device_get_bo_tiling(struct anv_device *device,
                                  struct anv_bo *bo,
                                  enum isl_tiling *tiling_out);
VkResult anv_device_set_bo_tiling(struct anv_device *device,
                                  struct anv_bo *bo,
                                  uint32_t row_pitch_B,
                                  enum isl_tiling tiling);
void anv_device_release_bo(struct anv_device *device,
                           struct anv_bo *bo);

static inline void anv_device_set_physical(struct anv_device *device,
                                           struct anv_physical_device *physical_device)
{
   device->physical = physical_device;
   device->info = &physical_device->info;
   device->isl_dev = physical_device->isl_dev;
}

static inline struct anv_bo *
anv_device_lookup_bo(struct anv_device *device, uint32_t gem_handle)
{
   return util_sparse_array_get(&device->bo_cache.bo_map, gem_handle);
}

VkResult anv_device_wait(struct anv_device *device, struct anv_bo *bo,
                         int64_t timeout);

VkResult anv_queue_init(struct anv_device *device, struct anv_queue *queue,
                        uint32_t exec_flags,
                        const VkDeviceQueueCreateInfo *pCreateInfo,
                        uint32_t index_in_family);
void anv_queue_finish(struct anv_queue *queue);

VkResult anv_queue_submit(struct vk_queue *queue,
                          struct vk_queue_submit *submit);
VkResult anv_queue_submit_simple_batch(struct anv_queue *queue,
                                       struct anv_batch *batch);

void* anv_gem_mmap(struct anv_device *device,
                   uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags);
void anv_gem_munmap(struct anv_device *device, void *p, uint64_t size);
uint32_t anv_gem_create(struct anv_device *device, uint64_t size);
void anv_gem_close(struct anv_device *device, uint32_t gem_handle);
uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size);
int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns);
int anv_gem_execbuffer(struct anv_device *device,
                       struct drm_i915_gem_execbuffer2 *execbuf);
int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle,
                       uint32_t stride, uint32_t tiling);
bool anv_gem_has_context_priority(int fd, int priority);
int anv_gem_set_context_param(int fd, uint32_t context, uint32_t param,
                              uint64_t value);
int anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle);
int anv_gem_context_get_reset_stats(int fd, int context,
                                    uint32_t *active, uint32_t *pending);
int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle);
uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd);
int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching);

uint64_t anv_vma_alloc(struct anv_device *device,
                       uint64_t size, uint64_t align,
                       enum anv_bo_alloc_flags alloc_flags,
                       uint64_t client_address);
void anv_vma_free(struct anv_device *device,
                  uint64_t address, uint64_t size);

struct anv_reloc_list {
   uint32_t                                     num_relocs;
   uint32_t                                     array_length;
   struct drm_i915_gem_relocation_entry *       relocs;
   struct anv_bo **                             reloc_bos;
   uint32_t                                     dep_words;
   BITSET_WORD *                                deps;
};

VkResult anv_reloc_list_init(struct anv_reloc_list *list,
                             const VkAllocationCallbacks *alloc);
void anv_reloc_list_finish(struct anv_reloc_list *list,
                           const VkAllocationCallbacks *alloc);

VkResult anv_reloc_list_add(struct anv_reloc_list *list,
                            const VkAllocationCallbacks *alloc,
                            uint32_t offset, struct anv_bo *target_bo,
                            uint32_t delta, uint64_t *address_u64_out);

VkResult anv_reloc_list_add_bo(struct anv_reloc_list *list,
                               const VkAllocationCallbacks *alloc,
                               struct anv_bo *target_bo);

struct anv_batch_bo {
   /* Link in the anv_cmd_buffer.owned_batch_bos list */
   struct list_head                             link;

   struct anv_bo *                              bo;

   /* Bytes actually consumed in this batch BO */
   uint32_t                                     length;

   /* When this batch BO is used as part of a primary batch buffer, this
    * tracks whether it is chained to another primary batch buffer.
    *
    * If this is the case, the relocation list's last entry points to the
    * location of the MI_BATCH_BUFFER_START chaining to the next batch.
    */
   bool                                         chained;

   struct anv_reloc_list                        relocs;
};

struct anv_batch {
   const VkAllocationCallbacks *                alloc;

   struct anv_address                           start_addr;

   void *                                       start;
   void *                                       end;
   void *                                       next;

   struct anv_reloc_list *                      relocs;

   /* This callback is called (with the associated user data) in the event
    * that the batch runs out of space.
    */
   VkResult (*extend_cb)(struct anv_batch *, void *);
   void *                                       user_data;

   /**
    * Current error status of the command buffer. Used to track inconsistent
    * or incomplete command buffer states that are the consequence of run-time
    * errors such as out of memory scenarios. We want to track this in the
    * batch because the command buffer object is not visible to some parts
    * of the driver.
    */
   VkResult                                     status;
};

void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords);
void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other);
struct anv_address anv_batch_address(struct anv_batch *batch, void *batch_location);

static inline void
anv_batch_set_storage(struct anv_batch *batch, struct anv_address addr,
                      void *map, size_t size)
{
   batch->start_addr = addr;
   batch->next = batch->start = map;
   batch->end = map + size;
}

static inline VkResult
anv_batch_set_error(struct anv_batch *batch, VkResult error)
{
   assert(error != VK_SUCCESS);
   if (batch->status == VK_SUCCESS)
      batch->status = error;
   return batch->status;
}

static inline bool
anv_batch_has_error(struct anv_batch *batch)
{
   return batch->status != VK_SUCCESS;
}

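/* Hedged usage sketch (cmd_buffer is a hypothetical caller): emit paths are
 * expected to record the first failure with anv_batch_set_error() and then
 * bail out early whenever the batch already carries an error, so that
 * out-of-memory and similar conditions propagate without crashing:
 *
 *    if (anv_batch_has_error(&cmd_buffer->batch))
 *       return;
 */
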
1375 static inline uint64_t
anv_batch_emit_reloc(struct anv_batch * batch,void * location,struct anv_bo * bo,uint32_t delta)1376 anv_batch_emit_reloc(struct anv_batch *batch,
1377                      void *location, struct anv_bo *bo, uint32_t delta)
1378 {
1379    uint64_t address_u64 = 0;
1380    VkResult result = anv_reloc_list_add(batch->relocs, batch->alloc,
1381                                         location - batch->start, bo, delta,
1382                                         &address_u64);
1383    if (unlikely(result != VK_SUCCESS)) {
1384       anv_batch_set_error(batch, result);
1385       return 0;
1386    }
1387 
1388    return address_u64;
1389 }
1390 
1391 static inline void
1392 write_reloc(const struct anv_device *device, void *p, uint64_t v, bool flush)
1393 {
1394    UNUSED unsigned reloc_size = 0;
1395    if (device->info->ver >= 8) {
1396       reloc_size = sizeof(uint64_t);
1397       *(uint64_t *)p = intel_canonical_address(v);
1398    } else {
1399       reloc_size = sizeof(uint32_t);
1400       *(uint32_t *)p = v;
1401    }
1402 
1403 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
1404    if (flush && device->physical->memory.need_flush)
1405       intel_flush_range(p, reloc_size);
1406 #endif
1407 }
1408 
1409 static inline uint64_t
1410 _anv_combine_address(struct anv_batch *batch, void *location,
1411                      const struct anv_address address, uint32_t delta)
1412 {
1413    if (address.bo == NULL) {
1414       return address.offset + delta;
1415    } else if (batch == NULL) {
1416       assert(anv_bo_is_pinned(address.bo));
1417       return anv_address_physical(anv_address_add(address, delta));
1418    } else {
1419       assert(batch->start <= location && location < batch->end);
1420       /* i915 relocations are signed. */
1421       assert(INT32_MIN <= address.offset && address.offset <= INT32_MAX);
1422       return anv_batch_emit_reloc(batch, location, address.bo, address.offset + delta);
1423    }
1424 }
1425 
1426 #define __gen_address_type struct anv_address
1427 #define __gen_user_data struct anv_batch
1428 #define __gen_combine_address _anv_combine_address
1429 
1430 /* Wrapper macros needed to work around preprocessor argument issues.  In
1431  * particular, arguments don't get pre-evaluated if they are concatenated.
1432  * This means that, if you pass GENX(3DSTATE_PS) into the emit macro, the
1433  * GENX macro won't get evaluated if the emit macro contains "cmd ## foo".
1434  * We can work around this easily enough with these helpers.
1435  */
1436 #define __anv_cmd_length(cmd) cmd ## _length
1437 #define __anv_cmd_length_bias(cmd) cmd ## _length_bias
1438 #define __anv_cmd_header(cmd) cmd ## _header
1439 #define __anv_cmd_pack(cmd) cmd ## _pack
1440 #define __anv_reg_num(reg) reg ## _num
1441 
1442 #define anv_pack_struct(dst, struc, ...) do {                              \
1443       struct struc __template = {                                          \
1444          __VA_ARGS__                                                       \
1445       };                                                                   \
1446       __anv_cmd_pack(struc)(NULL, dst, &__template);                       \
1447       VG(VALGRIND_CHECK_MEM_IS_DEFINED(dst, __anv_cmd_length(struc) * 4)); \
1448    } while (0)
1449 
1450 #define anv_batch_emitn(batch, n, cmd, ...) ({             \
1451       void *__dst = anv_batch_emit_dwords(batch, n);       \
1452       if (__dst) {                                         \
1453          struct cmd __template = {                         \
1454             __anv_cmd_header(cmd),                         \
1455            .DWordLength = n - __anv_cmd_length_bias(cmd),  \
1456             __VA_ARGS__                                    \
1457          };                                                \
1458          __anv_cmd_pack(cmd)(batch, __dst, &__template);   \
1459       }                                                    \
1460       __dst;                                               \
1461    })
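
/* Example (editor sketch): anv_batch_emitn() is meant for variable-length
 * packets where only the header can be packed up front.  The dword count and
 * the GENX(3DSTATE_VERTEX_BUFFERS) packet here are illustrative; the caller
 * packs the per-element payload into the returned pointer.
 *
 *    uint32_t *p = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
 *                                  GENX(3DSTATE_VERTEX_BUFFERS));
 *    if (p != NULL) {
 *       ... pack one VERTEX_BUFFER_STATE per buffer starting at p + 1 ...
 *    }
 */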
1462 
1463 #define anv_batch_emit_merge(batch, dwords0, dwords1)                   \
1464    do {                                                                 \
1465       uint32_t *dw;                                                     \
1466                                                                         \
1467       STATIC_ASSERT(ARRAY_SIZE(dwords0) == ARRAY_SIZE(dwords1));        \
1468       dw = anv_batch_emit_dwords((batch), ARRAY_SIZE(dwords0));         \
1469       if (!dw)                                                          \
1470          break;                                                         \
1471       for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++)                \
1472          dw[i] = (dwords0)[i] | (dwords1)[i];                           \
1473       VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));\
1474    } while (0)
1475 
1476 #define anv_batch_emit(batch, cmd, name)                            \
1477    for (struct cmd name = { __anv_cmd_header(cmd) },                    \
1478         *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd));    \
1479         __builtin_expect(_dst != NULL, 1);                              \
1480         ({ __anv_cmd_pack(cmd)(batch, _dst, &name);                     \
1481            VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \
1482            _dst = NULL;                                                 \
1483          }))
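
/* Example (editor sketch): anv_batch_emit() packs a fixed-length command
 * straight into the batch.  The for-loop trick above runs the body once to
 * fill in 'name' and packs it when the body completes.  PIPE_CONTROL and its
 * field names come from the genxml packing headers.
 *
 *    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
 *       pc.CommandStreamerStallEnable = true;
 *       pc.StallAtPixelScoreboard     = true;
 *    }
 */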
1484 
1485 #define anv_batch_write_reg(batch, reg, name)                           \
1486    for (struct reg name = {}, *_cont = (struct reg *)1; _cont != NULL;  \
1487         ({                                                              \
1488             uint32_t _dw[__anv_cmd_length(reg)];                        \
1489             __anv_cmd_pack(reg)(NULL, _dw, &name);                      \
1490             for (unsigned i = 0; i < __anv_cmd_length(reg); i++) {      \
1491                anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { \
1492                   lri.RegisterOffset   = __anv_reg_num(reg);            \
1493                   lri.DataDWord        = _dw[i];                        \
1494                }                                                        \
1495             }                                                           \
1496            _cont = NULL;                                                \
1497          }))
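
/* Example (editor sketch): anv_batch_write_reg() packs a register struct and
 * emits one MI_LOAD_REGISTER_IMM per dword of it.  The register name below is
 * illustrative; any GENX() register with _num/_length definitions works.
 *
 *    anv_batch_write_reg(&cmd_buffer->batch, GENX(CACHE_MODE_1), cm) {
 *       ... set register fields on 'cm' ...
 *    }
 */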
1498 
1499 /* #define __gen_get_batch_dwords anv_batch_emit_dwords */
1500 /* #define __gen_get_batch_address anv_batch_address */
1501 /* #define __gen_address_value anv_address_physical */
1502 /* #define __gen_address_offset anv_address_add */
1503 
1504 struct anv_device_memory {
1505    struct vk_object_base                        base;
1506 
1507    struct list_head                             link;
1508 
1509    struct anv_bo *                              bo;
1510    const struct anv_memory_type *               type;
1511 
1512    void *                                       map;
1513    size_t                                       map_size;
1514 
1515    /* The map, from the user's PoV, is map + map_delta */
1516    uint64_t                                     map_delta;
1517 
1518    /* If set, we are holding a reference to an AHardwareBuffer
1519     * which we must release when the memory is freed.
1520     */
1521    struct AHardwareBuffer *                     ahw;
1522 
1523    /* If set, this memory comes from a host pointer. */
1524    void *                                       host_ptr;
1525 };
1526 
1527 /**
1528  * Header for Vertex URB Entry (VUE)
1529  */
1530 struct anv_vue_header {
1531    uint32_t Reserved;
1532    uint32_t RTAIndex; /* RenderTargetArrayIndex */
1533    uint32_t ViewportIndex;
1534    float PointWidth;
1535 };
1536 
1537 /** Struct representing a sampled image descriptor
1538  *
1539  * This descriptor layout is used for sampled images, bare samplers, and
1540  * combined image/sampler descriptors.
1541  */
1542 struct anv_sampled_image_descriptor {
1543    /** Bindless image handle
1544     *
1545     * This is expected to already be shifted such that the 20-bit
1546     * SURFACE_STATE table index is in the top 20 bits.
1547     */
1548    uint32_t image;
1549 
1550    /** Bindless sampler handle
1551     *
1552     * This is assumed to be a 32B-aligned SAMPLER_STATE pointer relative
1553     * to the dynamic state base address.
1554     */
1555    uint32_t sampler;
1556 };
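
/* Example (editor sketch): packing the bindless handle described above.  With
 * the 20-bit SURFACE_STATE table index occupying the top 20 bits of a 32-bit
 * word, the shift amount is 32 - 20 = 12; 'surface_index' is a hypothetical
 * local.
 *
 *    struct anv_sampled_image_descriptor desc = {
 *       .image = surface_index << 12,
 *    };
 */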
1557 
1558 struct anv_texture_swizzle_descriptor {
1559    /** Texture swizzle
1560     *
1561     * See also nir_intrinsic_channel_select_intel
1562     */
1563    uint8_t swizzle[4];
1564 
1565    /** Unused padding to ensure the struct is a multiple of 64 bits */
1566    uint32_t _pad;
1567 };
1568 
1569 /** Struct representing a storage image descriptor */
1570 struct anv_storage_image_descriptor {
1571    /** Bindless image handles
1572     *
1573     * These are expected to already be shifted such that the 20-bit
1574     * SURFACE_STATE table index is in the top 20 bits.
1575     */
1576    uint32_t vanilla;
1577    uint32_t lowered;
1578 };
1579 
1580 /** Struct representing an address/range descriptor
1581  *
1582  * The fields of this struct correspond directly to the data layout of
1583  * nir_address_format_64bit_bounded_global addresses.  The last field is the
1584  * offset in the NIR address; it must be zero so that when you load the
1585  * descriptor you get a pointer to the start of the range.
1586  */
1587 struct anv_address_range_descriptor {
1588    uint64_t address;
1589    uint32_t range;
1590    uint32_t zero;
1591 };
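
/* Example (editor sketch): filling an anv_address_range_descriptor for a
 * buffer binding; 'bind_addr' and 'bind_range' are hypothetical locals and
 * anv_address_physical() is declared earlier in this header.
 *
 *    struct anv_address_range_descriptor desc = {
 *       .address = anv_address_physical(bind_addr),
 *       .range   = bind_range,
 *       .zero    = 0,
 *    };
 */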
1592 
1593 enum anv_descriptor_data {
1594    /** The descriptor contains a BTI reference to a surface state */
1595    ANV_DESCRIPTOR_SURFACE_STATE  = (1 << 0),
1596    /** The descriptor contains a BTI reference to a sampler state */
1597    ANV_DESCRIPTOR_SAMPLER_STATE  = (1 << 1),
1598    /** The descriptor contains an actual buffer view */
1599    ANV_DESCRIPTOR_BUFFER_VIEW    = (1 << 2),
1600    /** The descriptor contains auxiliary image layout data */
1601    ANV_DESCRIPTOR_IMAGE_PARAM    = (1 << 3),
1602    /** The descriptor contains inline uniform data */
1603    ANV_DESCRIPTOR_INLINE_UNIFORM = (1 << 4),
1604    /** anv_address_range_descriptor with a buffer address and range */
1605    ANV_DESCRIPTOR_ADDRESS_RANGE  = (1 << 5),
1606    /** Bindless surface handle */
1607    ANV_DESCRIPTOR_SAMPLED_IMAGE  = (1 << 6),
1608    /** Storage image handles */
1609    ANV_DESCRIPTOR_STORAGE_IMAGE  = (1 << 7),
1610    /** Texture swizzle (see anv_texture_swizzle_descriptor) */
1611    ANV_DESCRIPTOR_TEXTURE_SWIZZLE  = (1 << 8),
1612 };
1613 
1614 struct anv_descriptor_set_binding_layout {
1615    /* The type of the descriptors in this binding */
1616    VkDescriptorType type;
1617 
1618    /* Flags provided when this binding was created */
1619    VkDescriptorBindingFlags flags;
1620 
1621    /* Bitfield representing the type of data this descriptor contains */
1622    enum anv_descriptor_data data;
1623 
1624    /* Maximum number of YCbCr texture/sampler planes */
1625    uint8_t max_plane_count;
1626 
1627    /* Number of array elements in this binding (or size in bytes for inline
1628     * uniform data)
1629     */
1630    uint32_t array_size;
1631 
1632    /* Index into the flattened descriptor set */
1633    uint32_t descriptor_index;
1634 
1635    /* Index into the dynamic state array for a dynamic buffer */
1636    int16_t dynamic_offset_index;
1637 
1638    /* Index into the descriptor set buffer views */
1639    int32_t buffer_view_index;
1640 
1641    /* Offset into the descriptor buffer where this descriptor lives */
1642    uint32_t descriptor_offset;
1643 
1644    /* Pre-computed stride */
1645    unsigned descriptor_stride;
1646 
1647    /* Immutable samplers (or NULL if no immutable samplers) */
1648    struct anv_sampler **immutable_samplers;
1649 };
1650 
1651 bool anv_descriptor_supports_bindless(const struct anv_physical_device *pdevice,
1652                                       const struct anv_descriptor_set_binding_layout *binding,
1653                                       bool sampler);
1654 
1655 bool anv_descriptor_requires_bindless(const struct anv_physical_device *pdevice,
1656                                       const struct anv_descriptor_set_binding_layout *binding,
1657                                       bool sampler);
1658 
1659 struct anv_descriptor_set_layout {
1660    struct vk_object_base base;
1661 
1662    /* Descriptor set layouts can be destroyed at almost any time */
1663    uint32_t ref_cnt;
1664 
1665    /* Number of bindings in this descriptor set */
1666    uint32_t binding_count;
1667 
1668    /* Total number of descriptors */
1669    uint32_t descriptor_count;
1670 
1671    /* Shader stages affected by this descriptor set */
1672    uint16_t shader_stages;
1673 
1674    /* Number of buffer views in this descriptor set */
1675    uint32_t buffer_view_count;
1676 
1677    /* Number of dynamic offsets used by this descriptor set */
1678    uint16_t dynamic_offset_count;
1679 
1680    /* For each dynamic buffer, which VkShaderStageFlagBits stages are using
1681     * this buffer
1682     */
1683    VkShaderStageFlags dynamic_offset_stages[MAX_DYNAMIC_BUFFERS];
1684 
1685    /* Size of the descriptor buffer for this descriptor set */
1686    uint32_t descriptor_buffer_size;
1687 
1688    /* Bindings in this descriptor set */
1689    struct anv_descriptor_set_binding_layout binding[0];
1690 };
1691 
1692 void anv_descriptor_set_layout_destroy(struct anv_device *device,
1693                                        struct anv_descriptor_set_layout *layout);
1694 
1695 static inline void
1696 anv_descriptor_set_layout_ref(struct anv_descriptor_set_layout *layout)
1697 {
1698    assert(layout && layout->ref_cnt >= 1);
1699    p_atomic_inc(&layout->ref_cnt);
1700 }
1701 
1702 static inline void
1703 anv_descriptor_set_layout_unref(struct anv_device *device,
1704                                 struct anv_descriptor_set_layout *layout)
1705 {
1706    assert(layout && layout->ref_cnt >= 1);
1707    if (p_atomic_dec_zero(&layout->ref_cnt))
1708       anv_descriptor_set_layout_destroy(device, layout);
1709 }
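
/* Example (editor sketch): the reference-counting contract above in use.
 * Whoever stores a pointer to the layout takes a reference and drops it when
 * that pointer goes away; 'device' and 'layout' are hypothetical locals.
 *
 *    anv_descriptor_set_layout_ref(layout);
 *    ... the layout now safely outlives the VkDescriptorSetLayout handle ...
 *    anv_descriptor_set_layout_unref(device, layout);
 */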
1710 
1711 struct anv_descriptor {
1712    VkDescriptorType type;
1713 
1714    union {
1715       struct {
1716          VkImageLayout layout;
1717          struct anv_image_view *image_view;
1718          struct anv_sampler *sampler;
1719       };
1720 
1721       struct {
1722          struct anv_buffer_view *set_buffer_view;
1723          struct anv_buffer *buffer;
1724          uint64_t offset;
1725          uint64_t range;
1726       };
1727 
1728       struct anv_buffer_view *buffer_view;
1729    };
1730 };
1731 
1732 struct anv_descriptor_set {
1733    struct vk_object_base base;
1734 
1735    struct anv_descriptor_pool *pool;
1736    struct anv_descriptor_set_layout *layout;
1737 
1738    /* Amount of space occupied in the pool by this descriptor set. It can
1739     * be larger than the size of the descriptor set.
1740     */
1741    uint32_t size;
1742 
1743    /* State relative to anv_descriptor_pool::bo */
1744    struct anv_state desc_mem;
1745    /* Surface state for the descriptor buffer */
1746    struct anv_state desc_surface_state;
1747 
1748    /* Descriptor set address. */
1749    struct anv_address desc_addr;
1750 
1751    uint32_t buffer_view_count;
1752    struct anv_buffer_view *buffer_views;
1753 
1754    /* Link to the descriptor pool's desc_sets list. */
1755    struct list_head pool_link;
1756 
1757    uint32_t descriptor_count;
1758    struct anv_descriptor descriptors[0];
1759 };
1760 
1761 static inline bool
1762 anv_descriptor_set_is_push(struct anv_descriptor_set *set)
1763 {
1764    return set->pool == NULL;
1765 }
1766 
1767 struct anv_buffer_view {
1768    struct vk_object_base base;
1769 
1770    uint64_t range; /**< VkBufferViewCreateInfo::range */
1771 
1772    struct anv_address address;
1773 
1774    struct anv_state surface_state;
1775    struct anv_state storage_surface_state;
1776    struct anv_state lowered_storage_surface_state;
1777 
1778    struct isl_image_param lowered_storage_image_param;
1779 };
1780 
1781 struct anv_push_descriptor_set {
1782    struct anv_descriptor_set set;
1783 
1784    /* Put this field right behind anv_descriptor_set so it fills up the
1785     * descriptors[0] field. */
1786    struct anv_descriptor descriptors[MAX_PUSH_DESCRIPTORS];
1787 
1788    /** True if the descriptor set buffer has been referenced by a draw or
1789     * dispatch command.
1790     */
1791    bool set_used_on_gpu;
1792 
1793    struct anv_buffer_view buffer_views[MAX_PUSH_DESCRIPTORS];
1794 };
1795 
1796 static inline struct anv_address
1797 anv_descriptor_set_address(struct anv_descriptor_set *set)
1798 {
1799    if (anv_descriptor_set_is_push(set)) {
1800       /* We have to flag the push descriptor set as used on the GPU
1801        * so that the next time we push descriptors, we grab new memory.
1802        */
1803       struct anv_push_descriptor_set *push_set =
1804          (struct anv_push_descriptor_set *)set;
1805       push_set->set_used_on_gpu = true;
1806    }
1807 
1808    return set->desc_addr;
1809 }
1810 
1811 struct anv_descriptor_pool {
1812    struct vk_object_base base;
1813 
1814    uint32_t size;
1815    uint32_t next;
1816    uint32_t free_list;
1817 
1818    struct anv_bo *bo;
1819    struct util_vma_heap bo_heap;
1820 
1821    struct anv_state_stream surface_state_stream;
1822    void *surface_state_free_list;
1823 
1824    struct list_head desc_sets;
1825 
1826    bool host_only;
1827 
1828    char data[0];
1829 };
1830 
1831 size_t
1832 anv_descriptor_set_layout_size(const struct anv_descriptor_set_layout *layout,
1833                                uint32_t var_desc_count);
1834 
1835 uint32_t
1836 anv_descriptor_set_layout_descriptor_buffer_size(const struct anv_descriptor_set_layout *set_layout,
1837                                                  uint32_t var_desc_count);
1838 
1839 void
1840 anv_descriptor_set_write_image_view(struct anv_device *device,
1841                                     struct anv_descriptor_set *set,
1842                                     const VkDescriptorImageInfo * const info,
1843                                     VkDescriptorType type,
1844                                     uint32_t binding,
1845                                     uint32_t element);
1846 
1847 void
1848 anv_descriptor_set_write_buffer_view(struct anv_device *device,
1849                                      struct anv_descriptor_set *set,
1850                                      VkDescriptorType type,
1851                                      struct anv_buffer_view *buffer_view,
1852                                      uint32_t binding,
1853                                      uint32_t element);
1854 
1855 void
1856 anv_descriptor_set_write_buffer(struct anv_device *device,
1857                                 struct anv_descriptor_set *set,
1858                                 struct anv_state_stream *alloc_stream,
1859                                 VkDescriptorType type,
1860                                 struct anv_buffer *buffer,
1861                                 uint32_t binding,
1862                                 uint32_t element,
1863                                 VkDeviceSize offset,
1864                                 VkDeviceSize range);
1865 
1866 void
1867 anv_descriptor_set_write_inline_uniform_data(struct anv_device *device,
1868                                              struct anv_descriptor_set *set,
1869                                              uint32_t binding,
1870                                              const void *data,
1871                                              size_t offset,
1872                                              size_t size);
1873 
1874 void
1875 anv_descriptor_set_write_template(struct anv_device *device,
1876                                   struct anv_descriptor_set *set,
1877                                   struct anv_state_stream *alloc_stream,
1878                                   const struct vk_descriptor_update_template *template,
1879                                   const void *data);
1880 
1881 #define ANV_DESCRIPTOR_SET_NULL             (UINT8_MAX - 5)
1882 #define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS   (UINT8_MAX - 4)
1883 #define ANV_DESCRIPTOR_SET_DESCRIPTORS      (UINT8_MAX - 3)
1884 #define ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS  (UINT8_MAX - 2)
1885 #define ANV_DESCRIPTOR_SET_SHADER_CONSTANTS (UINT8_MAX - 1)
1886 #define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX
1887 
1888 struct anv_pipeline_binding {
1889    /** Index in the descriptor set
1890     *
1891     * This is a flattened index; the descriptor set layout is already taken
1892     * into account.
1893     */
1894    uint32_t index;
1895 
1896    /** The descriptor set this surface corresponds to.
1897     *
1898     * The special ANV_DESCRIPTOR_SET_* values above indicate that this
1899     * binding is not a normal descriptor set but something else.
1900     */
1901    uint8_t set;
1902 
1903    union {
1904       /** Plane in the binding index for images */
1905       uint8_t plane;
1906 
1907       /** Dynamic offset index (for dynamic UBOs and SSBOs) */
1908       uint8_t dynamic_offset_index;
1909    };
1910 
1911    /** For a storage image, whether it requires a lowered surface */
1912    uint8_t lowered_storage_surface;
1913 
1914    /** Pad to 64 bits so that there are no holes and we can safely memcmp
1915     * assuming POD zero-initialization.
1916     */
1917    uint8_t pad;
1918 };
1919 
1920 struct anv_push_range {
1921    /** Index in the descriptor set */
1922    uint32_t index;
1923 
1924    /** Descriptor set index */
1925    uint8_t set;
1926 
1927    /** Dynamic offset index (for dynamic UBOs) */
1928    uint8_t dynamic_offset_index;
1929 
1930    /** Start offset in units of 32B */
1931    uint8_t start;
1932 
1933    /** Range in units of 32B */
1934    uint8_t length;
1935 };
1936 
1937 struct anv_pipeline_layout {
1938    struct vk_object_base base;
1939 
1940    struct {
1941       struct anv_descriptor_set_layout *layout;
1942       uint32_t dynamic_offset_start;
1943    } set[MAX_SETS];
1944 
1945    uint32_t num_sets;
1946 
1947    unsigned char sha1[20];
1948 };
1949 
1950 struct anv_buffer {
1951    struct vk_buffer vk;
1952 
1953    /* Set when bound */
1954    struct anv_address address;
1955 };
1956 
1957 enum anv_cmd_dirty_bits {
1958    ANV_CMD_DIRTY_PIPELINE                            = 1 << 0,
1959    ANV_CMD_DIRTY_INDEX_BUFFER                        = 1 << 1,
1960    ANV_CMD_DIRTY_RENDER_TARGETS                      = 1 << 2,
1961    ANV_CMD_DIRTY_XFB_ENABLE                          = 1 << 3,
1962 };
1963 typedef enum anv_cmd_dirty_bits anv_cmd_dirty_mask_t;
1964 
1965 enum anv_pipe_bits {
1966    ANV_PIPE_DEPTH_CACHE_FLUSH_BIT            = (1 << 0),
1967    ANV_PIPE_STALL_AT_SCOREBOARD_BIT          = (1 << 1),
1968    ANV_PIPE_STATE_CACHE_INVALIDATE_BIT       = (1 << 2),
1969    ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT    = (1 << 3),
1970    ANV_PIPE_VF_CACHE_INVALIDATE_BIT          = (1 << 4),
1971    ANV_PIPE_DATA_CACHE_FLUSH_BIT             = (1 << 5),
1972    ANV_PIPE_TILE_CACHE_FLUSH_BIT             = (1 << 6),
1973    ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT     = (1 << 10),
1974    ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT = (1 << 11),
1975    ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT    = (1 << 12),
1976    ANV_PIPE_DEPTH_STALL_BIT                  = (1 << 13),
1977 
1978    /* ANV_PIPE_HDC_PIPELINE_FLUSH_BIT is a precise way to ensure prior data
1979     * cache work has completed.  Available on Gfx12+.  For earlier Gfx we
1980     * must reinterpret this flush as ANV_PIPE_DATA_CACHE_FLUSH_BIT.
1981     */
1982    ANV_PIPE_HDC_PIPELINE_FLUSH_BIT           = (1 << 14),
1983    ANV_PIPE_PSS_STALL_SYNC_BIT               = (1 << 15),
1984 
1985    /*
1986     * This bit flushes the data port's Untyped L1 data cache (LSC L1).
1987     */
1988    ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT = (1 << 16),
1989 
1990    ANV_PIPE_CS_STALL_BIT                     = (1 << 20),
1991    ANV_PIPE_END_OF_PIPE_SYNC_BIT             = (1 << 21),
1992 
1993    /* This bit does not exist directly in PIPE_CONTROL.  Instead it means that
1994     * a flush has happened but not a CS stall.  The next time we do any sort
1995     * of invalidation we need to insert a CS stall at that time.  Otherwise,
1996     * we would have to CS stall on every flush which could be bad.
1997     */
1998    ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT       = (1 << 22),
1999 
2000    /* This bit does not exist directly in PIPE_CONTROL. It means that render
2001     * target operations related to transfer commands with VkBuffer as
2002     * destination are ongoing. Some operations like copies on the command
2003     * streamer might need to be aware of this to trigger the appropriate stall
2004     * before they can proceed with the copy.
2005     */
2006    ANV_PIPE_RENDER_TARGET_BUFFER_WRITES      = (1 << 23),
2007 
2008    /* This bit does not exist directly in PIPE_CONTROL. It means that Gfx12
2009     * AUX-TT data has changed and we need to invalidate AUX-TT data.  This is
2010     * done by writing the AUX-TT register.
2011     */
2012    ANV_PIPE_AUX_TABLE_INVALIDATE_BIT         = (1 << 24),
2013 
2014    /* This bit does not exist directly in PIPE_CONTROL. It means that a
2015     * PIPE_CONTROL with a post-sync operation will follow. This is used to
2016     * implement a workaround for Gfx9.
2017     */
2018    ANV_PIPE_POST_SYNC_BIT                    = (1 << 25),
2019 };
2020 
2021 #define ANV_PIPE_FLUSH_BITS ( \
2022    ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \
2023    ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
2024    ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \
2025    ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT | \
2026    ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | \
2027    ANV_PIPE_TILE_CACHE_FLUSH_BIT)
2028 
2029 #define ANV_PIPE_STALL_BITS ( \
2030    ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \
2031    ANV_PIPE_DEPTH_STALL_BIT | \
2032    ANV_PIPE_CS_STALL_BIT)
2033 
2034 #define ANV_PIPE_INVALIDATE_BITS ( \
2035    ANV_PIPE_STATE_CACHE_INVALIDATE_BIT | \
2036    ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | \
2037    ANV_PIPE_VF_CACHE_INVALIDATE_BIT | \
2038    ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \
2039    ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | \
2040    ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT | \
2041    ANV_PIPE_AUX_TABLE_INVALIDATE_BIT)
2042 
2043 enum intel_ds_stall_flag
2044 anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits);
2045 
2046 static inline enum anv_pipe_bits
2047 anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
2048                                      VkAccessFlags2 flags)
2049 {
2050    enum anv_pipe_bits pipe_bits = 0;
2051 
2052    u_foreach_bit64(b, flags) {
2053       switch ((VkAccessFlags2)BITFIELD64_BIT(b)) {
2054       case VK_ACCESS_2_SHADER_WRITE_BIT:
2055       case VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT:
2056          /* We're transitioning a buffer that was previously used as a write
2057           * destination through the data port. To make its content available
2058           * to future operations, flush the hdc pipeline.
2059           */
2060          pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
2061          pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
2062          break;
2063       case VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT:
2064          /* We're transitioning a buffer that was previously used as a render
2065           * target. To make its content available to future operations, flush
2066           * the render target cache.
2067           */
2068          pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
2069          break;
2070       case VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
2071          /* We're transitioning a buffer that was previously used as a depth
2072           * buffer. To make its content available to future operations, flush
2073           * the depth cache.
2074           */
2075          pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
2076          break;
2077       case VK_ACCESS_2_TRANSFER_WRITE_BIT:
2078          /* We're transitioning a buffer that was previously used as a
2079           * transfer write destination. Generic write operations include color
2080           * & depth operations as well as buffer operations like:
2081           *     - vkCmdClearColorImage()
2082           *     - vkCmdClearDepthStencilImage()
2083           *     - vkCmdBlitImage()
2084           *     - vkCmdCopy*(), vkCmdUpdate*(), vkCmdFill*()
2085           *
2086           * Most of these operations are implemented using Blorp which writes
2087           * through the render target, so flush that cache to make it visible
2088           * to future operations. And for depth related operations we also
2089           * need to flush the depth cache.
2090           */
2091          pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
2092          pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
2093          break;
2094       case VK_ACCESS_2_MEMORY_WRITE_BIT:
2095          /* We're transitioning a buffer for generic write operations. Flush
2096           * all the caches.
2097           */
2098          pipe_bits |= ANV_PIPE_FLUSH_BITS;
2099          break;
2100       case VK_ACCESS_2_HOST_WRITE_BIT:
2101          /* We're transitioning a buffer for access by the CPU. Invalidate
2102           * all the caches. Since the data and tile caches have no invalidate
2103           * operation, we are forced to flush those as well.
2104           */
2105          pipe_bits |= ANV_PIPE_FLUSH_BITS;
2106          pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
2107          break;
2108       case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
2109       case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
2110          /* We're transitioning a buffer written either from VS stage or from
2111           * the command streamer (see CmdEndTransformFeedbackEXT), we just
2112           * need to stall the CS.
2113           */
2114          pipe_bits |= ANV_PIPE_CS_STALL_BIT;
2115          break;
2116       default:
2117          break; /* Nothing to do */
2118       }
2119    }
2120 
2121    return pipe_bits;
2122 }
2123 
2124 static inline enum anv_pipe_bits
2125 anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
2126                                           VkAccessFlags2 flags)
2127 {
2128    enum anv_pipe_bits pipe_bits = 0;
2129 
2130    u_foreach_bit64(b, flags) {
2131       switch ((VkAccessFlags2)BITFIELD64_BIT(b)) {
2132       case VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT:
2133          /* Indirect draw commands take a buffer as input that we're going to
2134           * read from the command streamer to load some of the HW registers
2135           * (see genX_cmd_buffer.c:load_indirect_parameters). This requires a
2136           * command streamer stall so that all the cache flushes have
2137           * completed before the command streamer loads from memory.
2138           */
2139          pipe_bits |= ANV_PIPE_CS_STALL_BIT;
2140          /* Indirect draw commands also set gl_BaseVertex & gl_BaseIndex
2141           * through a vertex buffer, so invalidate that cache.
2142           */
2143          pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
2144          /* For CmdDispatchIndirect, we also load gl_NumWorkGroups through a
2145           * UBO from the buffer, so we need to invalidate constant cache.
2146           */
2147          pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
2148          pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
2149          /* Tile cache flush needed for CmdDispatchIndirect since the command
2150           * streamer and vertex fetch aren't L3 coherent.
2151           */
2152          pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
2153          break;
2154       case VK_ACCESS_2_INDEX_READ_BIT:
2155       case VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT:
2156          /* We're transitioning a buffer to be used as input for vkCmdDraw*
2157           * commands, so we invalidate the VF cache to make sure there is no
2158           * stale data when we start rendering.
2159           */
2160          pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
2161          break;
2162       case VK_ACCESS_2_UNIFORM_READ_BIT:
2163          /* We're transitioning a buffer to be used as uniform data. Because
2164           * uniforms are accessed through the data port & sampler, we need to
2165           * invalidate the texture cache (sampler) & constant cache (data
2166           * port) to avoid stale data.
2167           */
2168          pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
2169          if (device->physical->compiler->indirect_ubos_use_sampler) {
2170             pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
2171          } else {
2172             pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
2173             pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
2174          }
2175          break;
2176       case VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT:
2177       case VK_ACCESS_2_TRANSFER_READ_BIT:
2178       case VK_ACCESS_2_SHADER_SAMPLED_READ_BIT:
2179          /* Transitioning a buffer to be read through the sampler, so
2180           * invalidate the texture cache; we don't want any stale data.
2181           */
2182          pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
2183          break;
2184       case VK_ACCESS_2_SHADER_READ_BIT:
2185          /* Same as VK_ACCESS_2_UNIFORM_READ_BIT and
2186           * VK_ACCESS_2_SHADER_SAMPLED_READ_BIT cases above
2187           */
2188          pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT |
2189                       ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
2190          if (!device->physical->compiler->indirect_ubos_use_sampler) {
2191             pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
2192             pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
2193          }
2194          break;
2195       case VK_ACCESS_2_MEMORY_READ_BIT:
2196          /* Transitioning a buffer for generic read, invalidate all the
2197           * caches.
2198           */
2199          pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
2200          break;
2201       case VK_ACCESS_2_MEMORY_WRITE_BIT:
2202          /* Generic write, make sure all previously written things land in
2203           * memory.
2204           */
2205          pipe_bits |= ANV_PIPE_FLUSH_BITS;
2206          break;
2207       case VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT:
2208       case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT:
2209          /* Transitioning a buffer for conditional rendering or transform
2210           * feedback. We'll load the content of this buffer into HW registers
2211           * using the command streamer, so we need to stall the command
2212           * streamer to make sure any in-flight flush operations have
2213           * completed.
2214           */
2215          pipe_bits |= ANV_PIPE_CS_STALL_BIT;
2216          pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
2217          pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
2218          break;
2219       case VK_ACCESS_2_HOST_READ_BIT:
2220          /* We're transitioning a buffer that was written by CPU.  Flush
2221           * all the caches.
2222           */
2223          pipe_bits |= ANV_PIPE_FLUSH_BITS;
2224          break;
2225       case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
2226          /* We're transitioning a buffer to be written by the streamout fixed
2227           * function. This one is apparently not L3 coherent, so we need a
2228           * tile cache flush to make sure any previous write is not going to
2229           * create WaW hazards.
2230           */
2231          pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
2232          break;
2233       case VK_ACCESS_2_SHADER_STORAGE_READ_BIT:
2234       default:
2235          break; /* Nothing to do */
2236       }
2237    }
2238 
2239    return pipe_bits;
2240 }
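
/* Example (editor sketch): how a pipeline barrier combines the two helpers
 * above.  Source access masks become flushes, destination access masks become
 * invalidations; 'src_flags' and 'dst_flags' are hypothetical locals.
 *
 *    cmd_buffer->state.pending_pipe_bits |=
 *       anv_pipe_flush_bits_for_access_flags(cmd_buffer->device, src_flags) |
 *       anv_pipe_invalidate_bits_for_access_flags(cmd_buffer->device, dst_flags);
 */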
2241 
2242 #define VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV (         \
2243    VK_IMAGE_ASPECT_COLOR_BIT | \
2244    VK_IMAGE_ASPECT_PLANE_0_BIT | \
2245    VK_IMAGE_ASPECT_PLANE_1_BIT | \
2246    VK_IMAGE_ASPECT_PLANE_2_BIT)
2247 #define VK_IMAGE_ASPECT_PLANES_BITS_ANV ( \
2248    VK_IMAGE_ASPECT_PLANE_0_BIT | \
2249    VK_IMAGE_ASPECT_PLANE_1_BIT | \
2250    VK_IMAGE_ASPECT_PLANE_2_BIT)
2251 
2252 struct anv_vertex_binding {
2253    struct anv_buffer *                          buffer;
2254    VkDeviceSize                                 offset;
2255    VkDeviceSize                                 size;
2256 };
2257 
2258 struct anv_xfb_binding {
2259    struct anv_buffer *                          buffer;
2260    VkDeviceSize                                 offset;
2261    VkDeviceSize                                 size;
2262 };
2263 
2264 struct anv_push_constants {
2265    /** Push constant data provided by the client through vkPushConstants */
2266    uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE];
2267 
2268    /** Dynamic offsets for dynamic UBOs and SSBOs */
2269    uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
2270 
2271    /* Robust access pushed registers. */
2272    uint64_t push_reg_mask[MESA_SHADER_STAGES];
2273 
2274    struct {
2275       /** Base workgroup ID
2276        *
2277        * Used for vkCmdDispatchBase.
2278        */
2279       uint32_t base_work_group_id[3];
2280 
2281       /** Subgroup ID
2282        *
2283        * This is never set by software but is implicitly filled out when
2284        * uploading the push constants for compute shaders.
2285        */
2286       uint32_t subgroup_id;
2287    } cs;
2288 };
2289 
2290 struct anv_surface_state {
2291    struct anv_state state;
2292    /** Address of the surface referred to by this state
2293     *
2294     * This address is relative to the start of the BO.
2295     */
2296    struct anv_address address;
2297    /* Address of the aux surface, if any
2298     *
2299     * This field is ANV_NULL_ADDRESS if and only if no aux surface exists.
2300     *
2301     * With the exception of gfx8, the bottom 12 bits of this address' offset
2302     * include extra aux information.
2303     */
2304    struct anv_address aux_address;
2305    /* Address of the clear color, if any
2306     *
2307     * This address is relative to the start of the BO.
2308     */
2309    struct anv_address clear_address;
2310 };
2311 
2312 struct anv_attachment {
2313    VkFormat vk_format;
2314    const struct anv_image_view *iview;
2315    VkImageLayout layout;
2316    enum isl_aux_usage aux_usage;
2317    struct anv_surface_state surface_state;
2318 
2319    VkResolveModeFlagBits resolve_mode;
2320    const struct anv_image_view *resolve_iview;
2321    VkImageLayout resolve_layout;
2322 };
2323 
2324 /** State tracking for vertex buffer flushes
2325  *
2326  * On Gfx8-9, the VF cache only considers the bottom 32 bits of memory
2327  * addresses.  If you happen to have two vertex buffers which get placed
2328  * exactly 4 GiB apart and use them in back-to-back draw calls, you can get
2329  * collisions.  In order to solve this problem, we track vertex address ranges
2330  * which are live in the cache and invalidate the cache if one ever exceeds 32
2331  * bits.
2332  */
2333 struct anv_vb_cache_range {
2334    /* Virtual address at which the live vertex buffer cache range starts for
2335     * this vertex buffer index.
2336     */
2337    uint64_t start;
2338 
2339    /* Virtual address of the byte just past the end of the vertex buffer cache
2340     * range.  This is exclusive such that end - start is the size of the range.
2341     */
2342    uint64_t end;
2343 };
2344 
2345 static inline void
2346 anv_merge_vb_cache_range(struct anv_vb_cache_range *dirty,
2347                          const struct anv_vb_cache_range *bound)
2348 {
2349    if (dirty->start == dirty->end) {
2350       *dirty = *bound;
2351    } else if (bound->start != bound->end) {
2352       dirty->start = MIN2(dirty->start, bound->start);
2353       dirty->end = MAX2(dirty->end, bound->end);
2354    }
2355 }
2356 
2357 /* Check whether we need to apply the Gfx8-9 vertex buffer workaround */
2358 static inline bool
2359 anv_gfx8_9_vb_cache_range_needs_workaround(struct anv_vb_cache_range *bound,
2360                                            struct anv_vb_cache_range *dirty,
2361                                            struct anv_address vb_address,
2362                                            uint32_t vb_size)
2363 {
2364    if (vb_size == 0) {
2365       bound->start = 0;
2366       bound->end = 0;
2367       return false;
2368    }
2369 
2370    assert(vb_address.bo && anv_bo_is_pinned(vb_address.bo));
2371    bound->start = intel_48b_address(anv_address_physical(vb_address));
2372    bound->end = bound->start + vb_size;
2373    assert(bound->end > bound->start); /* No overflow */
2374 
2375    /* Align everything to a cache line */
2376    bound->start &= ~(64ull - 1ull);
2377    bound->end = align64(bound->end, 64);
2378 
2379    anv_merge_vb_cache_range(dirty, bound);
2380 
2381    /* If our range is larger than 32 bits, we have to flush */
2382    assert(bound->end - bound->start <= (1ull << 32));
2383    return (dirty->end - dirty->start) > (1ull << 32);
2384 }
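
/* Example (editor sketch): per-vertex-buffer check for the Gfx8-9 VF cache
 * workaround.  When the tracked dirty range crosses 4 GiB, the caller queues a
 * VF cache invalidation; 'vb_index', 'addr' and 'size' are hypothetical locals.
 *
 *    struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
 *    if (anv_gfx8_9_vb_cache_range_needs_workaround(&gfx->vb_bound_ranges[vb_index],
 *                                                   &gfx->vb_dirty_ranges[vb_index],
 *                                                   addr, size))
 *       cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
 */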
2385 
2386 /** State tracking for particular pipeline bind point
2387  *
2388  * This struct is the base struct for anv_cmd_graphics_state and
2389  * anv_cmd_compute_state.  These are used to track state which is bound to a
2390  * particular type of pipeline.  Generic state that applies per-stage such as
2391  * binding table offsets and push constants is tracked generically with a
2392  * per-stage array in anv_cmd_state.
2393  */
2394 struct anv_cmd_pipeline_state {
2395    struct anv_descriptor_set *descriptors[MAX_SETS];
2396    struct anv_push_descriptor_set *push_descriptors[MAX_SETS];
2397 
2398    struct anv_push_constants push_constants;
2399 
2400    /* Push constant state allocated when flushing push constants. */
2401    struct anv_state          push_constants_state;
2402 };
2403 
2404 /** State tracking for graphics pipeline
2405  *
2406  * This has anv_cmd_pipeline_state as a base struct to track things which get
2407  * bound to a graphics pipeline.  Along with general pipeline bind point state
2408  * which is in the anv_cmd_pipeline_state base struct, it also contains other
2409  * state which is graphics-specific.
2410  */
2411 struct anv_cmd_graphics_state {
2412    struct anv_cmd_pipeline_state base;
2413 
2414    struct anv_graphics_pipeline *pipeline;
2415 
2416    VkRenderingFlags rendering_flags;
2417    VkRect2D render_area;
2418    uint32_t layer_count;
2419    uint32_t samples;
2420    uint32_t view_mask;
2421    uint32_t color_att_count;
2422    struct anv_state att_states;
2423    struct anv_attachment color_att[MAX_RTS];
2424    struct anv_attachment depth_att;
2425    struct anv_attachment stencil_att;
2426    struct anv_state null_surface_state;
2427 
2428    anv_cmd_dirty_mask_t dirty;
2429    uint32_t vb_dirty;
2430 
2431    struct anv_vb_cache_range ib_bound_range;
2432    struct anv_vb_cache_range ib_dirty_range;
2433    struct anv_vb_cache_range vb_bound_ranges[33];
2434    struct anv_vb_cache_range vb_dirty_ranges[33];
2435 
2436    uint32_t restart_index;
2437 
2438    VkShaderStageFlags push_constant_stages;
2439 
2440    uint32_t primitive_topology;
2441 
2442    struct anv_buffer *index_buffer;
2443    uint32_t index_type; /**< 3DSTATE_INDEX_BUFFER.IndexFormat */
2444    uint32_t index_offset;
2445 
2446    struct vk_sample_locations_state sample_locations;
2447 
2448    bool has_uint_rt;
2449 };
2450 
2451 enum anv_depth_reg_mode {
2452    ANV_DEPTH_REG_MODE_UNKNOWN = 0,
2453    ANV_DEPTH_REG_MODE_HW_DEFAULT,
2454    ANV_DEPTH_REG_MODE_D16_1X_MSAA,
2455 };
2456 
2457 /** State tracking for compute pipeline
2458  *
2459  * This has anv_cmd_pipeline_state as a base struct to track things which get
2460  * bound to a compute pipeline.  Along with general pipeline bind point state
2461  * which is in the anv_cmd_pipeline_state base struct, it also contains other
2462  * state which is compute-specific.
2463  */
2464 struct anv_cmd_compute_state {
2465    struct anv_cmd_pipeline_state base;
2466 
2467    struct anv_compute_pipeline *pipeline;
2468 
2469    bool pipeline_dirty;
2470 
2471    struct anv_state push_data;
2472 
2473    struct anv_address num_workgroups;
2474 };
2475 
2476 /** State required while building cmd buffer */
2477 struct anv_cmd_state {
2478    /* PIPELINE_SELECT.PipelineSelection */
2479    uint32_t                                     current_pipeline;
2480    const struct intel_l3_config *               current_l3_config;
2481 
2482    struct anv_cmd_graphics_state                gfx;
2483    struct anv_cmd_compute_state                 compute;
2484 
2485    enum anv_pipe_bits                           pending_pipe_bits;
2486    VkShaderStageFlags                           descriptors_dirty;
2487    VkShaderStageFlags                           push_constants_dirty;
2488 
2489    struct anv_vertex_binding                    vertex_bindings[MAX_VBS];
2490    bool                                         xfb_enabled;
2491    struct anv_xfb_binding                       xfb_bindings[MAX_XFB_BUFFERS];
2492    struct anv_state                             binding_tables[MESA_VULKAN_SHADER_STAGES];
2493    struct anv_state                             samplers[MESA_VULKAN_SHADER_STAGES];
2494 
2495    unsigned char                                sampler_sha1s[MESA_VULKAN_SHADER_STAGES][20];
2496    unsigned char                                surface_sha1s[MESA_VULKAN_SHADER_STAGES][20];
2497    unsigned char                                push_sha1s[MESA_VULKAN_SHADER_STAGES][20];
2498 
2499    /**
2500     * Whether or not the gfx8 PMA fix is enabled.  We ensure that it is
2501     * disabled at the top of any command buffer by disabling it in
2502     * EndCommandBuffer and before invoking the secondary in ExecuteCommands.
2503     */
2504    bool                                         pma_fix_enabled;
2505 
2506    /**
2507     * Whether or not we know for certain that HiZ is enabled for the current
2508     * subpass.  If, for whatever reason, we are unsure as to whether HiZ is
2509     * enabled or not, this will be false.
2510     */
2511    bool                                         hiz_enabled;
2512 
2513    /* We ensure the registers for the gfx12 D16 fix are initialized at the
2514     * first non-NULL depth stencil packet emission of every command buffer.
2515     * For secondary command buffer execution, we transfer the state from the
2516     * last command buffer to the primary (if known).
2517     */
2518    enum anv_depth_reg_mode                      depth_reg_mode;
2519 
2520    bool                                         conditional_render_enabled;
2521 
2522    /**
2523     * Last rendering scale argument provided to
2524     * genX(cmd_buffer_emit_hashing_mode)().
2525     */
2526    unsigned                                     current_hash_scale;
2527 };
2528 
2529 #define ANV_MIN_CMD_BUFFER_BATCH_SIZE 8192
2530 #define ANV_MAX_CMD_BUFFER_BATCH_SIZE (16 * 1024 * 1024)
2531 
2532 enum anv_cmd_buffer_exec_mode {
2533    ANV_CMD_BUFFER_EXEC_MODE_PRIMARY,
2534    ANV_CMD_BUFFER_EXEC_MODE_EMIT,
2535    ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT,
2536    ANV_CMD_BUFFER_EXEC_MODE_CHAIN,
2537    ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN,
2538    ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN,
2539 };
2540 
2541 struct anv_measure_batch;
2542 
2543 struct anv_cmd_buffer {
2544    struct vk_command_buffer                     vk;
2545 
2546    struct anv_device *                          device;
2547    struct anv_queue_family *                    queue_family;
2548 
2549    struct anv_batch                             batch;
2550 
2551    /* Pointer to the location in the batch where MI_BATCH_BUFFER_END was
2552     * recorded upon calling vkEndCommandBuffer(). This is useful if we need to
2553     * rewrite the end to chain multiple batches together at vkQueueSubmit().
2554     */
2555    void *                                       batch_end;
2556 
2557    /* Fields required for the actual chain of anv_batch_bo's.
2558     *
2559     * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain().
2560     */
2561    struct list_head                             batch_bos;
2562    enum anv_cmd_buffer_exec_mode                exec_mode;
2563 
2564    /* A vector of anv_batch_bo pointers for every batch or surface buffer
2565     * referenced by this command buffer
2566     *
2567     * initialized by anv_cmd_buffer_init_batch_bo_chain()
2568     */
2569    struct u_vector                            seen_bbos;
2570 
2571    /* A vector of int32_t's for every block of binding tables.
2572     *
2573     * initialized by anv_cmd_buffer_init_batch_bo_chain()
2574     */
2575    struct u_vector                              bt_block_states;
2576    struct anv_state                             bt_next;
2577 
2578    struct anv_reloc_list                        surface_relocs;
2579    /** Last seen surface state block pool center bo offset */
2580    uint32_t                                     last_ss_pool_center;
2581 
2582    /* Serial for tracking buffer completion */
2583    uint32_t                                     serial;
2584 
2585    /* Stream objects for storing temporary data */
2586    struct anv_state_stream                      surface_state_stream;
2587    struct anv_state_stream                      dynamic_state_stream;
2588    struct anv_state_stream                      general_state_stream;
2589 
2590    VkCommandBufferUsageFlags                    usage_flags;
2591 
2592    struct anv_query_pool                       *perf_query_pool;
2593 
2594    struct anv_cmd_state                         state;
2595 
2596    struct anv_address                           return_addr;
2597 
2598    /* Set by SetPerformanceMarkerINTEL, written into queries by CmdBeginQuery */
2599    uint64_t                                     intel_perf_marker;
2600 
2601    struct anv_measure_batch *measure;
2602 
2603    /**
2604     * KHR_performance_query requires self-modifying command buffers: this
2605     * array holds the locations of the commands that patch the query begin
2606     * and end instructions storing performance counters. The array length is
2607     * anv_physical_device::n_perf_query_commands.
2608     */
2609    struct mi_address_token                  *self_mod_locations;
2610 
2611    /**
2612     * Index tracking which of the self_mod_locations items have already been
2613     * used.
2614     */
2615    uint32_t                                      perf_reloc_idx;
2616 
2617    /**
2618     * Sum of all the anv_batch_bo sizes allocated for this command buffer.
2619     * Used to increase allocation size for long command buffers.
2620     */
2621    uint32_t                                     total_batch_size;
2622 
2623    /**
2624     * u_trace context used to record driver tracepoints for this command buffer.
2625     */
2626    struct u_trace                               trace;
2627 };
2628 
2629 extern const struct vk_command_buffer_ops anv_cmd_buffer_ops;
2630 
2631 /* Determine whether we can chain a given cmd_buffer to another one. We need
2632  * softpin and we also need to make sure that we can edit the end of the batch
2633  * to point to the next one, which requires the command buffer to not be used
2634  * simultaneously.
2635  */
2636 static inline bool
2637 anv_cmd_buffer_is_chainable(struct anv_cmd_buffer *cmd_buffer)
2638 {
2639    return !anv_use_relocations(cmd_buffer->device->physical) &&
2640       !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT);
2641 }
2642 
2643 VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
2644 void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
2645 void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
2646 void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer);
2647 void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
2648                                   struct anv_cmd_buffer *secondary);
2649 void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer);
2650 VkResult anv_cmd_buffer_execbuf(struct anv_queue *queue,
2651                                 struct anv_cmd_buffer *cmd_buffer,
2652                                 const VkSemaphore *in_semaphores,
2653                                 const uint64_t *in_wait_values,
2654                                 uint32_t num_in_semaphores,
2655                                 const VkSemaphore *out_semaphores,
2656                                 const uint64_t *out_signal_values,
2657                                 uint32_t num_out_semaphores,
2658                                 VkFence fence,
2659                                 int perf_query_pass);
2660 
2661 void anv_cmd_buffer_reset(struct vk_command_buffer *vk_cmd_buffer,
2662                           UNUSED VkCommandBufferResetFlags flags);
2663 
2664 struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
2665                                              const void *data, uint32_t size, uint32_t alignment);
2666 struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
2667                                               uint32_t *a, uint32_t *b,
2668                                               uint32_t dwords, uint32_t alignment);
2669 
2670 struct anv_address
2671 anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer);
2672 struct anv_state
2673 anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
2674                                    uint32_t entries, uint32_t *state_offset);
2675 struct anv_state
2676 anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer);
2677 struct anv_state
2678 anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer,
2679                                    uint32_t size, uint32_t alignment);
2680 
2681 VkResult
2682 anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer);
2683 
2684 void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer);
2685 
2686 struct anv_state
2687 anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer);
2688 struct anv_state
2689 anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer);
2690 
2691 VkResult
2692 anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
2693                                          uint32_t num_entries,
2694                                          uint32_t *state_offset,
2695                                          struct anv_state *bt_state);
2696 
2697 void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer);
2698 
2699 void anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer);
2700 
2701 enum anv_bo_sync_state {
2702    /** Indicates that this is a new (or newly reset) fence */
2703    ANV_BO_SYNC_STATE_RESET,
2704 
2705    /** Indicates that this fence has been submitted to the GPU but is still
2706     * (as far as we know) in use by the GPU.
2707     */
2708    ANV_BO_SYNC_STATE_SUBMITTED,
2709 
2710    ANV_BO_SYNC_STATE_SIGNALED,
2711 };
2712 
2713 struct anv_bo_sync {
2714    struct vk_sync sync;
2715 
2716    enum anv_bo_sync_state state;
2717    struct anv_bo *bo;
2718 };
2719 
2720 extern const struct vk_sync_type anv_bo_sync_type;
2721 
2722 static inline bool
2723 vk_sync_is_anv_bo_sync(const struct vk_sync *sync)
2724 {
2725    return sync->type == &anv_bo_sync_type;
2726 }
2727 
2728 VkResult anv_create_sync_for_memory(struct vk_device *device,
2729                                     VkDeviceMemory memory,
2730                                     bool signal_memory,
2731                                     struct vk_sync **sync_out);
2732 
2733 struct anv_event {
2734    struct vk_object_base                        base;
2735    uint64_t                                     semaphore;
2736    struct anv_state                             state;
2737 };
2738 
2739 #define ANV_STAGE_MASK ((1 << MESA_VULKAN_SHADER_STAGES) - 1)
2740 
2741 #define anv_foreach_stage(stage, stage_bits)                         \
2742    for (gl_shader_stage stage,                                       \
2743         __tmp = (gl_shader_stage)((stage_bits) & ANV_STAGE_MASK);    \
2744         stage = __builtin_ffs(__tmp) - 1, __tmp;                     \
2745         __tmp &= ~(1 << (stage)))
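/* Illustrative usage sketch (editorial, not part of the driver): iterate the
 * Mesa shader stages selected by a VkShaderStageFlags mask.  The mask value
 * below is hypothetical.
 *
 *    VkShaderStageFlags bits = VK_SHADER_STAGE_VERTEX_BIT |
 *                              VK_SHADER_STAGE_FRAGMENT_BIT;
 *    anv_foreach_stage(s, bits) {
 *       assert(s == MESA_SHADER_VERTEX || s == MESA_SHADER_FRAGMENT);
 *    }
 */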
2746 
2747 struct anv_pipeline_bind_map {
2748    unsigned char                                surface_sha1[20];
2749    unsigned char                                sampler_sha1[20];
2750    unsigned char                                push_sha1[20];
2751 
2752    uint32_t surface_count;
2753    uint32_t sampler_count;
2754 
2755    struct anv_pipeline_binding *                surface_to_descriptor;
2756    struct anv_pipeline_binding *                sampler_to_descriptor;
2757 
2758    struct anv_push_range                        push_ranges[4];
2759 };
2760 
2761 struct anv_shader_bin {
2762    struct vk_pipeline_cache_object base;
2763 
2764    gl_shader_stage stage;
2765 
2766    struct anv_state kernel;
2767    uint32_t kernel_size;
2768 
2769    const struct elk_stage_prog_data *prog_data;
2770    uint32_t prog_data_size;
2771 
2772    struct elk_compile_stats stats[3];
2773    uint32_t num_stats;
2774 
2775    struct nir_xfb_info *xfb_info;
2776 
2777    struct anv_pipeline_bind_map bind_map;
2778 };
2779 
2780 struct anv_shader_bin *
2781 anv_shader_bin_create(struct anv_device *device,
2782                       gl_shader_stage stage,
2783                       const void *key, uint32_t key_size,
2784                       const void *kernel, uint32_t kernel_size,
2785                       const struct elk_stage_prog_data *prog_data,
2786                       uint32_t prog_data_size,
2787                       const struct elk_compile_stats *stats, uint32_t num_stats,
2788                       const struct nir_xfb_info *xfb_info,
2789                       const struct anv_pipeline_bind_map *bind_map);
2790 
2791 static inline void
2792 anv_shader_bin_ref(struct anv_shader_bin *shader)
2793 {
2794    vk_pipeline_cache_object_ref(&shader->base);
2795 }
2796 
2797 static inline void
2798 anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader)
2799 {
2800    vk_pipeline_cache_object_unref(&device->vk, &shader->base);
2801 }
2802 
2803 struct anv_pipeline_executable {
2804    gl_shader_stage stage;
2805 
2806    struct elk_compile_stats stats;
2807 
2808    char *nir;
2809    char *disasm;
2810 };
2811 
2812 enum anv_pipeline_type {
2813    ANV_PIPELINE_GRAPHICS,
2814    ANV_PIPELINE_COMPUTE,
2815 };
2816 
2817 struct anv_pipeline {
2818    struct vk_object_base                        base;
2819 
2820    struct anv_device *                          device;
2821 
2822    struct anv_batch                             batch;
2823    struct anv_reloc_list                        batch_relocs;
2824 
2825    void *                                       mem_ctx;
2826 
2827    enum anv_pipeline_type                       type;
2828    VkPipelineCreateFlags                        flags;
2829 
2830    struct util_dynarray                         executables;
2831 
2832    const struct intel_l3_config *               l3_config;
2833 };
2834 
2835 struct anv_graphics_pipeline {
2836    struct anv_pipeline                          base;
2837 
2838    /* Shaders */
2839    struct anv_shader_bin *                      shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT];
2840 
2841    VkShaderStageFlags                           active_stages;
2842 
2843    struct vk_sample_locations_state             sample_locations;
2844    struct vk_dynamic_graphics_state             dynamic_state;
2845 
2846    /* These fields are required when using dynamic primitive topology;
2847     * rasterization_samples is used only on gen < 8.
2848     */
2849    VkLineRasterizationModeEXT                   line_mode;
2850    VkPolygonMode                                polygon_mode;
2851    uint32_t                                     patch_control_points;
2852    uint32_t                                     rasterization_samples;
2853 
2854    VkColorComponentFlags                        color_comp_writes[MAX_RTS];
2855 
2856    uint32_t                                     view_mask;
2857    uint32_t                                     instance_multiplier;
2858 
2859    bool                                         depth_clamp_enable;
2860    bool                                         depth_clip_enable;
2861    bool                                         kill_pixel;
2862    bool                                         force_fragment_thread_dispatch;
2863    bool                                         negative_one_to_one;
2864 
2865    uint32_t                                     vb_used;
2866    struct anv_pipeline_vertex_binding {
2867       uint32_t                                  stride;
2868       bool                                      instanced;
2869       uint32_t                                  instance_divisor;
2870    } vb[MAX_VBS];
2871 
2872    /* Pre-computed CS instructions that can be copied directly into
2873     * anv_cmd_buffer.
2874     */
2875    uint32_t                                     batch_data[512];
2876 
2877    /* Pre-packed CS instructions & structures that need to be merged later
2878     * with dynamic state.
2879     */
2880    struct {
2881       uint32_t                                  sf[7];
2882       uint32_t                                  clip[4];
2883       uint32_t                                  xfb_bo_pitch[4];
2884       uint32_t                                  wm[3];
2885       uint32_t                                  blend_state[MAX_RTS * 2];
2886       uint32_t                                  streamout_state[3];
2887    } gfx7;
2888 
2889    struct {
2890       uint32_t                                  sf[4];
2891       uint32_t                                  raster[5];
2892       uint32_t                                  wm[2];
2893       uint32_t                                  ps_blend[2];
2894       uint32_t                                  blend_state[1 + MAX_RTS * 2];
2895       uint32_t                                  streamout_state[5];
2896    } gfx8;
2897 };
2898 
2899 struct anv_compute_pipeline {
2900    struct anv_pipeline                          base;
2901 
2902    struct anv_shader_bin *                      cs;
2903    uint32_t                                     batch_data[9];
2904    uint32_t                                     interface_descriptor_data[8];
2905 };
2906 
2907 #define ANV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum)             \
2908    static inline struct anv_##pipe_type##_pipeline *                 \
2909    anv_pipeline_to_##pipe_type(struct anv_pipeline *pipeline)      \
2910    {                                                                 \
2911       assert(pipeline->type == pipe_enum);                           \
2912       return (struct anv_##pipe_type##_pipeline *) pipeline;         \
2913    }
2914 
2915 ANV_DECL_PIPELINE_DOWNCAST(graphics, ANV_PIPELINE_GRAPHICS)
2916 ANV_DECL_PIPELINE_DOWNCAST(compute, ANV_PIPELINE_COMPUTE)
2917 
2918 static inline bool
2919 anv_pipeline_has_stage(const struct anv_graphics_pipeline *pipeline,
2920                        gl_shader_stage stage)
2921 {
2922    return (pipeline->active_stages & mesa_to_vk_shader_stage(stage)) != 0;
2923 }
2924 
2925 static inline bool
2926 anv_pipeline_is_primitive(const struct anv_graphics_pipeline *pipeline)
2927 {
2928    return anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX);
2929 }
2930 
2931 static inline bool
2932 anv_cmd_buffer_all_color_write_masked(const struct anv_cmd_buffer *cmd_buffer)
2933 {
2934    const struct anv_cmd_graphics_state *state = &cmd_buffer->state.gfx;
2935    const struct vk_dynamic_graphics_state *dyn =
2936       &cmd_buffer->vk.dynamic_graphics_state;
2937    uint8_t color_writes = dyn->cb.color_write_enables;
2938 
2939    /* All writes disabled through vkCmdSetColorWriteEnableEXT */
2940    if ((color_writes & ((1u << state->color_att_count) - 1)) == 0)
2941       return true;
2942 
2943    /* Or all write masks are empty */
2944    for (uint32_t i = 0; i < state->color_att_count; i++) {
2945       if (state->pipeline->color_comp_writes[i] != 0)
2946          return false;
2947    }
2948 
2949    return true;
2950 }
2951 
2952 static inline void
2953 anv_cmd_graphic_state_update_has_uint_rt(struct anv_cmd_graphics_state *state)
2954 {
2955    state->has_uint_rt = false;
2956    for (unsigned a = 0; a < state->color_att_count; a++) {
2957       if (vk_format_is_int(state->color_att[a].vk_format)) {
2958          state->has_uint_rt = true;
2959          break;
2960       }
2961    }
2962 }
2963 
2964 #define ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(prefix, stage)             \
2965 static inline const struct elk_##prefix##_prog_data *                   \
2966 get_##prefix##_prog_data(const struct anv_graphics_pipeline *pipeline)  \
2967 {                                                                       \
2968    if (anv_pipeline_has_stage(pipeline, stage)) {                       \
2969       return (const struct elk_##prefix##_prog_data *)                  \
2970              pipeline->shaders[stage]->prog_data;                       \
2971    } else {                                                             \
2972       return NULL;                                                      \
2973    }                                                                    \
2974 }
2975 
2976 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX)
2977 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tcs, MESA_SHADER_TESS_CTRL)
2978 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tes, MESA_SHADER_TESS_EVAL)
2979 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY)
2980 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)
2981 
2982 static inline const struct elk_cs_prog_data *
2983 get_cs_prog_data(const struct anv_compute_pipeline *pipeline)
2984 {
2985    assert(pipeline->cs);
2986    return (const struct elk_cs_prog_data *) pipeline->cs->prog_data;
2987 }
2988 
2989 static inline const struct elk_vue_prog_data *
2990 anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline *pipeline)
2991 {
2992    if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
2993       return &get_gs_prog_data(pipeline)->base;
2994    else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
2995       return &get_tes_prog_data(pipeline)->base;
2996    else
2997       return &get_vs_prog_data(pipeline)->base;
2998 }
2999 
3000 VkResult
3001 anv_pipeline_init(struct anv_pipeline *pipeline,
3002                   struct anv_device *device,
3003                   enum anv_pipeline_type type,
3004                   VkPipelineCreateFlags flags,
3005                   const VkAllocationCallbacks *pAllocator);
3006 
3007 void
3008 anv_pipeline_finish(struct anv_pipeline *pipeline,
3009                     struct anv_device *device,
3010                     const VkAllocationCallbacks *pAllocator);
3011 
3012 struct anv_format_plane {
3013    enum isl_format isl_format:16;
3014    struct isl_swizzle swizzle;
3015 
3016    /* Whether this plane contains chroma channels */
3017    bool has_chroma;
3018 
3019    /* For downscaling of YUV planes */
3020    uint8_t denominator_scales[2];
3021 
3022    /* How to map sampled ycbcr planes to a single 4 component element. */
3023    struct isl_swizzle ycbcr_swizzle;
3024 
3025    /* What aspect is associated to this plane */
3026    VkImageAspectFlags aspect;
3027 };
3028 
3029 
3030 struct anv_format {
3031    struct anv_format_plane planes[3];
3032    VkFormat vk_format;
3033    uint8_t n_planes;
3034    bool can_ycbcr;
3035 };
3036 
3037 static inline void
3038 anv_assert_valid_aspect_set(VkImageAspectFlags aspects)
3039 {
3040    if (util_bitcount(aspects) == 1) {
3041       assert(aspects & (VK_IMAGE_ASPECT_COLOR_BIT |
3042                         VK_IMAGE_ASPECT_DEPTH_BIT |
3043                         VK_IMAGE_ASPECT_STENCIL_BIT |
3044                         VK_IMAGE_ASPECT_PLANE_0_BIT |
3045                         VK_IMAGE_ASPECT_PLANE_1_BIT |
3046                         VK_IMAGE_ASPECT_PLANE_2_BIT));
3047    } else if (aspects & VK_IMAGE_ASPECT_PLANES_BITS_ANV) {
3048       assert(aspects == VK_IMAGE_ASPECT_PLANE_0_BIT ||
3049              aspects == (VK_IMAGE_ASPECT_PLANE_0_BIT |
3050                          VK_IMAGE_ASPECT_PLANE_1_BIT) ||
3051              aspects == (VK_IMAGE_ASPECT_PLANE_0_BIT |
3052                          VK_IMAGE_ASPECT_PLANE_1_BIT |
3053                          VK_IMAGE_ASPECT_PLANE_2_BIT));
3054    } else {
3055       assert(aspects == (VK_IMAGE_ASPECT_DEPTH_BIT |
3056                          VK_IMAGE_ASPECT_STENCIL_BIT));
3057    }
3058 }
3059 
3060 /**
3061  * Return the aspect's plane relative to all_aspects.  For an image, for
3062  * instance, all_aspects would be the set of aspects in the image.  For
3063  * an image view, all_aspects would be the subset of aspects represented
3064  * by that particular view.
3065  */
3066 static inline uint32_t
3067 anv_aspect_to_plane(VkImageAspectFlags all_aspects,
3068                     VkImageAspectFlagBits aspect)
3069 {
3070    anv_assert_valid_aspect_set(all_aspects);
3071    assert(util_bitcount(aspect) == 1);
3072    assert(!(aspect & ~all_aspects));
3073 
3074    /* Because we always put image and view planes in aspect-bit-order, the
3075     * plane index is the number of bits in all_aspects before aspect.
3076     */
3077    return util_bitcount(all_aspects & (aspect - 1));
3078 }
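/* Editorial worked example: for a 3-plane image, all_aspects contains
 * PLANE_0 | PLANE_1 | PLANE_2, so asking for VK_IMAGE_ASPECT_PLANE_2_BIT
 * counts the two lower plane bits and returns plane 2:
 *
 *    anv_aspect_to_plane(VK_IMAGE_ASPECT_PLANE_0_BIT |
 *                        VK_IMAGE_ASPECT_PLANE_1_BIT |
 *                        VK_IMAGE_ASPECT_PLANE_2_BIT,
 *                        VK_IMAGE_ASPECT_PLANE_2_BIT) == 2
 *
 * For a view covering only PLANE_1 | PLANE_2, the same aspect maps to
 * plane 1 instead.
 */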
3079 
3080 #define anv_foreach_image_aspect_bit(b, image, aspects) \
3081    u_foreach_bit(b, vk_image_expand_aspect_mask(&(image)->vk, aspects))
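/* Editorial sketch (assuming vk_image_expand_aspect_mask() expands
 * VK_IMAGE_ASPECT_COLOR_BIT into the image's per-plane aspects for
 * multi-planar formats): iterating a 2-plane image with the COLOR aspect
 * visits the bit positions of PLANE_0 and PLANE_1.
 *
 *    anv_foreach_image_aspect_bit(b, image, VK_IMAGE_ASPECT_COLOR_BIT) {
 *       VkImageAspectFlagBits aspect = 1u << b;
 *       uint32_t plane = anv_image_aspect_to_plane(image, aspect);
 *    }
 */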
3082 
3083 const struct anv_format *
3084 anv_get_format(VkFormat format);
3085 
3086 static inline uint32_t
3087 anv_get_format_planes(VkFormat vk_format)
3088 {
3089    const struct anv_format *format = anv_get_format(vk_format);
3090 
3091    return format != NULL ? format->n_planes : 0;
3092 }
3093 
3094 struct anv_format_plane
3095 anv_get_format_plane(const struct intel_device_info *devinfo,
3096                      VkFormat vk_format, uint32_t plane,
3097                      VkImageTiling tiling);
3098 
3099 struct anv_format_plane
3100 anv_get_format_aspect(const struct intel_device_info *devinfo,
3101                       VkFormat vk_format,
3102                       VkImageAspectFlagBits aspect, VkImageTiling tiling);
3103 
3104 static inline enum isl_format
3105 anv_get_isl_format(const struct intel_device_info *devinfo, VkFormat vk_format,
3106                    VkImageAspectFlags aspect, VkImageTiling tiling)
3107 {
3108    return anv_get_format_aspect(devinfo, vk_format, aspect, tiling).isl_format;
3109 }
3110 
3111 extern VkFormat
3112 vk_format_from_android(unsigned android_format, unsigned android_usage);
3113 
3114 static inline struct isl_swizzle
3115 anv_swizzle_for_render(struct isl_swizzle swizzle)
3116 {
3117    /* Sometimes the swizzle will have alpha map to one.  We do this to fake
3118     * RGB as RGBA for texturing.
3119     */
3120    assert(swizzle.a == ISL_CHANNEL_SELECT_ONE ||
3121           swizzle.a == ISL_CHANNEL_SELECT_ALPHA);
3122 
3123    /* But it doesn't matter what we render to that channel */
3124    swizzle.a = ISL_CHANNEL_SELECT_ALPHA;
3125 
3126    return swizzle;
3127 }
3128 
3129 void
3130 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm);
3131 
3132 /**
3133  * Describes how each part of anv_image will be bound to memory.
3134  */
3135 struct anv_image_memory_range {
3136    /**
3137     * Disjoint bindings into which each portion of the image will be bound.
3138     *
3139     * Binding images to memory can be complicated and involve binding different
3140     * portions of the image to different memory objects or regions.  For most
3141     * images, everything lives in the MAIN binding and gets bound by
3142     * vkBindImageMemory.  For disjoint multi-planar images, each plane has
3143     * a unique, disjoint binding and gets bound by vkBindImageMemory2 with
3144     * VkBindImagePlaneMemoryInfo.  There may also exist bits of memory which are
3145     * implicit or driver-managed and live in special-case bindings.
3146     */
3147    enum anv_image_memory_binding {
3148       /**
3149        * Used if and only if image is not multi-planar disjoint. Bound by
3150        * vkBindImageMemory2 without VkBindImagePlaneMemoryInfo.
3151        */
3152       ANV_IMAGE_MEMORY_BINDING_MAIN,
3153 
3154       /**
3155        * Used if and only if image is multi-planar disjoint.  Bound by
3156        * vkBindImageMemory2 with VkBindImagePlaneMemoryInfo.
3157        */
3158       ANV_IMAGE_MEMORY_BINDING_PLANE_0,
3159       ANV_IMAGE_MEMORY_BINDING_PLANE_1,
3160       ANV_IMAGE_MEMORY_BINDING_PLANE_2,
3161 
3162       /**
3163        * Driver-private bo. In special cases we may store the aux surface and/or
3164        * aux state in this binding.
3165        */
3166       ANV_IMAGE_MEMORY_BINDING_PRIVATE,
3167 
3168       /** Sentinel */
3169       ANV_IMAGE_MEMORY_BINDING_END,
3170    } binding;
3171 
3172    /**
3173     * Offset is relative to the start of the binding created by
3174     * vkBindImageMemory, not to the start of the bo.
3175     */
3176    uint64_t offset;
3177 
3178    uint64_t size;
3179    uint32_t alignment;
3180 };
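/* Illustrative sketch (editorial, not part of the driver): how a range for
 * plane 1 of a disjoint two-plane image might look.  The size and alignment
 * values below are hypothetical.
 *
 *    struct anv_image_memory_range plane1_range = {
 *       .binding   = ANV_IMAGE_MEMORY_BINDING_PLANE_1,
 *       .offset    = 0,
 *       .size      = 64 * 1024,
 *       .alignment = 4096,
 *    };
 *
 * vkBindImageMemory2 with VkBindImagePlaneMemoryInfo::planeAspect set to
 * VK_IMAGE_ASPECT_PLANE_1_BIT then supplies the address this binding
 * resolves to.
 */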
3181 
3182 /**
3183  * Subsurface of an anv_image.
3184  */
3185 struct anv_surface {
3186    struct isl_surf isl;
3187    struct anv_image_memory_range memory_range;
3188 };
3189 
3190 static inline bool MUST_CHECK
3191 anv_surface_is_valid(const struct anv_surface *surface)
3192 {
3193    return surface->isl.size_B > 0 && surface->memory_range.size > 0;
3194 }
3195 
3196 struct anv_image {
3197    struct vk_image vk;
3198 
3199    uint32_t n_planes;
3200 
3201    /**
3202     * Image has multi-planar format and was created with
3203     * VK_IMAGE_CREATE_DISJOINT_BIT.
3204     */
3205    bool disjoint;
3206 
3207    /**
3208     * Image is a WSI image
3209     */
3210    bool from_wsi;
3211 
3212    /**
3213     * Image was imported from a struct AHardwareBuffer.  We have to delay
3214     * final image creation until bind time.
3215     */
3216    bool from_ahb;
3217 
3218    /**
3219     * Image was imported from gralloc with VkNativeBufferANDROID. The gralloc bo
3220     * must be released when the image is destroyed.
3221     */
3222    bool from_gralloc;
3223 
3224    /**
3225     * The memory bindings created by vkCreateImage and vkBindImageMemory.
3226     *
3227     * For details on the image's memory layout, see check_memory_bindings().
3228     *
3229     * vkCreateImage constructs the `memory_range` for each
3230     * anv_image_memory_binding.  After vkCreateImage, each binding is valid if
3231     * and only if `memory_range::size > 0`.
3232     *
3233     * vkBindImageMemory binds each valid `memory_range` to an `address`.
3234     * Usually, the app will provide the address via the parameters of
3235     * vkBindImageMemory.  However, special-case bindings may be bound to
3236     * driver-private memory.
3237     */
3238    struct anv_image_binding {
3239       struct anv_image_memory_range memory_range;
3240       struct anv_address address;
3241    } bindings[ANV_IMAGE_MEMORY_BINDING_END];
3242 
3243    /**
3244     * Image subsurfaces
3245     *
3246     * For each plane x, anv_image::planes[x] is valid if and only if
3247     * anv_image::vk.aspects contains the corresponding aspect.  Refer to
3248     * anv_image_aspect_to_plane() to find the plane index for a given aspect.
3249     *
3250     * The hardware requires that the depth buffer and stencil buffer be
3251     * separate surfaces.  From Vulkan's perspective, though, depth and stencil
3252     * reside in the same VkImage.  To satisfy both the hardware and Vulkan, we
3253     * allocate the depth and stencil buffers as separate surfaces in the same
3254     * bo.
3255     */
3256    struct anv_image_plane {
3257       struct anv_surface primary_surface;
3258 
3259       /**
3260        * A surface which shadows the main surface and may have different
3261        * tiling. This is used for sampling using a tiling that isn't supported
3262        * for other operations.
3263        */
3264       struct anv_surface shadow_surface;
3265 
3266       /**
3267        * The base aux usage for this image.  For color images, this can be
3268        * either CCS_E or CCS_D depending on whether or not we can reliably
3269        * leave CCS on all the time.
3270        */
3271       enum isl_aux_usage aux_usage;
3272 
3273       struct anv_surface aux_surface;
3274 
3275       /** Location of the fast clear state.  */
3276       struct anv_image_memory_range fast_clear_memory_range;
3277 
3278       /**
3279        * Whether this image can be fast cleared with non-zero clear colors.
3280        * This can happen with mutable images when formats with different bit
3281        * sizes per component are used.
3282        *
3283        * On Gfx9+, because the clear colors are stored as four 32-bit component
3284        * values, we can clear in R16G16_UNORM (storing two 16-bit values in
3285        * components 0 & 1 of the clear color) and then draw in R32_UINT, which
3286        * would interpret the clear color as a single-component value, using
3287        * only the first 16-bit component of the previously written clear color.
3288        *
3289        * On Gfx7/7.5/8, only CC_ZERO/CC_ONE clear colors are supported; this
3290        * boolean prevents the usage of CC_ONE.
3291        */
3292       bool can_non_zero_fast_clear;
3293    } planes[3];
3294 };
3295 
3296 static inline bool
3297 anv_image_is_externally_shared(const struct anv_image *image)
3298 {
3299    return image->vk.drm_format_mod != DRM_FORMAT_MOD_INVALID ||
3300           image->vk.external_handle_types != 0;
3301 }
3302 
3303 static inline bool
3304 anv_image_has_private_binding(const struct anv_image *image)
3305 {
3306    const struct anv_image_binding private_binding =
3307       image->bindings[ANV_IMAGE_MEMORY_BINDING_PRIVATE];
3308    return private_binding.memory_range.size != 0;
3309 }
3310 
3311 /* The ordering of this enum is important */
3312 enum anv_fast_clear_type {
3313    /** Image does not have/support any fast-clear blocks */
3314    ANV_FAST_CLEAR_NONE = 0,
3315    /** Image has/supports fast-clear but only to the default value */
3316    ANV_FAST_CLEAR_DEFAULT_VALUE = 1,
3317    /** Image has/supports fast-clear with an arbitrary fast-clear value */
3318    ANV_FAST_CLEAR_ANY = 2,
3319 };
3320 
3321 /**
3322  * Return the aspect's _format_ plane, not its _memory_ plane (using the
3323  * vocabulary of VK_EXT_image_drm_format_modifier). As a consequence, \a
3324  * aspect_mask may contain VK_IMAGE_ASPECT_PLANE_*, but must not contain
3325  * VK_IMAGE_ASPECT_MEMORY_PLANE_* .
3326  */
3327 static inline uint32_t
3328 anv_image_aspect_to_plane(const struct anv_image *image,
3329                           VkImageAspectFlagBits aspect)
3330 {
3331    return anv_aspect_to_plane(image->vk.aspects, aspect);
3332 }
3333 
3334 /* Returns the number of auxiliary buffer levels attached to an image. */
3335 static inline uint8_t
3336 anv_image_aux_levels(const struct anv_image * const image,
3337                      VkImageAspectFlagBits aspect)
3338 {
3339    uint32_t plane = anv_image_aspect_to_plane(image, aspect);
3340    if (image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE)
3341       return 0;
3342 
3343    return image->vk.mip_levels;
3344 }
3345 
3346 /* Returns the number of auxiliary buffer layers attached to an image. */
3347 static inline uint32_t
3348 anv_image_aux_layers(const struct anv_image * const image,
3349                      VkImageAspectFlagBits aspect,
3350                      const uint8_t miplevel)
3351 {
3352    assert(image);
3353 
3354    /* The miplevel must exist in the main buffer. */
3355    assert(miplevel < image->vk.mip_levels);
3356 
3357    if (miplevel >= anv_image_aux_levels(image, aspect)) {
3358       /* There are no layers with auxiliary data because the miplevel has no
3359        * auxiliary data.
3360        */
3361       return 0;
3362    }
3363 
3364    return MAX2(image->vk.array_layers, image->vk.extent.depth >> miplevel);
3365 }
3366 
3367 static inline struct anv_address MUST_CHECK
3368 anv_image_address(const struct anv_image *image,
3369                   const struct anv_image_memory_range *mem_range)
3370 {
3371    const struct anv_image_binding *binding = &image->bindings[mem_range->binding];
3372    assert(binding->memory_range.offset == 0);
3373 
3374    if (mem_range->size == 0)
3375       return ANV_NULL_ADDRESS;
3376 
3377    return anv_address_add(binding->address, mem_range->offset);
3378 }
3379 
3380 static inline struct anv_address
3381 anv_image_get_clear_color_addr(UNUSED const struct anv_device *device,
3382                                const struct anv_image *image,
3383                                VkImageAspectFlagBits aspect)
3384 {
3385    assert(image->vk.aspects & (VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV |
3386                                VK_IMAGE_ASPECT_DEPTH_BIT));
3387 
3388    uint32_t plane = anv_image_aspect_to_plane(image, aspect);
3389    const struct anv_image_memory_range *mem_range =
3390       &image->planes[plane].fast_clear_memory_range;
3391 
3392    return anv_image_address(image, mem_range);
3393 }
3394 
3395 static inline struct anv_address
3396 anv_image_get_fast_clear_type_addr(const struct anv_device *device,
3397                                    const struct anv_image *image,
3398                                    VkImageAspectFlagBits aspect)
3399 {
3400    struct anv_address addr =
3401       anv_image_get_clear_color_addr(device, image, aspect);
3402 
3403    const unsigned clear_color_state_size = device->info->ver >= 10 ?
3404       device->isl_dev.ss.clear_color_state_size :
3405       device->isl_dev.ss.clear_value_size;
3406    return anv_address_add(addr, clear_color_state_size);
3407 }
3408 
3409 /* Returns true if a HiZ-enabled depth buffer can be sampled from. */
3410 static inline bool
3411 anv_can_sample_with_hiz(const struct intel_device_info * const devinfo,
3412                         const struct anv_image *image)
3413 {
3414    if (!(image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
3415       return false;
3416 
3417    /* For Gfx8-11, there are some restrictions around sampling from HiZ.
3418     * The Skylake PRM docs for RENDER_SURFACE_STATE::AuxiliarySurfaceMode
3419     * say:
3420     *
3421     *    "If this field is set to AUX_HIZ, Number of Multisamples must
3422     *    be MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D."
3423     */
3424    if (image->vk.image_type == VK_IMAGE_TYPE_3D)
3425       return false;
3426 
3427    /* Allow this feature on BDW even though it is disabled in the BDW devinfo
3428     * struct. There's documentation which suggests that this feature actually
3429     * reduces performance on BDW, but it has only been observed to help so
3430     * far. Sampling fast-cleared blocks on BDW must also be handled with care
3431     * (see depth_stencil_attachment_compute_aux_usage() for more info).
3432     */
3433    if (devinfo->ver != 8 && !devinfo->has_sample_with_hiz)
3434       return false;
3435 
3436    return image->vk.samples == 1;
3437 }
3438 
3439 void
3440 anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer,
3441                                   const struct anv_image *image,
3442                                   VkImageAspectFlagBits aspect,
3443                                   enum isl_aux_usage aux_usage,
3444                                   uint32_t level,
3445                                   uint32_t base_layer,
3446                                   uint32_t layer_count);
3447 
3448 void
3449 anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer,
3450                       const struct anv_image *image,
3451                       VkImageAspectFlagBits aspect,
3452                       enum isl_aux_usage aux_usage,
3453                       enum isl_format format, struct isl_swizzle swizzle,
3454                       uint32_t level, uint32_t base_layer, uint32_t layer_count,
3455                       VkRect2D area, union isl_color_value clear_color);
3456 void
3457 anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
3458                               const struct anv_image *image,
3459                               VkImageAspectFlags aspects,
3460                               enum isl_aux_usage depth_aux_usage,
3461                               uint32_t level,
3462                               uint32_t base_layer, uint32_t layer_count,
3463                               VkRect2D area,
3464                               float depth_value, uint8_t stencil_value);
3465 void
3466 anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
3467                        const struct anv_image *src_image,
3468                        enum isl_aux_usage src_aux_usage,
3469                        uint32_t src_level, uint32_t src_base_layer,
3470                        const struct anv_image *dst_image,
3471                        enum isl_aux_usage dst_aux_usage,
3472                        uint32_t dst_level, uint32_t dst_base_layer,
3473                        VkImageAspectFlagBits aspect,
3474                        uint32_t src_x, uint32_t src_y,
3475                        uint32_t dst_x, uint32_t dst_y,
3476                        uint32_t width, uint32_t height,
3477                        uint32_t layer_count,
3478                        enum blorp_filter filter);
3479 void
3480 anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
3481                  const struct anv_image *image,
3482                  VkImageAspectFlagBits aspect, uint32_t level,
3483                  uint32_t base_layer, uint32_t layer_count,
3484                  enum isl_aux_op hiz_op);
3485 void
3486 anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
3487                     const struct anv_image *image,
3488                     VkImageAspectFlags aspects,
3489                     uint32_t level,
3490                     uint32_t base_layer, uint32_t layer_count,
3491                     VkRect2D area, uint8_t stencil_value);
3492 void
3493 anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
3494                  const struct anv_image *image,
3495                  enum isl_format format, struct isl_swizzle swizzle,
3496                  VkImageAspectFlagBits aspect,
3497                  uint32_t base_layer, uint32_t layer_count,
3498                  enum isl_aux_op mcs_op, union isl_color_value *clear_value,
3499                  bool predicate);
3500 void
3501 anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
3502                  const struct anv_image *image,
3503                  enum isl_format format, struct isl_swizzle swizzle,
3504                  VkImageAspectFlagBits aspect, uint32_t level,
3505                  uint32_t base_layer, uint32_t layer_count,
3506                  enum isl_aux_op ccs_op, union isl_color_value *clear_value,
3507                  bool predicate);
3508 
3509 void
3510 anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
3511                          const struct anv_image *image,
3512                          VkImageAspectFlagBits aspect,
3513                          uint32_t base_level, uint32_t level_count,
3514                          uint32_t base_layer, uint32_t layer_count);
3515 
3516 enum isl_aux_state ATTRIBUTE_PURE
3517 anv_layout_to_aux_state(const struct intel_device_info * const devinfo,
3518                         const struct anv_image *image,
3519                         const VkImageAspectFlagBits aspect,
3520                         const VkImageLayout layout);
3521 
3522 enum isl_aux_usage ATTRIBUTE_PURE
3523 anv_layout_to_aux_usage(const struct intel_device_info * const devinfo,
3524                         const struct anv_image *image,
3525                         const VkImageAspectFlagBits aspect,
3526                         const VkImageUsageFlagBits usage,
3527                         const VkImageLayout layout);
3528 
3529 enum anv_fast_clear_type ATTRIBUTE_PURE
3530 anv_layout_to_fast_clear_type(const struct intel_device_info * const devinfo,
3531                               const struct anv_image * const image,
3532                               const VkImageAspectFlagBits aspect,
3533                               const VkImageLayout layout);
3534 
3535 static inline bool
3536 anv_image_aspects_compatible(VkImageAspectFlags aspects1,
3537                              VkImageAspectFlags aspects2)
3538 {
3539    if (aspects1 == aspects2)
3540       return true;
3541 
3542    /* Only color aspects with the same number of planes are compatible. */
3543    if ((aspects1 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
3544        (aspects2 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
3545        util_bitcount(aspects1) == util_bitcount(aspects2))
3546       return true;
3547 
3548    return false;
3549 }
3550 
3551 struct anv_image_view {
3552    struct vk_image_view vk;
3553 
3554    const struct anv_image *image; /**< VkImageViewCreateInfo::image */
3555 
3556    unsigned n_planes;
3557    struct {
3558       uint32_t image_plane;
3559 
3560       struct isl_view isl;
3561 
3562       /**
3563        * RENDER_SURFACE_STATE when using image as a sampler surface with an
3564        * image layout of SHADER_READ_ONLY_OPTIMAL or
3565        * DEPTH_STENCIL_READ_ONLY_OPTIMAL.
3566        */
3567       struct anv_surface_state optimal_sampler_surface_state;
3568 
3569       /**
3570        * RENDER_SURFACE_STATE when using image as a sampler surface with an
3571        * image layout of GENERAL.
3572        */
3573       struct anv_surface_state general_sampler_surface_state;
3574 
3575       /**
3576        * RENDER_SURFACE_STATE when using image as a storage image. Separate
3577        * states for vanilla (with the original format) and one which has been
3578        * lowered to a format suitable for reading.  This may be a raw surface
3579        * in extreme cases or simply a surface with a different format where we
3580        * expect some conversion to be done in the shader.
3581        */
3582       struct anv_surface_state storage_surface_state;
3583       struct anv_surface_state lowered_storage_surface_state;
3584 
3585       struct isl_image_param lowered_storage_image_param;
3586    } planes[3];
3587 };
3588 
3589 enum anv_image_view_state_flags {
3590    ANV_IMAGE_VIEW_STATE_STORAGE_LOWERED      = (1 << 0),
3591    ANV_IMAGE_VIEW_STATE_TEXTURE_OPTIMAL      = (1 << 1),
3592 };
3593 
3594 void anv_image_fill_surface_state(struct anv_device *device,
3595                                   const struct anv_image *image,
3596                                   VkImageAspectFlagBits aspect,
3597                                   const struct isl_view *view,
3598                                   isl_surf_usage_flags_t view_usage,
3599                                   enum isl_aux_usage aux_usage,
3600                                   const union isl_color_value *clear_color,
3601                                   enum anv_image_view_state_flags flags,
3602                                   struct anv_surface_state *state_inout,
3603                                   struct isl_image_param *image_param_out);
3604 
3605 struct anv_image_create_info {
3606    const VkImageCreateInfo *vk_info;
3607 
3608    /** An opt-in bitmask which filters an ISL-mapping of the Vulkan tiling. */
3609    isl_tiling_flags_t isl_tiling_flags;
3610 
3611    /** These flags will be added to any flags derived from VkImageCreateInfo. */
3612    isl_surf_usage_flags_t isl_extra_usage_flags;
3613 };
3614 
3615 VkResult anv_image_init(struct anv_device *device, struct anv_image *image,
3616                         struct anv_image_create_info *create_info);
3617 
3618 void anv_image_finish(struct anv_image *image);
3619 
3620 void anv_image_get_memory_requirements(struct anv_device *device,
3621                                        struct anv_image *image,
3622                                        VkImageAspectFlags aspects,
3623                                        VkMemoryRequirements2 *pMemoryRequirements);
3624 
3625 enum isl_format
3626 anv_isl_format_for_descriptor_type(const struct anv_device *device,
3627                                    VkDescriptorType type);
3628 
3629 static inline uint32_t
3630 anv_rasterization_aa_mode(VkPolygonMode raster_mode,
3631                           VkLineRasterizationModeEXT line_mode)
3632 {
3633    if (raster_mode == VK_POLYGON_MODE_LINE &&
3634        line_mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT)
3635       return true;
3636    return false;
3637 }
3638 
3639 VkFormatFeatureFlags2
3640 anv_get_image_format_features2(const struct intel_device_info *devinfo,
3641                                VkFormat vk_format,
3642                                const struct anv_format *anv_format,
3643                                VkImageTiling vk_tiling,
3644                                const struct isl_drm_modifier_info *isl_mod_info);
3645 
3646 void anv_fill_buffer_surface_state(struct anv_device *device,
3647                                    struct anv_state state,
3648                                    enum isl_format format,
3649                                    struct isl_swizzle swizzle,
3650                                    isl_surf_usage_flags_t usage,
3651                                    struct anv_address address,
3652                                    uint32_t range, uint32_t stride);
3653 
3654 
3655 /* Haswell border color is a bit of a disaster.  Float and unorm formats use a
3656  * straightforward 32-bit float color in the first 64 bytes.  Instead of using
3657  * a nice float/integer union like Gfx8+, Haswell specifies the integer border
3658  * color as a separate entry /after/ the float color.  The layout of this entry
3659  * also depends on the format's bpp (with extra hacks for RG32), and overlaps.
3660  *
3661  * Since we don't know the format/bpp, we can't make any of the border colors
3662  * containing '1' work for all formats, as it would be in the wrong place for
3663  * some of them.  We opt to make 32-bit integers work as this seems like the
3664  * most common option.  Fortunately, transparent black works regardless, as
3665  * all zeroes is the same in every bit-size.
3666  */
3667 struct hsw_border_color {
3668    float float32[4];
3669    uint32_t _pad0[12];
3670    uint32_t uint32[4];
3671    uint32_t _pad1[108];
3672 };
3673 
3674 struct gfx8_border_color {
3675    union {
3676       float float32[4];
3677       uint32_t uint32[4];
3678    };
3679    /* Pad out to 64 bytes */
3680    uint32_t _pad[12];
3681 };
3682 
3683 struct anv_sampler {
3684    struct vk_object_base        base;
3685 
3686    uint32_t                     state[3][4];
3687    uint32_t                     n_planes;
3688    struct vk_ycbcr_conversion  *conversion;
3689 
3690    /* Blob of sampler state data which is guaranteed to be 32-byte aligned
3691     * with a 32-byte stride, for use as bindless samplers.
3692     */
3693    struct anv_state             bindless_state;
3694 
3695    struct anv_state             custom_border_color;
3696 };
3697 
3698 #define ANV_PIPELINE_STATISTICS_MASK 0x000007ff
3699 
3700 struct anv_query_pool {
3701    struct vk_object_base                        base;
3702 
3703    VkQueryType                                  type;
3704    VkQueryPipelineStatisticFlags                pipeline_statistics;
3705    /** Stride between slots, in bytes */
3706    uint32_t                                     stride;
3707    /** Number of slots in this query pool */
3708    uint32_t                                     slots;
3709    struct anv_bo *                              bo;
3710 
3711    /* KHR perf queries : */
3712    uint32_t                                     pass_size;
3713    uint32_t                                     data_offset;
3714    uint32_t                                     snapshot_size;
3715    uint32_t                                     n_counters;
3716    struct intel_perf_counter_pass                *counter_pass;
3717    uint32_t                                     n_passes;
3718    struct intel_perf_query_info                 **pass_query;
3719 };
3720 
3721 static inline uint32_t khr_perf_query_preamble_offset(const struct anv_query_pool *pool,
3722                                                       uint32_t pass)
3723 {
3724    return pool->pass_size * pass + 8;
3725 }
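/* Editorial worked example: each pass occupies pass_size bytes in the pool BO
 * and its preamble sits 8 bytes into that slot, so with a hypothetical
 * pass_size of 256, pass 2 has its preamble at 256 * 2 + 8 = 520.
 */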
3726 
3727 void
3728 anv_dump_pipe_bits(enum anv_pipe_bits bits);
3729 
3730 static inline void
3731 anv_add_pending_pipe_bits(struct anv_cmd_buffer* cmd_buffer,
3732                           enum anv_pipe_bits bits,
3733                           const char* reason)
3734 {
3735    cmd_buffer->state.pending_pipe_bits |= bits;
3736    if (INTEL_DEBUG(DEBUG_PIPE_CONTROL) && bits)
3737    {
3738       fputs("pc: add ", stderr);
3739       anv_dump_pipe_bits(bits);
3740       fprintf(stderr, "reason: %s\n", reason);
3741    }
3742 }
3743 
3744 struct anv_performance_configuration_intel {
3745    struct vk_object_base      base;
3746 
3747    struct intel_perf_registers *register_config;
3748 
3749    uint64_t                   config_id;
3750 };
3751 
3752 void anv_physical_device_init_perf(struct anv_physical_device *device, int fd);
3753 void anv_device_perf_init(struct anv_device *device);
3754 void anv_perf_write_pass_results(struct intel_perf_config *perf,
3755                                  struct anv_query_pool *pool, uint32_t pass,
3756                                  const struct intel_perf_query_result *accumulated_results,
3757                                  union VkPerformanceCounterResultKHR *results);
3758 
3759 /* Used to emit a series of memcpy operations */
3760 struct anv_memcpy_state {
3761    struct anv_device *device;
3762    struct anv_batch *batch;
3763 
3764    struct anv_vb_cache_range vb_bound;
3765    struct anv_vb_cache_range vb_dirty;
3766 };
3767 
3768 struct anv_utrace_flush_copy {
3769    /* Needs to be the first field */
3770    struct intel_ds_flush_data ds;
3771 
3772    /* Batch stuff to implement a copy of timestamps recorded in another
3773     * buffer.
3774     */
3775    struct anv_reloc_list relocs;
3776    struct anv_batch batch;
3777    struct anv_bo *batch_bo;
3778 
3779    /* Buffer of 64-bit timestamps */
3780    struct anv_bo *trace_bo;
3781 
3782    /* Syncobj to be signaled when the batch completes */
3783    struct vk_sync *sync;
3784 
3785    /* Queue on which all the recorded traces are submitted */
3786    struct anv_queue *queue;
3787 
3788    struct anv_memcpy_state memcpy_state;
3789 };
3790 
3791 void anv_device_utrace_init(struct anv_device *device);
3792 void anv_device_utrace_finish(struct anv_device *device);
3793 VkResult
3794 anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
3795                                     uint32_t cmd_buffer_count,
3796                                     struct anv_cmd_buffer **cmd_buffers,
3797                                     struct anv_utrace_flush_copy **out_flush_data);
3798 
3799 #ifdef HAVE_PERFETTO
3800 void anv_perfetto_init(void);
3801 uint64_t anv_perfetto_begin_submit(struct anv_queue *queue);
3802 void anv_perfetto_end_submit(struct anv_queue *queue, uint32_t submission_id,
3803                              uint64_t start_ts);
3804 #else
3805 static inline void anv_perfetto_init(void)
3806 {
3807 }
3808 static inline uint64_t anv_perfetto_begin_submit(struct anv_queue *queue)
3809 {
3810    return 0;
3811 }
3812 static inline void anv_perfetto_end_submit(struct anv_queue *queue,
3813                                            uint32_t submission_id,
3814                                            uint64_t start_ts)
3815 {}
3816 #endif
3817 
3818 
3819 #define ANV_FROM_HANDLE(__anv_type, __name, __handle) \
3820    VK_FROM_HANDLE(__anv_type, __name, __handle)
3821 
3822 VK_DEFINE_HANDLE_CASTS(anv_cmd_buffer, vk.base, VkCommandBuffer,
3823                        VK_OBJECT_TYPE_COMMAND_BUFFER)
3824 VK_DEFINE_HANDLE_CASTS(anv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
3825 VK_DEFINE_HANDLE_CASTS(anv_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE)
3826 VK_DEFINE_HANDLE_CASTS(anv_physical_device, vk.base, VkPhysicalDevice,
3827                        VK_OBJECT_TYPE_PHYSICAL_DEVICE)
3828 VK_DEFINE_HANDLE_CASTS(anv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
3829 
3830 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, vk.base, VkBuffer,
3831                                VK_OBJECT_TYPE_BUFFER)
3832 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, base, VkBufferView,
3833                                VK_OBJECT_TYPE_BUFFER_VIEW)
3834 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_pool, base, VkDescriptorPool,
3835                                VK_OBJECT_TYPE_DESCRIPTOR_POOL)
3836 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, base, VkDescriptorSet,
3837                                VK_OBJECT_TYPE_DESCRIPTOR_SET)
3838 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, base,
3839                                VkDescriptorSetLayout,
3840                                VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
3841 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, base, VkDeviceMemory,
3842                                VK_OBJECT_TYPE_DEVICE_MEMORY)
3843 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
3844 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)
3845 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, vk.base, VkImageView,
3846                                VK_OBJECT_TYPE_IMAGE_VIEW);
3847 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, base, VkPipeline,
3848                                VK_OBJECT_TYPE_PIPELINE)
3849 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, base, VkPipelineLayout,
3850                                VK_OBJECT_TYPE_PIPELINE_LAYOUT)
3851 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, base, VkQueryPool,
3852                                VK_OBJECT_TYPE_QUERY_POOL)
3853 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, base, VkSampler,
3854                                VK_OBJECT_TYPE_SAMPLER)
3855 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_performance_configuration_intel, base,
3856                                VkPerformanceConfigurationINTEL,
3857                                VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL)
3858 
3859 #define anv_genX(devinfo, thing) ({             \
3860    __typeof(&gfx7_##thing) genX_thing;          \
3861    switch ((devinfo)->verx10) {                 \
3862    case 70:                                     \
3863       genX_thing = &gfx7_##thing;               \
3864       break;                                    \
3865    case 75:                                     \
3866       genX_thing = &gfx75_##thing;              \
3867       break;                                    \
3868    case 80:                                     \
3869       genX_thing = &gfx8_##thing;               \
3870       break;                                    \
3871    default:                                     \
3872       unreachable("Unknown hardware generation"); \
3873    }                                            \
3874    genX_thing;                                  \
3875 })
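/* Illustrative usage sketch (editorial; the "emit_something" entry point is
 * hypothetical): anv_genX() picks the gfx7/gfx75/gfx8 variant of a
 * genX()-prefixed symbol from the device generation at runtime, e.g.
 *
 *    anv_genX(device->info, emit_something)(cmd_buffer);
 *
 * resolves to gfx8_emit_something(cmd_buffer) on a verx10 == 80 device.
 */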
3876 
3877 /* Gen-specific function declarations */
3878 #ifdef genX
3879 #  include "anv_genX.h"
3880 #else
3881 #  define genX(x) gfx7_##x
3882 #  include "anv_genX.h"
3883 #  undef genX
3884 #  define genX(x) gfx75_##x
3885 #  include "anv_genX.h"
3886 #  undef genX
3887 #  define genX(x) gfx8_##x
3888 #  include "anv_genX.h"
3889 #  undef genX
3890 #endif
3891 
3892 #endif /* ANV_PRIVATE_H */
3893