1 /*
2  * Copyright © 2019 Raspberry Pi Ltd
3  *
4  * based in part on anv driver which is:
5  * Copyright © 2015 Intel Corporation
6  *
7  * based in part on radv driver which is:
8  * Copyright © 2016 Red Hat.
9  * Copyright © 2016 Bas Nieuwenhuizen
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a
12  * copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice (including the next
19  * paragraph) shall be included in all copies or substantial portions of the
20  * Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
25  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28  * IN THE SOFTWARE.
29  */
30 #ifndef V3DV_PRIVATE_H
31 #define V3DV_PRIVATE_H
32 
33 #include <stdio.h>
34 #include <string.h>
35 #include <vulkan/vulkan.h>
36 #include <vulkan/vk_icd.h>
37 #include <vk_enum_to_str.h>
38 
39 #include "vk_descriptor_update_template.h"
40 #include "vk_device.h"
41 #include "vk_device_memory.h"
42 #include "vk_format.h"
43 #include "vk_instance.h"
44 #include "vk_image.h"
45 #include "vk_log.h"
46 #include "vk_physical_device.h"
47 #include "vk_shader_module.h"
48 #include "vk_sync.h"
49 #include "vk_sync_timeline.h"
50 #include "vk_util.h"
51 #include "vk_ycbcr_conversion.h"
52 
53 #include "vk_command_buffer.h"
54 #include "vk_command_pool.h"
55 #include "vk_queue.h"
56 #include "vk_pipeline.h"
57 
58 #include <xf86drm.h>
59 
60 #ifdef HAVE_VALGRIND
61 #include <valgrind.h>
62 #include <memcheck.h>
63 #define VG(x) x
64 #else
65 #define VG(x) ((void)0)
66 #endif
67 
68 #include "util/detect_os.h"
69 
70 #include "v3dv_limits.h"
71 
72 #include "common/v3d_device_info.h"
73 #include "common/v3d_limits.h"
74 #include "common/v3d_tiling.h"
75 #include "common/v3d_util.h"
76 
77 #include "compiler/shader_enums.h"
78 #include "compiler/spirv/nir_spirv.h"
79 
80 #include "compiler/v3d_compiler.h"
81 
82 #include "vk_debug_report.h"
83 #include "util/set.h"
84 #include "util/hash_table.h"
85 #include "util/sparse_array.h"
86 #include "util/xmlconfig.h"
87 #include "util/u_atomic.h"
88 
89 #include "v3dv_entrypoints.h"
90 #include "v3dv_bo.h"
91 
92 #include "drm-uapi/v3d_drm.h"
93 
94 #include "vk_alloc.h"
95 #include "simulator/v3d_simulator.h"
96 
97 #include "v3dv_cl.h"
98 
99 #include "wsi_common.h"
100 
101 /* A non-fatal assert.  Useful for debugging. */
102 #if MESA_DEBUG
103 #define v3dv_assert(x) ({ \
104    if (unlikely(!(x))) \
105       fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \
106 })
107 #else
108 #define v3dv_assert(x)
109 #endif
110 
111 #define perf_debug(...) do {                       \
112    if (V3D_DBG(PERF))                            \
113       fprintf(stderr, __VA_ARGS__);                \
114 } while (0)
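
/* Hypothetical usage sketch (the job/bo fields and the messages below are
 * illustrative only): v3dv_assert() reports but does not abort, and
 * perf_debug() only prints when the "perf" option is enabled in V3D_DEBUG.
 *
 *    v3dv_assert(job->bcl.bo != NULL);
 *    perf_debug("Flushing job early due to %s\n", "barrier");
 */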
115 
116 struct v3dv_instance;
117 
118 struct v3d_simulator_file;
119 
120 /* Minimum required by the Vulkan 1.1 spec */
121 #define MAX_MEMORY_ALLOCATION_SIZE (1ull << 30)
122 
123 /* Maximum number of performance counters */
124 #define V3D_MAX_PERFCNT 93
125 
126 struct v3dv_physical_device {
127    struct vk_physical_device vk;
128 
129    char *name;
130    int32_t render_fd;
131    int32_t display_fd;
132 
133    /* We need these because it is not clear how to detect
134     * valid devids in a portable way
135     */
136    bool has_primary;
137    bool has_render;
138 
139    dev_t primary_devid;
140    dev_t render_devid;
141 
142 #if USE_V3D_SIMULATOR
143    uint32_t device_id;
144 #endif
145 
146    uint8_t driver_build_sha1[20];
147    uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
148    uint8_t device_uuid[VK_UUID_SIZE];
149    uint8_t driver_uuid[VK_UUID_SIZE];
150 
151    struct vk_sync_type drm_syncobj_type;
152    struct vk_sync_timeline_type sync_timeline_type;
153    const struct vk_sync_type *sync_types[3];
154 
155    struct disk_cache *disk_cache;
156 
157    mtx_t mutex;
158 
159    struct wsi_device wsi_device;
160 
161    VkPhysicalDeviceMemoryProperties memory;
162 
163    struct v3d_device_info devinfo;
164 
165 #if USE_V3D_SIMULATOR
166    struct v3d_simulator_file *sim_file;
167 #endif
168 
169    const struct v3d_compiler *compiler;
170    uint32_t next_program_id;
171 
172    alignas(8) uint64_t heap_used;
173 
174    /* This array holds all our 'struct v3dv_bo' allocations. We use this
175     * so we can add a refcount to our BOs and check if a particular BO
176     * was already allocated in this device using its GEM handle. This is
177     * necessary to properly manage BO imports, because the kernel doesn't
178     * refcount the underlying BO memory.
179     *
180     * Specifically, when self-importing (i.e. importing a BO into the same
181     * device that created it), the kernel will give us the same BO handle
182     * for both BOs and we must only free it once when both references are
183     * freed. Otherwise, if we are not self-importing, we get two different BO
184     * handles, and we want to free each one individually.
185     *
186     * The BOs in this map all have a refcnt field holding their reference
187     * count, and only self-imported BOs will ever have a refcnt > 1.
188     */
189    struct util_sparse_array bo_map;
190 
191    struct {
192       bool merge_jobs;
193    } options;
194 
195    struct {
196       bool cpu_queue;
197       bool multisync;
198       bool perfmon;
199    } caps;
200 };
201 
202 static inline struct v3dv_bo *
203 v3dv_device_lookup_bo(struct v3dv_physical_device *device, uint32_t handle)
204 {
205    return (struct v3dv_bo *) util_sparse_array_get(&device->bo_map, handle);
206 }
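
/* Hedged sketch (not the driver's actual import path) of how bo_map and the
 * per-BO refcnt described above are meant to interact when importing a BO by
 * GEM handle:
 *
 *    struct v3dv_bo *bo = v3dv_device_lookup_bo(pdevice, gem_handle);
 *    if (bo->refcnt > 0) {
 *       p_atomic_inc(&bo->refcnt);   // self-import: reuse the existing BO
 *       return bo;
 *    }
 *    // Otherwise the sparse-array entry is zero-filled: initialize it for
 *    // the newly imported handle and set refcnt to 1.
 */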
207 
208 VkResult v3dv_wsi_init(struct v3dv_physical_device *physical_device);
209 void v3dv_wsi_finish(struct v3dv_physical_device *physical_device);
210 struct v3dv_image *v3dv_wsi_get_image_from_swapchain(VkSwapchainKHR swapchain,
211                                                      uint32_t index);
212 
213 void v3dv_meta_clear_init(struct v3dv_device *device);
214 void v3dv_meta_clear_finish(struct v3dv_device *device);
215 
216 void v3dv_meta_blit_init(struct v3dv_device *device);
217 void v3dv_meta_blit_finish(struct v3dv_device *device);
218 
219 void v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device);
220 void v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device);
221 
222 bool v3dv_meta_can_use_tlb(struct v3dv_image *image,
223                            uint8_t plane,
224                            uint8_t miplevel,
225                            const VkOffset3D *offset,
226                            const VkExtent3D *extent,
227                            VkFormat *compat_format);
228 
229 struct v3dv_instance {
230    struct vk_instance vk;
231 
232    bool pipeline_cache_enabled;
233    bool default_pipeline_cache_enabled;
234    bool meta_cache_enabled;
235 };
236 
237 /* FIXME: In addition to tracking the last job submitted by GPU queue (cl, csd,
238  * tfu), we still need a syncobj to track the last overall job submitted
239  * (V3DV_QUEUE_ANY) for the case we don't support multisync. Someday we can
240  * start expecting multisync to be present and drop the legacy implementation
241  * together with this V3DV_QUEUE_ANY tracker.
242  */
243 enum v3dv_queue_type {
244    V3DV_QUEUE_CL = 0,
245    V3DV_QUEUE_CSD,
246    V3DV_QUEUE_TFU,
247    V3DV_QUEUE_CPU,
248    V3DV_QUEUE_ANY,
249    V3DV_QUEUE_COUNT,
250 };
251 
252 /* For each GPU queue, we use a syncobj to track the last job submitted. We
253  * set the flag `first` to determine when we are starting a new cmd buffer
254  * batch, and therefore whether a job submitted to a given queue will be the
255  * first in a cmd buf batch.
256  */
257 struct v3dv_last_job_sync {
258    /* If the job is the first submitted to a GPU queue in a cmd buffer batch.
259     *
260     * We use V3DV_QUEUE_{CL,CSD,TFU} both with and without multisync.
261     */
262    bool first[V3DV_QUEUE_COUNT];
263    /* Array of syncobj to track the last job submitted to a GPU queue.
264     *
265     * With multisync we use V3DV_QUEUE_{CL,CSD,TFU} to track syncobjs for each
266     * queue, but without multisync we only track the last job submitted to any
267     * queue in V3DV_QUEUE_ANY.
268     */
269    uint32_t syncs[V3DV_QUEUE_COUNT];
270 };
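
/* Hedged sketch (helper name is illustrative): selecting which syncobj a new
 * CL job should serialize against, depending on whether multisync is
 * supported.
 *
 *    static inline uint32_t
 *    last_cl_job_syncobj(const struct v3dv_last_job_sync *s, bool multisync)
 *    {
 *       return s->syncs[multisync ? V3DV_QUEUE_CL : V3DV_QUEUE_ANY];
 *    }
 */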
271 
272 struct v3dv_queue {
273    struct vk_queue vk;
274 
275    struct v3dv_device *device;
276 
277    struct v3dv_last_job_sync last_job_syncs;
278 
279    struct v3dv_job *noop_job;
280 
281    /* The last active perfmon ID to prevent mixing of counter results when a
282     * job is submitted with a different perfmon id.
283     */
284    uint32_t last_perfmon_id;
285 };
286 
287 VkResult v3dv_queue_driver_submit(struct vk_queue *vk_queue,
288                                   struct vk_queue_submit *submit);
289 
290 #define V3DV_META_BLIT_CACHE_KEY_SIZE              (4 * sizeof(uint32_t))
291 #define V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE (3 * sizeof(uint32_t) + \
292                                                     sizeof(VkComponentMapping))
293 
294 struct v3dv_meta_color_clear_pipeline {
295    VkPipeline pipeline;
296    VkRenderPass pass;
297    bool cached;
298    uint64_t key;
299 };
300 
301 struct v3dv_meta_depth_clear_pipeline {
302    VkPipeline pipeline;
303    uint64_t key;
304 };
305 
306 struct v3dv_meta_blit_pipeline {
307    VkPipeline pipeline;
308    VkRenderPass pass;
309    VkRenderPass pass_no_load;
310    uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];
311 };
312 
313 struct v3dv_meta_texel_buffer_copy_pipeline {
314    VkPipeline pipeline;
315    VkRenderPass pass;
316    VkRenderPass pass_no_load;
317    uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];
318 };
319 
320 struct v3dv_pipeline_key {
321    uint8_t topology;
322    uint8_t logicop_func;
323    bool msaa;
324    bool sample_alpha_to_coverage;
325    bool sample_alpha_to_one;
326    uint8_t cbufs;
327    struct {
328       enum pipe_format format;
329       uint8_t swizzle[4];
330    } color_fmt[V3D_MAX_DRAW_BUFFERS];
331    uint8_t f32_color_rb;
332    uint32_t va_swap_rb_mask;
333    bool has_multiview;
334    bool line_smooth;
335 };
336 
337 struct v3dv_pipeline_cache_stats {
338    uint32_t miss;
339    uint32_t hit;
340    uint32_t count;
341    uint32_t on_disk_hit;
342 };
343 
344 /* Equivalent to gl_shader_stage, but including the coordinate shaders
345  *
346  * FIXME: perhaps move to common
347  */
348 enum broadcom_shader_stage {
349    BROADCOM_SHADER_VERTEX,
350    BROADCOM_SHADER_VERTEX_BIN,
351    BROADCOM_SHADER_GEOMETRY,
352    BROADCOM_SHADER_GEOMETRY_BIN,
353    BROADCOM_SHADER_FRAGMENT,
354    BROADCOM_SHADER_COMPUTE,
355 };
356 
357 #define BROADCOM_SHADER_STAGES (BROADCOM_SHADER_COMPUTE + 1)
358 
359 /* Assumes that coordinate shaders will be custom-handled by the caller */
360 static inline enum broadcom_shader_stage
361 gl_shader_stage_to_broadcom(gl_shader_stage stage)
362 {
363    switch (stage) {
364    case MESA_SHADER_VERTEX:
365       return BROADCOM_SHADER_VERTEX;
366    case MESA_SHADER_GEOMETRY:
367       return BROADCOM_SHADER_GEOMETRY;
368    case MESA_SHADER_FRAGMENT:
369       return BROADCOM_SHADER_FRAGMENT;
370    case MESA_SHADER_COMPUTE:
371       return BROADCOM_SHADER_COMPUTE;
372    default:
373       unreachable("Unknown gl shader stage");
374    }
375 }
376 
377 static inline gl_shader_stage
378 broadcom_shader_stage_to_gl(enum broadcom_shader_stage stage)
379 {
380    switch (stage) {
381    case BROADCOM_SHADER_VERTEX:
382    case BROADCOM_SHADER_VERTEX_BIN:
383       return MESA_SHADER_VERTEX;
384    case BROADCOM_SHADER_GEOMETRY:
385    case BROADCOM_SHADER_GEOMETRY_BIN:
386       return MESA_SHADER_GEOMETRY;
387    case BROADCOM_SHADER_FRAGMENT:
388       return MESA_SHADER_FRAGMENT;
389    case BROADCOM_SHADER_COMPUTE:
390       return MESA_SHADER_COMPUTE;
391    default:
392       unreachable("Unknown broadcom shader stage");
393    }
394 }
395 
396 static inline bool
397 broadcom_shader_stage_is_binning(enum broadcom_shader_stage stage)
398 {
399    switch (stage) {
400    case BROADCOM_SHADER_VERTEX_BIN:
401    case BROADCOM_SHADER_GEOMETRY_BIN:
402       return true;
403    default:
404       return false;
405    }
406 }
407 
408 static inline bool
409 broadcom_shader_stage_is_render_with_binning(enum broadcom_shader_stage stage)
410 {
411    switch (stage) {
412    case BROADCOM_SHADER_VERTEX:
413    case BROADCOM_SHADER_GEOMETRY:
414       return true;
415    default:
416       return false;
417    }
418 }
419 
420 static inline enum broadcom_shader_stage
421 broadcom_binning_shader_stage_for_render_stage(enum broadcom_shader_stage stage)
422 {
423    switch (stage) {
424    case BROADCOM_SHADER_VERTEX:
425       return BROADCOM_SHADER_VERTEX_BIN;
426    case BROADCOM_SHADER_GEOMETRY:
427       return BROADCOM_SHADER_GEOMETRY_BIN;
428    default:
429       unreachable("Invalid shader stage");
430    }
431 }
432 
433 static inline const char *
434 broadcom_shader_stage_name(enum broadcom_shader_stage stage)
435 {
436    switch (stage) {
437    case BROADCOM_SHADER_VERTEX_BIN:
438       return "MESA_SHADER_VERTEX_BIN";
439    case BROADCOM_SHADER_GEOMETRY_BIN:
440       return "MESA_SHADER_GEOMETRY_BIN";
441    default:
442       return gl_shader_stage_name(broadcom_shader_stage_to_gl(stage));
443    }
444 }
445 
446 struct v3dv_pipeline_cache {
447    struct vk_object_base base;
448 
449    struct v3dv_device *device;
450    mtx_t mutex;
451 
452    struct hash_table *nir_cache;
453    struct v3dv_pipeline_cache_stats nir_stats;
454 
455    struct hash_table *cache;
456    struct v3dv_pipeline_cache_stats stats;
457 
458    /* For VK_EXT_pipeline_creation_cache_control. */
459    bool externally_synchronized;
460 };
461 
462 struct v3dv_device {
463    struct vk_device vk;
464 
465    struct v3dv_instance *instance;
466    struct v3dv_physical_device *pdevice;
467 
468    struct v3d_device_info devinfo;
469    struct v3dv_queue queue;
470 
471    /* Guards query->maybe_available and value for timestamps */
472    mtx_t query_mutex;
473 
474    /* Signaled whenever a query is ended */
475    cnd_t query_ended;
476 
477    /* Resources used for meta operations */
478    struct {
479       mtx_t mtx;
480       struct {
481          VkPipelineLayout p_layout;
482          struct hash_table *cache; /* v3dv_meta_color_clear_pipeline */
483       } color_clear;
484       struct {
485          VkPipelineLayout p_layout;
486          struct hash_table *cache; /* v3dv_meta_depth_clear_pipeline */
487       } depth_clear;
488       struct {
489          VkDescriptorSetLayout ds_layout;
490          VkPipelineLayout p_layout;
491          struct hash_table *cache[3]; /* v3dv_meta_blit_pipeline for 1d, 2d, 3d */
492       } blit;
493       struct {
494          VkDescriptorSetLayout ds_layout;
495          VkPipelineLayout p_layout;
496          struct hash_table *cache[3]; /* v3dv_meta_texel_buffer_copy_pipeline for 1d, 2d, 3d */
497       } texel_buffer_copy;
498    } meta;
499 
500    struct v3dv_bo_cache {
501       /** List of struct v3d_bo freed, by age. */
502       struct list_head time_list;
503       /** List of struct v3d_bo freed, per size, by age. */
504       struct list_head *size_list;
505       uint32_t size_list_size;
506 
507       mtx_t lock;
508 
509       uint32_t cache_size;
510       uint32_t cache_count;
511       uint32_t max_cache_size;
512    } bo_cache;
513 
514    uint32_t bo_size;
515    uint32_t bo_count;
516 
517    /* Event handling resources.
518     *
519     * Our implementation of events uses a BO to store event state (signaled vs
520     * reset) and dispatches compute shaders to handle GPU event functions
521     * (signal, reset, wait). This struct holds all the resources required
522     * by the implementation.
523     */
524    struct {
525       mtx_t lock;
526 
527       /* BO for the event states: signaled (1) or reset (0) */
528       struct v3dv_bo *bo;
529 
530       /* We pre-allocate all the events we can fit for the size of the BO we
531        * create to track their states, where each event has an index which is
532        * basically the offset of its state in that BO. We keep a free list with
533        * the pre-allocated events that are available.
534        */
535       uint32_t event_count;
536       struct v3dv_event *events;
537       struct list_head free_list;
538 
539       /* Vulkan resources to access the event BO from shaders. We have a
540        * pipeline that sets the state of an event and another that waits on
541        * a single event. Both pipelines require access to the event state BO,
542        * for which we need to allocate a single descriptor set.
543        */
544       VkBuffer buffer;
545       VkDeviceMemory mem;
546       VkDescriptorSetLayout descriptor_set_layout;
547       VkPipelineLayout pipeline_layout;
548       VkDescriptorPool descriptor_pool;
549       VkDescriptorSet descriptor_set;
550       VkPipeline set_event_pipeline;
551       VkPipeline wait_event_pipeline;
552    } events;
553 
554    /* Query handling resources.
555     *
556     * Our implementation of occlusion queries uses a BO per pool to keep track
557     * of the per-query availability state and dispatches compute shaders to
558     * handle GPU query functions that read and write that state. This struct
559     * holds Vulkan resources that can be shared across all query pools to
560     * implement this. This framework may be extended in the future to handle
561     * more query types.
562     */
563    struct {
564       VkDescriptorSetLayout buf_descriptor_set_layout;
565 
566       /* Set query availability */
567       VkPipelineLayout avail_pipeline_layout;
568       VkPipeline avail_pipeline;
569 
570       /* Reset query availability and clear occlusion counters */
571       VkPipelineLayout reset_occlusion_pipeline_layout;
572       VkPipeline reset_occlusion_pipeline;
573 
574       /* Copy query results */
575       VkPipelineLayout copy_pipeline_layout;
576       VkPipeline copy_pipeline[8];
577    } queries;
578 
579    struct v3dv_pipeline_cache default_pipeline_cache;
580 
581    /* GL_SHADER_STATE_RECORD needs to specify default attribute values. The
582     * following covers the most common case, that is, all attribute formats
583     * being float, allowing us to reuse the same BO for all
584     * pipelines matching this requirement. Pipelines that need integer
585     * attributes will create their own BO.
586     *
587     * Note that since v71 the default attribute values are not needed, so this
588     * can be NULL.
589     */
590    struct v3dv_bo *default_attribute_float;
591 
592    void *device_address_mem_ctx;
593    struct util_dynarray device_address_bo_list; /* Array of struct v3dv_bo * */
594 };
595 
596 struct v3dv_device_memory {
597    struct vk_device_memory vk;
598 
599    struct v3dv_bo *bo;
600    const VkMemoryType *type;
601    bool is_for_wsi;
602    bool is_for_device_address;
603 };
604 
605 #define V3D_OUTPUT_IMAGE_FORMAT_NO 255
606 #define TEXTURE_DATA_FORMAT_NO     255
607 
608 #define V3DV_MAX_PLANE_COUNT 3
609 struct v3dv_format_plane {
610    /* One of V3D42_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */
611    uint8_t rt_type;
612 
613    /* One of V3D42_TEXTURE_DATA_FORMAT_*. */
614    uint8_t tex_type;
615 
616    /* Swizzle to apply to the RGBA shader output for storing to the tile
617     * buffer, to the RGBA tile buffer to produce shader input (for
618     * blending), and for turning the rgba8888 texture sampler return
619     * value into shader rgba values.
620     */
621    uint8_t swizzle[4];
622 
623    /* Whether the return value is 16F/I/UI or 32F/I/UI. */
624    uint8_t return_size;
625 };
626 
627 struct v3dv_format {
628    /* Non 0 plane count implies supported */
629    uint8_t plane_count;
630 
631    struct v3dv_format_plane planes[V3DV_MAX_PLANE_COUNT];
632 
633    /* If the format supports (linear) filtering when texturing. */
634    bool supports_filtering;
635 };
636 
637 /* Note that although VkImageAspectFlags would allow combining more than one
638  * PLANE bit, for all the use cases we implement that use VkImageAspectFlags,
639  * only one plane is allowed, like for example vkCmdCopyImage:
640  *
641  *   "If srcImage has a VkFormat with two planes then for each element of
642  *    pRegions, srcSubresource.aspectMask must be VK_IMAGE_ASPECT_PLANE_0_BIT
643  *    or VK_IMAGE_ASPECT_PLANE_1_BIT"
644  *
645  */
646 static uint8_t v3dv_plane_from_aspect(VkImageAspectFlags aspect)
647 {
648    switch (aspect) {
649    case VK_IMAGE_ASPECT_COLOR_BIT:
650    case VK_IMAGE_ASPECT_DEPTH_BIT:
651    case VK_IMAGE_ASPECT_STENCIL_BIT:
652    case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
653    case VK_IMAGE_ASPECT_PLANE_0_BIT:
654    case VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT:
655       return 0;
656    case VK_IMAGE_ASPECT_PLANE_1_BIT:
657    case VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT:
658       return 1;
659    case VK_IMAGE_ASPECT_PLANE_2_BIT:
660    case VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT:
661       return 2;
662    default:
663       unreachable("invalid image aspect");
664    }
665 }
666 
667 struct v3d_resource_slice {
668    uint32_t offset;
669    uint32_t stride;
670    uint32_t padded_height;
671    uint32_t width;
672    uint32_t height;
673    /* Size of a single pane of the slice.  For 3D textures, there will be
674     * a number of panes equal to the minified, power-of-two-aligned
675     * depth.
676     */
677    uint32_t size;
678    uint8_t ub_pad;
679    enum v3d_tiling_mode tiling;
680    uint32_t padded_height_of_output_image_in_uif_blocks;
681 };
682 
683 bool v3dv_format_swizzle_needs_rb_swap(const uint8_t *swizzle);
684 bool v3dv_format_swizzle_needs_reverse(const uint8_t *swizzle);
685 
686 struct v3dv_image {
687    struct vk_image vk;
688 
689    const struct v3dv_format *format;
690    bool tiled;
691 
692    uint8_t plane_count;
693 
694    /* If 0, this is a multi-plane image using disjoint memory, where each
695     * plane binds a different device memory. Otherwise, all the planes share
696     * the same device memory and this stores the total size of the image in
697     * bytes.
698     */
699    uint32_t non_disjoint_size;
700 
701    struct {
702       uint32_t cpp;
703 
704       struct v3d_resource_slice slices[V3D_MAX_MIP_LEVELS];
705       /* Total size of the plane in bytes. */
706       uint64_t size;
707       uint32_t cube_map_stride;
708 
709       /* If not using disjoint memory, mem and mem_offset are the same for all
710        * planes, in which case mem_offset is the offset of plane 0.
711        */
712       struct v3dv_device_memory *mem;
713       VkDeviceSize mem_offset;
714       uint32_t alignment;
715 
716       /* Pre-subsampled per plane width and height
717        */
718       uint32_t width;
719       uint32_t height;
720 
721       /* Even if we can get it from the parent image format, we keep the
722        * format here for convenience
723        */
724       VkFormat vk_format;
725    } planes[V3DV_MAX_PLANE_COUNT];
726 
727    /* Used only when sampling a linear texture (which V3D doesn't support).
728     * This holds a tiled copy of the image we can use for that purpose.
729     */
730    struct v3dv_image *shadow;
731 };
732 
733 VkResult
734 v3dv_image_init(struct v3dv_device *device,
735                 const VkImageCreateInfo *pCreateInfo,
736                 const VkAllocationCallbacks *pAllocator,
737                 struct v3dv_image *image);
738 
739 VkImageViewType v3dv_image_type_to_view_type(VkImageType type);
740 
741 static uint32_t
742 v3dv_image_aspect_to_plane(const struct v3dv_image *image,
743                            VkImageAspectFlagBits aspect)
744 {
745    assert(util_bitcount(aspect) == 1 && (aspect & image->vk.aspects));
746 
747    /* Because we always put image and view planes in aspect-bit-order, the
748     * plane index is the number of bits in the image aspect before aspect.
749     */
750    return util_bitcount(image->vk.aspects & (aspect - 1));
751 }
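
/* Worked example: for a 2-plane image, vk.aspects is
 * VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT (0x10 | 0x20).
 * Asking for PLANE_1 (0x20) gives util_bitcount(0x30 & 0x1f) =
 * util_bitcount(0x10) = 1, i.e. plane 1, while PLANE_0 gives
 * util_bitcount(0x30 & 0x0f) = 0.
 */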
752 
753 /* Pre-generating packets needs to consider changes in packet sizes across hw
754  * versions. Keep things simple and allocate enough space for any supported
755  * version. We ensure the size is large enough through static asserts.
756  */
757 #define V3DV_TEXTURE_SHADER_STATE_LENGTH 32
758 #define V3DV_SAMPLER_STATE_LENGTH 24
759 #define V3DV_BLEND_CFG_LENGTH 5
760 #define V3DV_CFG_BITS_LENGTH 4
761 #define V3DV_GL_SHADER_STATE_RECORD_LENGTH 36
762 #define V3DV_VCM_CACHE_SIZE_LENGTH 2
763 #define V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH 16
764 #define V3DV_STENCIL_CFG_LENGTH 6
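
/* Hedged sketch of the kind of check done in the version-specific code; the
 * STATIC_ASSERT/cl_packet_length spelling follows the broadcom CL packing
 * helpers and may differ slightly in the actual sources:
 *
 *    STATIC_ASSERT(cl_packet_length(TEXTURE_SHADER_STATE) <=
 *                  V3DV_TEXTURE_SHADER_STATE_LENGTH);
 */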
765 
766 struct v3dv_image_view {
767    struct vk_image_view vk;
768 
769    const struct v3dv_format *format;
770 
771    uint8_t view_swizzle[4];
772 
773    uint8_t plane_count;
774    struct {
775       uint8_t image_plane;
776 
777       bool swap_rb;
778       bool channel_reverse;
779       uint32_t internal_bpp;
780       uint32_t internal_type;
781       uint32_t offset;
782 
783       /* Precomputed swizzle (composed from the view swizzle and the format
784        * swizzle).
785        *
786        * This could also be included on the descriptor bo, but the shader state
787        * packet doesn't need it on a bo, so we can just avoid a memory copy
788        */
789       uint8_t swizzle[4];
790 
791       /* Prepacked TEXTURE_SHADER_STATE. It will be copied to the descriptor info
792        * during UpdateDescriptorSets.
793        *
794        * Empirical tests show that cube arrays need a different shader state
795        * depending on whether they are used with a sampler or not, so for these
796        * we generate two states and select the one to use based on the descriptor
797        * type.
798        */
799       uint8_t texture_shader_state[2][V3DV_TEXTURE_SHADER_STATE_LENGTH];
800    } planes[V3DV_MAX_PLANE_COUNT];
801 
802    /* Used only when sampling a linear texture (which V3D doesn't support).
803     * This would represent a view over the tiled shadow image.
804     */
805    struct v3dv_image_view *shadow;
806 };
807 
808 VkResult v3dv_create_image_view(struct v3dv_device *device,
809                                 const VkImageViewCreateInfo *pCreateInfo,
810                                 VkImageView *pView);
811 
812 uint32_t v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer,
813                            uint8_t plane);
814 
815 struct v3dv_buffer {
816    struct vk_object_base base;
817 
818    VkDeviceSize size;
819    VkBufferUsageFlagBits2KHR usage;
820    uint32_t alignment;
821 
822    struct v3dv_device_memory *mem;
823    VkDeviceSize mem_offset;
824 };
825 
826 void
827 v3dv_buffer_init(struct v3dv_device *device,
828                  const VkBufferCreateInfo *pCreateInfo,
829                  struct v3dv_buffer *buffer,
830                  uint32_t alignment);
831 
832 void
833 v3dv_buffer_bind_memory(const VkBindBufferMemoryInfo *info);
834 
835 struct v3dv_buffer_view {
836    struct vk_object_base base;
837 
838    struct v3dv_buffer *buffer;
839 
840    VkFormat vk_format;
841    const struct v3dv_format *format;
842    uint32_t internal_bpp;
843    uint32_t internal_type;
844 
845    uint32_t offset;
846    uint32_t size;
847    uint32_t num_elements;
848 
849    /* Prepacked TEXTURE_SHADER_STATE. */
850    uint8_t texture_shader_state[V3DV_TEXTURE_SHADER_STATE_LENGTH];
851 };
852 
853 struct v3dv_subpass_attachment {
854    uint32_t attachment;
855    VkImageLayout layout;
856 };
857 
858 struct v3dv_subpass {
859    uint32_t input_count;
860    struct v3dv_subpass_attachment *input_attachments;
861 
862    uint32_t color_count;
863    struct v3dv_subpass_attachment *color_attachments;
864    struct v3dv_subpass_attachment *resolve_attachments;
865 
866    struct v3dv_subpass_attachment ds_attachment;
867    struct v3dv_subpass_attachment ds_resolve_attachment;
868    bool resolve_depth, resolve_stencil;
869 
870    /* If we need to emit the clear of the depth/stencil attachment using a
871     * draw call instead of using the TLB (GFXH-1461).
872     */
873    bool do_depth_clear_with_draw;
874    bool do_stencil_clear_with_draw;
875 
876    /* Multiview */
877    uint32_t view_mask;
878 };
879 
880 struct v3dv_render_pass_attachment {
881    VkAttachmentDescription2 desc;
882 
883    uint32_t first_subpass;
884    uint32_t last_subpass;
885 
886    /* When multiview is enabled, we no longer care about when a particular
887     * attachment is first or last used in a render pass, since not all views
888     * in the attachment will meet that criteria. Instead, we need to track
889     * each individual view (layer) in each attachment and emit our stores,
890     * loads and clears accordingly.
891     */
892    struct {
893       uint32_t first_subpass;
894       uint32_t last_subpass;
895    } views[MAX_MULTIVIEW_VIEW_COUNT];
896 
897    /* If this is a multisampled attachment that is going to be resolved,
898     * whether we may be able to use the TLB hardware resolve based on the
899     * attachment format.
900     */
901    bool try_tlb_resolve;
902 };
903 
904 struct v3dv_render_pass {
905    struct vk_object_base base;
906 
907    bool multiview_enabled;
908 
909    uint32_t attachment_count;
910    struct v3dv_render_pass_attachment *attachments;
911 
912    uint32_t subpass_count;
913    struct v3dv_subpass *subpasses;
914 
915    struct v3dv_subpass_attachment *subpass_attachments;
916 };
917 
918 struct v3dv_framebuffer {
919    struct vk_object_base base;
920 
921    uint32_t width;
922    uint32_t height;
923    uint32_t layers;
924 
925    /* Typically, edge tiles in the framebuffer have padding depending on the
926     * underlying tiling layout. One consequence of this is that when the
927     * framebuffer dimensions are not aligned to tile boundaries, tile stores
928     * would still write full tiles on the edges and write to the padded area.
929     * If the framebuffer is aliasing a smaller region of a larger image, then
930     * we need to be careful with this though, as we won't have padding on the
931     * edge tiles (which typically means that we need to load the tile buffer
932     * before we store).
933     */
934    bool has_edge_padding;
935 
936    uint32_t attachment_count;
937    uint32_t color_attachment_count;
938 
939    /* Notice that elements in 'attachments' will be NULL if the framebuffer
940     * was created imageless. The driver is expected to access attachment info
941     * from the command buffer state instead.
942     */
943    struct v3dv_image_view *attachments[0];
944 };
945 
946 struct v3dv_frame_tiling {
947    uint32_t width;
948    uint32_t height;
949    uint32_t layers;
950    uint32_t render_target_count;
951    uint32_t internal_bpp;
952    uint32_t total_color_bpp;
953    bool     msaa;
954    bool     double_buffer;
955    uint32_t tile_width;
956    uint32_t tile_height;
957    uint32_t draw_tiles_x;
958    uint32_t draw_tiles_y;
959    uint32_t supertile_width;
960    uint32_t supertile_height;
961    uint32_t frame_width_in_supertiles;
962    uint32_t frame_height_in_supertiles;
963 };
964 
965 bool v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device,
966                                        const VkRect2D *area,
967                                        struct v3dv_framebuffer *fb,
968                                        struct v3dv_render_pass *pass,
969                                        uint32_t subpass_idx);
970 
971 /* Checks if we need to emit 2 initial tile clears for double buffer mode.
972  * This happens when we render at least 2 tiles, because in this mode each
973  * tile uses a different half of the tile buffer memory so we can have 2 tiles
974  * in flight (one being stored to memory and the next being rendered). In this
975  * scenario, if we emit a single initial tile clear we would only clear the
976  * first half of the tile buffer.
977  */
978 static inline bool
979 v3dv_do_double_initial_tile_clear(const struct v3dv_frame_tiling *tiling)
980 {
981    return tiling->double_buffer &&
982           (tiling->draw_tiles_x > 1 || tiling->draw_tiles_y > 1 ||
983            tiling->layers > 1);
984 }
985 
986 enum v3dv_cmd_buffer_status {
987    V3DV_CMD_BUFFER_STATUS_NEW           = 0,
988    V3DV_CMD_BUFFER_STATUS_INITIALIZED   = 1,
989    V3DV_CMD_BUFFER_STATUS_RECORDING     = 2,
990    V3DV_CMD_BUFFER_STATUS_EXECUTABLE    = 3
991 };
992 
993 union v3dv_clear_value {
994    uint32_t color[4];
995    struct {
996       float z;
997       uint8_t s;
998    };
999 };
1000 
1001 struct v3dv_cmd_buffer_attachment_state {
1002    /* The original clear value as provided by the Vulkan API */
1003    VkClearValue vk_clear_value;
1004 
1005    /* The hardware clear value */
1006    union v3dv_clear_value clear_value;
1007 
1008    /* The underlying image view (from the framebuffer or, if imageless
1009     * framebuffer is used, from VkRenderPassAttachmentBeginInfo).
1010     */
1011    struct v3dv_image_view *image_view;
1012 
1013    /* If this is a multisampled attachment with a resolve operation. */
1014    bool has_resolve;
1015 
1016    /* If this is a multisampled attachment with a resolve operation,
1017     * whether we can use the TLB for the resolve.
1018     */
1019    bool use_tlb_resolve;
1020 };
1021 
1022 /* Cached values derived from Vulkan viewport/count */
1023 struct v3dv_viewport_state {
1024    float translate[MAX_VIEWPORTS][3];
1025    float scale[MAX_VIEWPORTS][3];
1026 };
1027 
1028 /* Flags for custom dirty state, that could lead to packet emission.
1029  *
1030  * Note *custom*, for all the dynamic state tracking coming from the Vulkan
1031  * API, we use the Mesa runtime framework and its predefined flags
1032  * (MESA_VK_DYNAMIC_XXX).
1033  *
1034  * Here we define additional flags used to track dirty state.
1035  */
1036 enum v3dv_cmd_dirty_bits {
1037    V3DV_CMD_DIRTY_PIPELINE                  = 1 << 0,
1038    V3DV_CMD_DIRTY_COMPUTE_PIPELINE          = 1 << 1,
1039    V3DV_CMD_DIRTY_VERTEX_BUFFER             = 1 << 2,
1040    V3DV_CMD_DIRTY_INDEX_BUFFER              = 1 << 3,
1041    V3DV_CMD_DIRTY_DESCRIPTOR_SETS           = 1 << 4,
1042    V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS   = 1 << 5,
1043    V3DV_CMD_DIRTY_PUSH_CONSTANTS            = 1 << 6,
1044    V3DV_CMD_DIRTY_PUSH_CONSTANTS_UBO        = 1 << 7,
1045    V3DV_CMD_DIRTY_OCCLUSION_QUERY           = 1 << 8,
1046    V3DV_CMD_DIRTY_VIEW_INDEX                = 1 << 9,
1047    V3DV_CMD_DIRTY_DRAW_ID                   = 1 << 10,
1048    V3DV_CMD_DIRTY_ALL                       = (1 << 11) - 1,
1049 };
1050 
1051 struct v3dv_dynamic_state {
1052    /* FIXME: we keep some viewport info cached (translate, scale) because we
1053     * use it in more than one place. But note that translate_z and scale_z
1054     * are also used in several places, and we recompute them based on
1055     * scissor/viewport info all the time. So perhaps we could do the same with the
1056     * x and y component.
1057     */
1058    struct v3dv_viewport_state viewport;
1059 
1060    /* We cache the color_write_enable because the vulkan runtime keeps an 8-bit
1061     * bitset with a bit per attachment, but in order to combine it with the
1062     * color_write_masks it is easier to cache a 32-bit bitset with 4 bits per
1063     * attachment.
1064     */
1065    uint32_t color_write_enable;
1066 };
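
/* Hedged sketch (helper name is illustrative): expanding the runtime's
 * one-bit-per-attachment enable mask into the 4-bits-per-attachment layout
 * cached above, so it can be ANDed directly with the color write masks.
 *
 *    static inline uint32_t
 *    expand_color_write_enable(uint8_t enables)
 *    {
 *       uint32_t out = 0;
 *       for (unsigned rt = 0; rt < 8; rt++) {
 *          if (enables & (1u << rt))
 *             out |= 0xfu << (rt * 4);
 *       }
 *       return out;
 *    }
 */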
1067 
1068 void v3dv_viewport_compute_xform(const VkViewport *viewport,
1069                                  float scale[3],
1070                                  float translate[3]);
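
/* For reference, this computes the standard Vulkan viewport mapping (any
 * extra device-specific adjustments happen elsewhere):
 *
 *    scale     = { width / 2, height / 2, maxDepth - minDepth }
 *    translate = { x + width / 2, y + height / 2, minDepth }
 */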
1071 
1072 enum v3dv_ez_state {
1073    V3D_EZ_UNDECIDED = 0,
1074    V3D_EZ_GT_GE,
1075    V3D_EZ_LT_LE,
1076    V3D_EZ_DISABLED,
1077 };
1078 
1079 enum v3dv_job_type {
1080    V3DV_JOB_TYPE_GPU_CL = 0,
1081    V3DV_JOB_TYPE_GPU_CL_INCOMPLETE,
1082    V3DV_JOB_TYPE_GPU_TFU,
1083    V3DV_JOB_TYPE_GPU_CSD,
1084    V3DV_JOB_TYPE_CPU_RESET_QUERIES,
1085    V3DV_JOB_TYPE_CPU_END_QUERY,
1086    V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS,
1087    V3DV_JOB_TYPE_CPU_CSD_INDIRECT,
1088    V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
1089 };
1090 
1091 struct v3dv_reset_query_cpu_job_info {
1092    struct v3dv_query_pool *pool;
1093    uint32_t first;
1094    uint32_t count;
1095 };
1096 
1097 struct v3dv_end_query_info {
1098    struct v3dv_query_pool *pool;
1099    uint32_t query;
1100 
1101    /* This is one unless multiview is used */
1102    uint32_t count;
1103 };
1104 
1105 struct v3dv_copy_query_results_cpu_job_info {
1106    struct v3dv_query_pool *pool;
1107    uint32_t first;
1108    uint32_t count;
1109    struct v3dv_buffer *dst;
1110    uint32_t offset;
1111    uint32_t stride;
1112    VkQueryResultFlags flags;
1113 };
1114 
1115 struct v3dv_submit_sync_info {
1116    /* List of syncs to wait before running a job */
1117    uint32_t wait_count;
1118    struct vk_sync_wait *waits;
1119 
1120    /* List of syncs to signal when all jobs complete */
1121    uint32_t signal_count;
1122    struct vk_sync_signal *signals;
1123 };
1124 
1125 struct v3dv_csd_indirect_cpu_job_info {
1126    struct v3dv_buffer *buffer;
1127    uint32_t offset;
1128    struct v3dv_job *csd_job;
1129    uint32_t wg_size;
1130    uint32_t *wg_uniform_offsets[3];
1131    bool needs_wg_uniform_rewrite;
1132 };
1133 
1134 struct v3dv_timestamp_query_cpu_job_info {
1135    struct v3dv_query_pool *pool;
1136    uint32_t query;
1137 
1138    /* This is one unless multiview is used */
1139    uint32_t count;
1140 };
1141 
1142 /* Number of perfmons required to handle all supported performance counters */
1143 #define V3DV_MAX_PERFMONS DIV_ROUND_UP(V3D_MAX_PERFCNT, \
1144                                        DRM_V3D_MAX_PERF_COUNTERS)
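
/* Worked example: assuming the kernel exposes 32 counters per perfmon
 * (DRM_V3D_MAX_PERF_COUNTERS == 32), tracking all 93 supported counters
 * requires DIV_ROUND_UP(93, 32) = 3 perfmons.
 */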
1145 
1146 struct v3dv_perf_query {
1147    uint32_t kperfmon_ids[V3DV_MAX_PERFMONS];
1148 
1149    /* A DRM syncobj to wait on the GPU jobs for which we are collecting
1150     * performance data.
1151     */
1152    struct vk_sync *last_job_sync;
1153 };
1154 
1155 struct v3dv_job {
1156    struct list_head list_link;
1157 
1158    /* We only create job clones when executing secondary command buffers into
1159     * primaries. These clones don't make deep copies of the original object
1160     * so we want to flag them to avoid freeing resources they don't own.
1161     */
1162    bool is_clone;
1163 
1164    /* If this is a cloned job, whether it has its own BCL resource. This happens
1165     * when we suspend jobs in command buffers recorded with the
1166     * VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT flag.
1167     */
1168    bool clone_owns_bcl;
1169 
1170    /* VK_KHR_dynamic_rendering */
1171    bool suspending;
1172    bool resuming;
1173    struct v3dv_cl_out *suspend_branch_inst_ptr;
1174    uint32_t suspended_bcl_end;
1175 
1176    /* If the job executes on the transfer stage of the pipeline */
1177    bool is_transfer;
1178 
1179    /* VK_KHR_buffer_device_address allows shaders to use pointers that can
1180     * dereference memory in any buffer that has been flagged with
1181     * VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT. These buffers may not
1182     * be bound via descriptor sets, so we need to make sure that a job that
1183     * uses this functionality includes all these buffers in its kernel
1184     * submission.
1185     */
1186    bool uses_buffer_device_address;
1187 
1188    /* True if we have not identified anything that would be incompatible
1189     * with double-buffer (like MSAA) or that would make double-buffer mode
1190     * not efficient (like tile loads or not having any stores).
1191     */
1192    bool can_use_double_buffer;
1193 
1194    /* This structure keeps track of various scores to inform a heuristic
1195     * for double-buffer mode.
1196     */
1197    struct {
1198       /* Cost of geometry shading */
1199       uint32_t geom;
1200       /* Cost of shader rendering */
1201       uint32_t render;
1202    } double_buffer_score;
1203 
1204    /* We only need to allocate tile state for all layers if the binner
1205     * writes primitives to layers other than the first. This can only be
1206     * done using layered rendering (writing gl_Layer from a geometry shader),
1207     * so for other cases of multilayered framebuffers (typically with
1208     * meta copy/clear operations) that won't use layered rendering, we only
1209     * need one layer worth of of tile state for the binner.
1210     * need one layer's worth of tile state for the binner.
1211    bool allocate_tile_state_for_all_layers;
1212 
1213    /* A pointer to the location of the TILE_BINNING_MODE_CFG packet so we can
1214     * rewrite it to enable double-buffer mode by the time we have enough info
1215     * about the job to make that decision.
1216     */
1217    struct v3dv_cl_out *bcl_tile_binning_mode_ptr;
1218 
1219    enum v3dv_job_type type;
1220 
1221    struct v3dv_device *device;
1222 
1223    struct v3dv_cmd_buffer *cmd_buffer;
1224 
1225    struct v3dv_cl bcl;
1226    struct v3dv_cl rcl;
1227    struct v3dv_cl indirect;
1228 
1229    /* Set of all BOs referenced by the job. This will be used for making
1230     * the list of BOs that the kernel will need to have paged in to
1231     * execute our job.
1232     */
1233    struct set *bos;
1234    uint32_t bo_count;
1235    uint64_t bo_handle_mask;
1236 
1237    struct v3dv_bo *tile_alloc;
1238    struct v3dv_bo *tile_state;
1239 
1240    bool tmu_dirty_rcl;
1241 
1242    uint32_t first_subpass;
1243 
1244    /* When the current subpass is split into multiple jobs, this flag is set
1245     * to true for any jobs after the first in the same subpass.
1246     */
1247    bool is_subpass_continue;
1248 
1249    /* If this job is the last job emitted for a subpass. */
1250    bool is_subpass_finish;
1251 
1252    struct v3dv_frame_tiling frame_tiling;
1253 
1254    enum v3dv_ez_state ez_state;
1255    enum v3dv_ez_state first_ez_state;
1256 
1257    /* If we have already decided if we need to disable Early Z/S completely
1258     * for this job.
1259     */
1260    bool decided_global_ez_enable;
1261 
1262    /* If the job emitted any draw calls with Early Z/S enabled */
1263    bool has_ez_draws;
1264 
1265    /* If this job has been configured to use early Z/S clear */
1266    bool early_zs_clear;
1267 
1268    /* Number of draw calls recorded into the job */
1269    uint32_t draw_count;
1270 
1271    /* A flag indicating whether we want to flush every draw separately. This
1272     * can be used for debugging, or for cases where special circumstances
1273     * require this behavior.
1274     */
1275    bool always_flush;
1276 
1277    /* A mask of V3DV_BARRIER_* indicating the source(s) of the barrier. We
1278     * can use this to select the hw queues where we need to serialize the job.
1279     */
1280    uint8_t serialize;
1281 
1282    /* If this is a CL job, whether we should sync before binning */
1283    bool needs_bcl_sync;
1284 
1285    /* If we have emitted a (default) point size packet in this job */
1286    bool emitted_default_point_size;
1287 
1288    /* Job specs for CPU jobs */
1289    union {
1290       struct v3dv_reset_query_cpu_job_info          query_reset;
1291       struct v3dv_end_query_info                    query_end;
1292       struct v3dv_copy_query_results_cpu_job_info   query_copy_results;
1293       struct v3dv_csd_indirect_cpu_job_info         csd_indirect;
1294       struct v3dv_timestamp_query_cpu_job_info      query_timestamp;
1295    } cpu;
1296 
1297    /* Job specs for TFU jobs */
1298    struct drm_v3d_submit_tfu tfu;
1299 
1300    /* Job specs for CSD jobs */
1301    struct {
1302       struct v3dv_bo *shared_memory;
1303       uint32_t wg_count[3];
1304       uint32_t wg_base[3];
1305       struct drm_v3d_submit_csd submit;
1306    } csd;
1307 
1308    /* Perfmons with last job sync for CSD and CL jobs */
1309    struct v3dv_perf_query *perf;
1310 };
1311 
1312 void v3dv_job_init(struct v3dv_job *job,
1313                    enum v3dv_job_type type,
1314                    struct v3dv_device *device,
1315                    struct v3dv_cmd_buffer *cmd_buffer,
1316                    int32_t subpass_idx);
1317 void v3dv_job_destroy(struct v3dv_job *job);
1318 
1319 void v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo);
1320 void v3dv_job_add_bo_unchecked(struct v3dv_job *job, struct v3dv_bo *bo);
1321 
1322 void v3dv_job_start_frame(struct v3dv_job *job,
1323                           uint32_t width,
1324                           uint32_t height,
1325                           uint32_t layers,
1326                           bool allocate_tile_state_for_all_layers,
1327                           bool allocate_tile_state_now,
1328                           uint32_t render_target_count,
1329                           uint8_t max_internal_bpp,
1330                           uint8_t total_color_bpp,
1331                           bool msaa);
1332 
1333 bool v3dv_job_type_is_gpu(struct v3dv_job *job);
1334 
1335 struct v3dv_job *
1336 v3dv_job_clone(struct v3dv_job *job, bool skip_bcl);
1337 
1338 struct v3dv_job *
1339 v3dv_job_clone_in_cmd_buffer(struct v3dv_job *job,
1340                              struct v3dv_cmd_buffer *cmd_buffer);
1341 
1342 struct v3dv_job *v3dv_cmd_buffer_create_cpu_job(struct v3dv_device *device,
1343                                                 enum v3dv_job_type type,
1344                                                 struct v3dv_cmd_buffer *cmd_buffer,
1345                                                 uint32_t subpass_idx);
1346 
1347 void
1348 v3dv_cmd_buffer_ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer,
1349                                    uint32_t slot_size,
1350                                    uint32_t used_count,
1351                                    uint32_t *alloc_count,
1352                                    void **ptr);
1353 
1354 void v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer,
1355                                    bool indexed, bool indirect,
1356                                    uint32_t vertex_count);
1357 
1358 bool v3dv_job_allocate_tile_state(struct v3dv_job *job);
1359 
1360 void
1361 v3dv_setup_dynamic_framebuffer(struct v3dv_cmd_buffer *cmd_buffer,
1362                                const VkRenderingInfoKHR *pRenderingInfo);
1363 
1364 void
1365 v3dv_destroy_dynamic_framebuffer(struct v3dv_cmd_buffer *cmd_buffer);
1366 
1367 void
1368 v3dv_setup_dynamic_render_pass(struct v3dv_cmd_buffer *cmd_buffer,
1369                                const VkRenderingInfoKHR *pRenderingInfo);
1370 
1371 void
1372 v3dv_setup_dynamic_render_pass_inheritance(struct v3dv_cmd_buffer *cmd_buffer,
1373                                            const VkCommandBufferInheritanceRenderingInfo *info);
1374 
1375 /* FIXME: only used on v3dv_cmd_buffer and v3dvx_cmd_buffer, perhaps move to a
1376  * cmd_buffer specific header?
1377  */
1378 struct v3dv_draw_info {
1379    uint32_t vertex_count;
1380    uint32_t instance_count;
1381    uint32_t first_vertex;
1382    uint32_t first_instance;
1383 };
1384 
1385 struct v3dv_vertex_binding {
1386    struct v3dv_buffer *buffer;
1387    VkDeviceSize offset;
1388    VkDeviceSize size;
1389 };
1390 
1391 struct v3dv_descriptor_state {
1392    struct v3dv_descriptor_set *descriptor_sets[MAX_SETS];
1393    uint32_t valid;
1394    uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
1395 };
1396 
1397 struct v3dv_cmd_pipeline_state {
1398    struct v3dv_pipeline *pipeline;
1399 
1400    struct v3dv_descriptor_state descriptor_state;
1401 };
1402 
1403 enum {
1404    V3DV_BARRIER_GRAPHICS_BIT = (1 << 0),
1405    V3DV_BARRIER_COMPUTE_BIT  = (1 << 1),
1406    V3DV_BARRIER_TRANSFER_BIT = (1 << 2),
1407    V3DV_BARRIER_CPU_BIT      = (1 << 3),
1408 };
1409 #define V3DV_BARRIER_ALL (V3DV_BARRIER_GRAPHICS_BIT | \
1410                           V3DV_BARRIER_TRANSFER_BIT | \
1411                           V3DV_BARRIER_COMPUTE_BIT | \
1412                           V3DV_BARRIER_CPU_BIT)
1413 
1414 struct v3dv_barrier_state {
1415    /* Mask of V3DV_BARRIER_* indicating where we consume a barrier. */
1416    uint8_t dst_mask;
1417 
1418    /* For each possible consumer of a barrier, a mask of V3DV_BARRIER_*
1419     * indicating the sources of the dependency.
1420     */
1421    uint8_t src_mask_graphics;
1422    uint8_t src_mask_transfer;
1423    uint8_t src_mask_compute;
1424 
1425    /* For graphics barriers, access masks involved. Used to decide if we need
1426     * to execute a binning or render barrier.
1427     */
1428    VkAccessFlags2 bcl_buffer_access;
1429    VkAccessFlags2 bcl_image_access;
1430 };
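
/* Hedged illustration of the intended semantics: a barrier that requires
 * transfer work to finish before later graphics work would be accumulated
 * roughly as follows (the actual accumulation lives in the pipeline-barrier
 * handling code):
 *
 *    barrier->dst_mask |= V3DV_BARRIER_GRAPHICS_BIT;
 *    barrier->src_mask_graphics |= V3DV_BARRIER_TRANSFER_BIT;
 */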
1431 
1432 struct v3dv_cmd_buffer_state {
1433    struct v3dv_render_pass *pass;
1434    struct v3dv_framebuffer *framebuffer;
1435 
1436    /* VK_KHR_dynamic_rendering */
1437    struct v3dv_render_pass dynamic_pass;
1438    struct v3dv_subpass dynamic_subpass;
1439    struct v3dv_render_pass_attachment dynamic_attachments[18 /* (8 color + D/S) x 2 (for resolves) */];
1440    struct v3dv_subpass_attachment dynamic_subpass_attachments[18];
1441    struct v3dv_framebuffer *dynamic_framebuffer;
1442 
1443    VkRect2D render_area;
1444 
1445    /* Current job being recorded */
1446    struct v3dv_job *job;
1447 
1448    uint32_t subpass_idx;
1449 
1450    struct v3dv_cmd_pipeline_state gfx;
1451    struct v3dv_cmd_pipeline_state compute;
1452 
1453    /* For most state tracking we rely on vk_dynamic_graphics_state, but we
1454     * maintain a custom structure for some state-related data that we want to
1455     * cache.
1456     */
1457    struct v3dv_dynamic_state dynamic;
1458 
1459    /* This dirty is for v3dv_cmd_dirty_bits (FIXME: perhaps we should be more
1460     * explicit about it). For dirty flags coming from Vulkan dynamic state,
1461     * use the vk_dynamic_graphics_state handled by the vk_cmd_buffer
1462     */
1463    uint32_t dirty;
1464    VkShaderStageFlagBits dirty_descriptor_stages;
1465    VkShaderStageFlagBits dirty_push_constants_stages;
1466 
1467    /* Current clip window. We use this to check whether we have an active
1468     * scissor, since in that case we can't use TLB clears and need to fallback
1469     * to drawing rects.
1470     */
1471    VkRect2D clip_window;
1472 
1473    /* Whether our render area is aligned to tile boundaries. If this is false
1474     * then we have tiles that are only partially covered by the render area,
1475     * and therefore, we need to be careful with our loads and stores so we don't
1476     * modify pixels for the tile area that is not covered by the render area.
1477     * This means, for example, that we can't use the TLB to clear, since that
1478     * always clears full tiles.
1479     */
1480    bool tile_aligned_render_area;
1481 
1482    /* FIXME: we have just one client-side BO for the push constants,
1483     * independently of the stageFlags in vkCmdPushConstants, and the
1484     * pipelineBindPoint in vkCmdBindPipeline. We could probably do more stage
1485     * tuning in the future if it makes sense.
1486     */
1487    uint32_t push_constants_size;
1488    uint32_t push_constants_data[MAX_PUSH_CONSTANTS_SIZE / 4];
1489 
1490    uint32_t attachment_alloc_count;
1491    struct v3dv_cmd_buffer_attachment_state *attachments;
1492 
1493    struct v3dv_vertex_binding vertex_bindings[MAX_VBS];
1494 
1495    struct {
1496       VkBuffer buffer;
1497       VkDeviceSize offset;
1498       VkDeviceSize size;
1499       uint8_t index_size;
1500    } index_buffer;
1501 
1502    /* Current uniforms */
1503    struct {
1504       struct v3dv_cl_reloc vs_bin;
1505       struct v3dv_cl_reloc vs;
1506       struct v3dv_cl_reloc gs_bin;
1507       struct v3dv_cl_reloc gs;
1508       struct v3dv_cl_reloc fs;
1509    } uniforms;
1510 
1511    /* Current view index for multiview rendering */
1512    uint32_t view_index;
1513 
1514    /* Current draw ID for multidraw */
1515    uint32_t draw_id;
1516 
1517    /* Used to flag OOM conditions during command buffer recording */
1518    bool oom;
1519 
1520    /* If we are currently recording job(s) for a transfer operation */
1521    bool is_transfer;
1522 
1523    /* VK_KHR_dynamic_rendering */
1524    bool suspending;
1525    bool resuming;
1526 
1527    /* Barrier state tracking */
1528    struct v3dv_barrier_state barrier;
1529 
1530    /* Secondary command buffer state */
1531    struct {
1532       bool occlusion_query_enable;
1533    } inheritance;
1534 
1535    /* Command buffer state saved during a meta operation */
1536    struct {
1537       uint32_t subpass_idx;
1538       VkRenderPass pass;
1539       VkFramebuffer framebuffer;
1540 
1541       uint32_t attachment_alloc_count;
1542       uint32_t attachment_count;
1543       struct v3dv_cmd_buffer_attachment_state *attachments;
1544 
1545       bool tile_aligned_render_area;
1546       VkRect2D render_area;
1547 
1548       struct vk_dynamic_graphics_state dynamic_graphics_state;
1549       struct v3dv_dynamic_state dynamic;
1550 
1551       struct v3dv_cmd_pipeline_state gfx;
1552       bool has_descriptor_state;
1553 
1554       uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4];
1555       uint32_t push_constants_size;
1556    } meta;
1557 
1558    /* Command buffer state for queries */
1559    struct {
1560       /* A list of vkCmdQueryEnd commands recorded in the command buffer during
1561        * a render pass. We queue these here and then schedule the corresponding
1562        * CPU jobs for them at the time we finish the GPU job in which they have
1563        * been recorded.
1564        */
1565       struct {
1566          uint32_t used_count;
1567          uint32_t alloc_count;
1568          struct v3dv_end_query_info *states;
1569       } end;
1570 
1571       struct {
1572          /* This BO is not NULL if we have an active occlusion query, that is,
1573           * we have called vkCmdBeginQuery but not vkCmdEndQuery.
1574           */
1575          struct v3dv_bo *bo;
1576          uint32_t offset;
1577          /* When the driver emits draw calls to implement other operations in
1578           * the middle of a render pass (such as an attachment clear), we need
1579           * to pause occlusion query recording and resume it later so that
1580           * these draw calls don't register in occlusion counters. We use
1581           * this to store the BO reference in which we should resume occlusion
1582           * query counters after the driver is done emitting its draw calls.
1583           */
1584          struct v3dv_bo *paused_bo;
1585 
1586          /* This pointer is not NULL if we have an active performance query */
1587          struct v3dv_perf_query *perf;
1588       } active_query;
1589    } query;
1590 
1591    /* This is dynamic state since VK_EXT_extended_dynamic_state. */
1592    bool z_updates_enable;
1593 
1594    /* ez_state can be dynamic since VK_EXT_extended_dynamic_state so we need
1595     * to keep track of it in the cmd_buffer state
1596     */
1597    enum v3dv_ez_state ez_state;
1598 
1599    /* incompatible_ez_test can be dynamic since VK_EXT_extended_dynamic_state
1600     * so we need to keep track of it in the cmd_buffer state
1601     */
1602    bool incompatible_ez_test;
1603 };
1604 
1605 void
1606 v3dv_cmd_buffer_state_get_viewport_z_xform(struct v3dv_cmd_buffer *cmd_buffer,
1607                                            uint32_t vp_idx,
1608                                            float *translate_z, float *scale_z);
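
/* Illustrative note (an assumption, not taken from the driver sources): with
 * the standard Vulkan viewport depth transform z_fb = translate_z + scale_z *
 * z_ndc, the values returned here would typically correspond to:
 *
 *    scale_z     = maxDepth - minDepth
 *    translate_z = minDepth
 *
 * possibly adjusted when the pipeline uses a negative-one-to-one depth range
 * (VK_EXT_depth_clip_control).
 */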
1609 
1610 /* The following struct represents the info from a descriptor that we store in
1611  * host memory. These are mostly links to other existing Vulkan objects, like
1612  * the image_view we use to access swizzle info, or the buffer used
1613  * for a UBO/SSBO, for example.
1614  *
1615  * FIXME: revisit if makes sense to just move everything that would be needed
1616  * from a descriptor to the bo.
1617  */
1618 struct v3dv_descriptor {
1619    VkDescriptorType type;
1620 
1621    union {
1622       struct {
1623          struct v3dv_image_view *image_view;
1624          struct v3dv_sampler *sampler;
1625       };
1626 
1627       struct {
1628          struct v3dv_buffer *buffer;
1629          size_t offset;
1630          size_t range;
1631       };
1632 
1633       struct v3dv_buffer_view *buffer_view;
1634    };
1635 };
1636 
1637 struct v3dv_query {
1638    /* Used by queries where we implement result copying on the CPU so we can
1639     * tell if the relevant jobs have been submitted for execution. Currently
1640     * these are all but occlusion queries.
1641     */
1642    bool maybe_available;
1643 
1644    union {
1645       /* Used by occlusion queries */
1646       struct {
1647          /* Offset of this query in the occlusion query counter BO */
1648          uint32_t offset;
1649       } occlusion;
1650 
1651       /* Used by timestamp queries */
1652       struct {
1653          /* Offset of this query in the timestamp BO for its value */
1654          uint32_t offset;
1655 
1656          /* Syncobj to signal timestamp query availability */
1657          struct vk_sync *sync;
1658       } timestamp;
1659 
1660       /* Used by performance queries */
1661       struct v3dv_perf_query perf;
1662    };
1663 };
1664 
1665 struct v3dv_query_pool {
1666    struct vk_object_base base;
1667 
1668    /* Per-pool Vulkan resources required to implement GPU-side query
1669     * functions (only occlusion queries for now).
1670     */
1671    struct {
1672       /* Buffer to access the BO with the occlusion query results and
1673        * availability info.
1674        */
1675       VkBuffer buf;
1676       VkDeviceMemory mem;
1677 
1678       /* Descriptor set for accessing the buffer from a pipeline. */
1679       VkDescriptorPool descriptor_pool;
1680       VkDescriptorSet descriptor_set;
1681    } meta;
1682 
1683    /* Only used with occlusion queries */
1684    struct {
1685       /* BO with the occlusion counters and query availability */
1686       struct v3dv_bo *bo;
1687       /* Offset of the availability info in the BO */
1688       uint32_t avail_offset;
1689    } occlusion;
1690 
1691    /* Only used with timestamp queries */
1692    struct {
1693       /* BO with the query timestamp values */
1694       struct v3dv_bo *bo;
1695    } timestamp;
1696 
1697    /* Only used with performance queries */
1698    struct {
1699       uint32_t ncounters;
1700       uint8_t counters[V3D_MAX_PERFCNT];
1701 
1702       /* V3D has a limit on the number of counters we can track in a
1703        * single performance monitor, so if too many counters are requested
1704        * we need to create multiple monitors to record all of them. This
1705        * field represents the number of monitors required for the number
1706        * of counters requested.
1707        */
1708       uint8_t nperfmons;
1709    } perfmon;
1710 
1711    VkQueryType query_type;
1712    uint32_t query_count;
1713    struct v3dv_query *queries;
1714 };
1715 
1716 VkResult
1717 v3dv_query_allocate_resources(struct v3dv_device *device);
1718 
1719 void
1720 v3dv_query_free_resources(struct v3dv_device *device);
1721 
1722 VkResult v3dv_get_query_pool_results_cpu(struct v3dv_device *device,
1723                                          struct v3dv_query_pool *pool,
1724                                          uint32_t first,
1725                                          uint32_t count,
1726                                          void *data,
1727                                          VkDeviceSize stride,
1728                                          VkQueryResultFlags flags);
1729 
1730 void v3dv_reset_query_pool_cpu(struct v3dv_device *device,
1731                                struct v3dv_query_pool *query_pool,
1732                                uint32_t first,
1733                                uint32_t last);
1734 
1735 void v3dv_cmd_buffer_emit_set_query_availability(struct v3dv_cmd_buffer *cmd_buffer,
1736                                                  struct v3dv_query_pool *pool,
1737                                                  uint32_t query, uint32_t count,
1738                                                  uint8_t availability);
1739 
1740 typedef void (*v3dv_cmd_buffer_private_obj_destroy_cb)(VkDevice device,
1741                                                        uint64_t pobj,
1742                                                        VkAllocationCallbacks *alloc);
1743 struct v3dv_cmd_buffer_private_obj {
1744    struct list_head list_link;
1745    uint64_t obj;
1746    v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb;
1747 };
1748 
1749 extern const struct vk_command_buffer_ops v3dv_cmd_buffer_ops;
1750 
1751 struct v3dv_cmd_buffer {
1752    struct vk_command_buffer vk;
1753 
1754    struct v3dv_device *device;
1755 
1756    VkCommandBufferUsageFlags usage_flags;
1757 
1758    enum v3dv_cmd_buffer_status status;
1759 
1760    struct v3dv_cmd_buffer_state state;
1761 
1762    /* Buffer where we upload push constant data to resolve indirect indexing */
1763    struct v3dv_cl_reloc push_constants_resource;
1764 
1765    /* Collection of Vulkan objects created internally by the driver (typically
1766     * during recording of meta operations) that are part of the command buffer
1767     * and should be destroyed with it.
1768     */
1769    struct list_head private_objs; /* v3dv_cmd_buffer_private_obj */
1770 
1771    /* Per-command buffer resources for meta operations. */
1772    struct {
1773       struct {
1774          /* The current descriptor pool for blit sources */
1775          VkDescriptorPool dspool;
1776       } blit;
1777       struct {
1778          /* The current descriptor pool for texel buffer copy sources */
1779          VkDescriptorPool dspool;
1780       } texel_buffer_copy;
1781       struct {
1782          /* The current descriptor pool for the copy query results output buffer */
1783          VkDescriptorPool dspool;
1784       } query;
1785    } meta;
1786 
1787    /* List of jobs in the command buffer. For primary command buffers it
1788     * represents the jobs we want to submit to the GPU. For secondary command
1789     * buffers it represents jobs that will be merged into a primary command
1790     * buffer via vkCmdExecuteCommands.
1791     */
1792    struct list_head jobs;
1793 };
1794 
1795 struct v3dv_job *v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer,
1796                                            int32_t subpass_idx,
1797                                            enum v3dv_job_type type);
1798 void v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer);
1799 
1800 struct v3dv_job *v3dv_cmd_buffer_subpass_start(struct v3dv_cmd_buffer *cmd_buffer,
1801                                                uint32_t subpass_idx);
1802 struct v3dv_job *v3dv_cmd_buffer_subpass_resume(struct v3dv_cmd_buffer *cmd_buffer,
1803                                                 uint32_t subpass_idx);
1804 
1805 void v3dv_cmd_buffer_subpass_finish(struct v3dv_cmd_buffer *cmd_buffer);
1806 
1807 void v3dv_cmd_buffer_meta_state_push(struct v3dv_cmd_buffer *cmd_buffer,
1808                                      bool push_descriptor_state);
1809 void v3dv_cmd_buffer_meta_state_pop(struct v3dv_cmd_buffer *cmd_buffer,
1810                                     bool needs_subpass_resume);
1811 
1812 void v3dv_cmd_buffer_begin_query(struct v3dv_cmd_buffer *cmd_buffer,
1813                                  struct v3dv_query_pool *pool,
1814                                  uint32_t query,
1815                                  VkQueryControlFlags flags);
1816 
1817 void v3dv_cmd_buffer_pause_occlusion_query(struct v3dv_cmd_buffer *cmd_buffer);
1818 void v3dv_cmd_buffer_resume_occlusion_query(struct v3dv_cmd_buffer *cmd_buffer);
1819 
1820 void v3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer,
1821                                struct v3dv_query_pool *pool,
1822                                uint32_t query);
1823 
1824 void v3dv_cmd_buffer_copy_query_results(struct v3dv_cmd_buffer *cmd_buffer,
1825                                         struct v3dv_query_pool *pool,
1826                                         uint32_t first,
1827                                         uint32_t count,
1828                                         struct v3dv_buffer *dst,
1829                                         uint32_t offset,
1830                                         uint32_t stride,
1831                                         VkQueryResultFlags flags);
1832 
1833 void v3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
1834                                  struct drm_v3d_submit_tfu *tfu);
1835 
1836 void v3dv_cmd_buffer_rewrite_indirect_csd_job(struct v3dv_device *device,
1837                                               struct v3dv_csd_indirect_cpu_job_info *info,
1838                                               const uint32_t *wg_counts);
1839 
1840 void v3dv_cmd_buffer_add_private_obj(struct v3dv_cmd_buffer *cmd_buffer,
1841                                      uint64_t obj,
1842                                      v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb);
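
/* Illustrative usage sketch (the object and callback below are only an
 * example): a meta operation that creates a temporary Vulkan object can
 * register it so it is destroyed together with the command buffer, e.g.:
 *
 *    v3dv_cmd_buffer_add_private_obj(
 *       cmd_buffer, (uintptr_t) pool,
 *       (v3dv_cmd_buffer_private_obj_destroy_cb) v3dv_DestroyDescriptorPool);
 */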
1843 
1844 void v3dv_cmd_buffer_merge_barrier_state(struct v3dv_barrier_state *dst,
1845                                          struct v3dv_barrier_state *src);
1846 
1847 void v3dv_cmd_buffer_consume_bcl_sync(struct v3dv_cmd_buffer *cmd_buffer,
1848                                       struct v3dv_job *job);
1849 
1850 bool v3dv_cmd_buffer_check_needs_load(const struct v3dv_cmd_buffer_state *state,
1851                                       VkImageAspectFlags aspect,
1852                                       uint32_t first_subpass_idx,
1853                                       VkAttachmentLoadOp load_op,
1854                                       uint32_t last_subpass_idx,
1855                                       VkAttachmentStoreOp store_op);
1856 
1857 bool v3dv_cmd_buffer_check_needs_store(const struct v3dv_cmd_buffer_state *state,
1858                                        VkImageAspectFlags aspect,
1859                                        uint32_t last_subpass_idx,
1860                                        VkAttachmentStoreOp store_op);
1861 
1862 void v3dv_cmd_buffer_emit_pipeline_barrier(struct v3dv_cmd_buffer *cmd_buffer,
1863                                            const VkDependencyInfo *info);
1864 
1865 bool v3dv_cmd_buffer_copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
1866                                     struct v3dv_image *dst,
1867                                     struct v3dv_image *src,
1868                                     const VkImageCopy2 *region);
1869 
1870 struct v3dv_event {
1871    struct vk_object_base base;
1872 
1873    /* Link in the device list of pre-allocated free events */
1874    struct list_head link;
1875 
1876    /* Each event gets a different index, which we use to compute the offset
1877     * in the BO we use to track their state (signaled vs reset).
1878     */
1879    uint32_t index;
1880 };
1881 
1882 VkResult
1883 v3dv_event_allocate_resources(struct v3dv_device *device);
1884 
1885 void
1886 v3dv_event_free_resources(struct v3dv_device *device);
1887 
1888 struct v3dv_shader_variant {
1889    enum broadcom_shader_stage stage;
1890 
1891    union {
1892       struct v3d_prog_data *base;
1893       struct v3d_vs_prog_data *vs;
1894       struct v3d_gs_prog_data *gs;
1895       struct v3d_fs_prog_data *fs;
1896       struct v3d_compute_prog_data *cs;
1897    } prog_data;
1898 
1899    /* We explicitly save the prog_data_size as it makes it easier to
1900     * serialize
1901     */
1902    uint32_t prog_data_size;
1903 
1904    /* The assembly for this variant will be uploaded to a BO shared with all
1905     * other shader stages in that pipeline. This is the offset in that BO.
1906     */
1907    uint32_t assembly_offset;
1908 
1909    /* Note: don't assume qpu_insts is always NULL or non-NULL. In general
1910     * we will try to free it as soon as we upload it to the shared bo while we
1911     * compile the different stages. But we can decide to keep it around based
1912     * on some pipeline creation flags, like
1913     * VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT.
1914     */
1915    uint64_t *qpu_insts;
1916    uint32_t qpu_insts_size;
1917 };
1918 
1919 /*
1920  * Per-stage info for each stage, useful so shader_module_compile_to_nir and
1921  * other methods don't need so many parameters.
1922  *
1923  * FIXME: for the case of the coordinate shader and the vertex shader, module,
1924  * entrypoint, spec_info and nir are the same. There is also info that is only
1925  * relevant to some stages. But it seemed like too much of a hassle to create a
1926  * new struct just to handle that. Revisit if this kind of info starts to grow.
1927  */
1928 struct v3dv_pipeline_stage {
1929    struct v3dv_pipeline *pipeline;
1930 
1931    enum broadcom_shader_stage stage;
1932 
1933    const struct vk_shader_module *module;
1934    const char *entrypoint;
1935    const VkSpecializationInfo *spec_info;
1936    const VkShaderModuleCreateInfo *module_info;
1937 
1938    nir_shader *nir;
1939 
1940    /* The following is the combined hash of module+entrypoint+spec_info+nir */
1941    unsigned char shader_sha1[20];
1942 
1943    /** An ID for this program, so you can track it in shader-db output. */
1944    uint32_t program_id;
1945 
1946    VkPipelineCreationFeedback feedback;
1947 
1948    struct vk_pipeline_robustness_state robustness;
1949 };
1950 
1951 /* We are using the descriptor pool entry for two things:
1952  * * Track the allocated sets, so we can properly free them if needed
1953  * * Track the suballocated pool bo regions, so if some descriptor set is
1954  *   freed, the gap could be reallocated later.
1955  *
1956  * Those only make sense if the pool was not created with the flag
1957  * VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT
1958  */
1959 struct v3dv_descriptor_pool_entry
1960 {
1961    struct v3dv_descriptor_set *set;
1962    /* Offset and size of the subregion allocated for this entry from the
1963     * pool->bo
1964     */
1965    uint32_t offset;
1966    uint32_t size;
1967 };
1968 
1969 struct v3dv_descriptor_pool {
1970    struct vk_object_base base;
1971 
1972    /* A list with all descriptor sets allocated from the pool. */
1973    struct list_head set_list;
1974 
1975    /* If this descriptor pool has been allocated for the driver for internal
1976     * use, typically to implement meta operations.
1977     */
1978    bool is_driver_internal;
1979 
1980    struct v3dv_bo *bo;
1981    /* Current offset into the descriptor bo. 0 means that we haven't used it
1982     * for any descriptor yet. If the descriptor bo is NULL, the current offset
1983     * is meaningless.
1984     */
1985    uint32_t current_offset;
1986 
1987    /* If VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT is not set, the host
1988     * memory for descriptor sets is managed as a single pool allocation through
1989     * the following pointers. If it is set, these pointers are not used and
1990     * descriptor sets are allocated/freed individually.
1991     */
1992    uint8_t *host_memory_base;
1993    uint8_t *host_memory_ptr;
1994    uint8_t *host_memory_end;
1995 
1996    uint32_t entry_count;
1997    uint32_t max_entry_count;
1998    struct v3dv_descriptor_pool_entry entries[0];
1999 };
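
/* Minimal sketch (illustrative only, not the driver's actual allocator) of how
 * set host memory can be carved linearly out of the pool when
 * VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT is not used:
 *
 *    size_t size = ...; // sizeof(struct v3dv_descriptor_set) + descriptors
 *    if (pool->host_memory_ptr + size <= pool->host_memory_end) {
 *       struct v3dv_descriptor_set *set =
 *          (struct v3dv_descriptor_set *) pool->host_memory_ptr;
 *       pool->host_memory_ptr += size;
 *    } else {
 *       // VK_ERROR_OUT_OF_POOL_MEMORY
 *    }
 */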
2000 
2001 struct v3dv_descriptor_set {
2002    struct vk_object_base base;
2003 
2004    /* List link into the list of all sets allocated from the pool */
2005    struct list_head pool_link;
2006 
2007    struct v3dv_descriptor_pool *pool;
2008 
2009    struct v3dv_descriptor_set_layout *layout;
2010 
2011    /* Offset relative to the descriptor pool bo for this set */
2012    uint32_t base_offset;
2013 
2014    /* The descriptors below can be indexed (set/binding) using the set_layout
2015     */
2016    struct v3dv_descriptor descriptors[0];
2017 };
2018 
2019 struct v3dv_descriptor_set_binding_layout {
2020    VkDescriptorType type;
2021 
2022    /* Number of array elements in this binding */
2023    uint32_t array_size;
2024 
2025    /* Index into the flattened descriptor set */
2026    uint32_t descriptor_index;
2027 
2028    uint32_t dynamic_offset_count;
2029    uint32_t dynamic_offset_index;
2030 
2031    /* Offset into the descriptor set where this descriptor lives (final offset
2032     * on the descriptor bo needs to take into account set->base_offset)
2033     */
2034    uint32_t descriptor_offset;
2035 
2036    /* Offset in the v3dv_descriptor_set_layout of the immutable samplers, or 0
2037     * if there are no immutable samplers.
2038     */
2039    uint32_t immutable_samplers_offset;
2040 
2041    /* Descriptors for multiplanar combined image samplers are larger.
2042     * For mutable descriptors, this is always 1.
2043     */
2044    uint8_t plane_stride;
2045 };
2046 
2047 struct v3dv_descriptor_set_layout {
2048    struct vk_object_base base;
2049 
2050    VkDescriptorSetLayoutCreateFlags flags;
2051 
2052    /* Number of bindings in this descriptor set */
2053    uint32_t binding_count;
2054 
2055    /* Total bo size needed for this descriptor set
2056     */
2057    uint32_t bo_size;
2058 
2059    /* Shader stages affected by this descriptor set */
2060    uint16_t shader_stages;
2061 
2062    /* Number of dynamic offsets used by this descriptor set */
2063    uint16_t dynamic_offset_count;
2064 
2065    /* Number of descriptors in this descriptor set */
2066    uint32_t descriptor_count;
2067 
2068    /* Descriptor set layouts can be destroyed even if they are still being
2069     * used.
2070     */
2071    uint32_t ref_cnt;
2072 
2073    /* Bindings in this descriptor set */
2074    struct v3dv_descriptor_set_binding_layout binding[0];
2075 };
2076 
2077 void
2078 v3dv_descriptor_set_layout_destroy(struct v3dv_device *device,
2079                                    struct v3dv_descriptor_set_layout *set_layout);
2080 
2081 static inline void
2082 v3dv_descriptor_set_layout_ref(struct v3dv_descriptor_set_layout *set_layout)
2083 {
2084    assert(set_layout && set_layout->ref_cnt >= 1);
2085    p_atomic_inc(&set_layout->ref_cnt);
2086 }
2087 
2088 static inline void
2089 v3dv_descriptor_set_layout_unref(struct v3dv_device *device,
2090                                  struct v3dv_descriptor_set_layout *set_layout)
2091 {
2092    assert(set_layout && set_layout->ref_cnt >= 1);
2093    if (p_atomic_dec_zero(&set_layout->ref_cnt))
2094       v3dv_descriptor_set_layout_destroy(device, set_layout);
2095 }
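
/* Typical ref-counting pattern (illustrative sketch): an object that keeps a
 * pointer to the layout takes a reference while it holds it, so the layout
 * survives vkDestroyDescriptorSetLayout until the last user releases it:
 *
 *    set->layout = layout;
 *    v3dv_descriptor_set_layout_ref(layout);
 *    ...
 *    v3dv_descriptor_set_layout_unref(device, set->layout);
 */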
2096 
2097 struct v3dv_pipeline_layout {
2098    struct vk_object_base base;
2099 
2100    struct {
2101       struct v3dv_descriptor_set_layout *layout;
2102       uint32_t dynamic_offset_start;
2103    } set[MAX_SETS];
2104 
2105    uint32_t num_sets;
2106 
2107    /* Shader stages that are declared to use descriptors from this layout */
2108    uint32_t shader_stages;
2109 
2110    uint32_t dynamic_offset_count;
2111    uint32_t push_constant_size;
2112 
2113    /* Pipeline layouts can be destroyed after creating pipelines since
2114     * VK_KHR_maintenance4.
2115     */
2116    uint32_t ref_cnt;
2117 
2118    unsigned char sha1[20];
2119 };
2120 
2121 void
2122 v3dv_pipeline_layout_destroy(struct v3dv_device *device,
2123                              struct v3dv_pipeline_layout *layout,
2124                              const VkAllocationCallbacks *alloc);
2125 
2126 static inline void
2127 v3dv_pipeline_layout_ref(struct v3dv_pipeline_layout *layout)
2128 {
2129    assert(layout && layout->ref_cnt >= 1);
2130    p_atomic_inc(&layout->ref_cnt);
2131 }
2132 
2133 static inline void
2134 v3dv_pipeline_layout_unref(struct v3dv_device *device,
2135                            struct v3dv_pipeline_layout *layout,
2136                            const VkAllocationCallbacks *alloc)
2137 {
2138    assert(layout && layout->ref_cnt >= 1);
2139    if (p_atomic_dec_zero(&layout->ref_cnt))
2140       v3dv_pipeline_layout_destroy(device, layout, alloc);
2141 }
2142 
2143 /*
2144  * We are using descriptor maps for ubo/ssbo and texture/samplers, so we need
2145  * it to be big enough to include the max value for all of them.
2146  *
2147  * FIXME: one alternative would be to allocate the map as big as you need for
2148  * each descriptor type. That would mean more individual allocations.
2149  */
2150 #define DESCRIPTOR_MAP_SIZE MAX3(V3D_MAX_TEXTURE_SAMPLERS,                         \
2151                                  MAX_UNIFORM_BUFFERS + MAX_INLINE_UNIFORM_BUFFERS, \
2152                                  MAX_STORAGE_BUFFERS)
2153 
2154 
2155 struct v3dv_descriptor_map {
2156    /* FIXME: avoid fixed size array/justify the size */
2157    unsigned num_desc; /* Number of descriptors  */
2158    int set[DESCRIPTOR_MAP_SIZE];
2159    int binding[DESCRIPTOR_MAP_SIZE];
2160    int array_index[DESCRIPTOR_MAP_SIZE];
2161    int array_size[DESCRIPTOR_MAP_SIZE];
2162    uint8_t plane[DESCRIPTOR_MAP_SIZE];
2163    bool used[DESCRIPTOR_MAP_SIZE];
2164 
2165    /* NOTE: the following is only for samplers, but this is the easiest place to
2166     * put it.
2167     */
2168    uint8_t return_size[DESCRIPTOR_MAP_SIZE];
2169 };
2170 
2171 struct v3dv_sampler {
2172    struct vk_object_base base;
2173    struct vk_ycbcr_conversion *conversion;
2174 
2175    bool compare_enable;
2176    bool unnormalized_coordinates;
2177 
2178    /* Prepacked per-plane SAMPLER_STATE, referenced as part of the tmu
2179     * configuration. If needed, it will be copied into the descriptor info
2180     * during UpdateDescriptorSets.
2181     */
2182    uint8_t plane_count;
2183    uint8_t sampler_state[V3DV_SAMPLER_STATE_LENGTH];
2184 };
2185 
2186 /* We keep two special values for the sampler idx that represent the cases in
2187  * which a sampler is not needed/provided. The main use is that even if we
2188  * don't have a sampler, we still need to do the output unpacking (through
2189  * nir_lower_tex). The easiest way to do this is to add these special "no
2190  * sampler" entries to the sampler_map, and then use the proper unpacking for
2191  * that case.
2192  *
2193  * We have one for when we want a 16-bit output size, and another for when we
2194  * want a 32-bit output size. We use the info coming from the RelaxedPrecision
2195  * decoration to decide between the two.
2196  */
2197 #define V3DV_NO_SAMPLER_16BIT_IDX 0
2198 #define V3DV_NO_SAMPLER_32BIT_IDX 1
2199 
2200 struct v3dv_descriptor_maps {
2201    struct v3dv_descriptor_map ubo_map;
2202    struct v3dv_descriptor_map ssbo_map;
2203    struct v3dv_descriptor_map sampler_map;
2204    struct v3dv_descriptor_map texture_map;
2205 };
2206 
2207 /* This structure represents data shared between different objects, like the
2208  * pipeline and the pipeline cache, so we ref count it to know when it should
2209  * be freed.
2210  */
2211 struct v3dv_pipeline_shared_data {
2212    uint32_t ref_cnt;
2213 
2214    unsigned char sha1_key[20];
2215 
2216    struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES];
2217    struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES];
2218 
2219    struct v3dv_bo *assembly_bo;
2220 };
2221 
2222 struct v3dv_pipeline_executable_data {
2223    enum broadcom_shader_stage stage;
2224    char *nir_str;
2225    char *qpu_str;
2226 };
2227 
2228 struct v3dv_pipeline {
2229    struct vk_object_base base;
2230 
2231    struct v3dv_device *device;
2232 
2233    VkShaderStageFlags active_stages;
2234    VkPipelineCreateFlagBits2KHR flags;
2235 
2236    struct v3dv_render_pass *pass;
2237    struct v3dv_subpass *subpass;
2238 
2239    struct v3dv_pipeline_stage *stages[BROADCOM_SHADER_STAGES];
2240 
2241    /* For VK_KHR_dynamic_rendering */
2242    struct vk_render_pass_state rendering_info;
2243 
2244    /* Flags for whether optional pipeline stages are present, for convenience */
2245    bool has_gs;
2246 
2247    /* Whether any stage in this pipeline uses VK_KHR_buffer_device_address */
2248    bool uses_buffer_device_address;
2249 
2250    /* Spilling memory requirements */
2251    struct {
2252       struct v3dv_bo *bo;
2253       uint32_t size_per_thread;
2254    } spill;
2255 
2256    struct vk_dynamic_graphics_state dynamic_graphics_state;
2257    struct v3dv_dynamic_state dynamic;
2258 
2259    struct v3dv_pipeline_layout *layout;
2260 
2261    enum v3dv_ez_state ez_state;
2262 
2263    /* If ez_state is V3D_EZ_DISABLED, if the reason for disabling is that the
2264     * pipeline selects an incompatible depth test function.
2265     */
2266    bool incompatible_ez_test;
2267 
2268    bool rasterization_enabled;
2269    bool msaa;
2270    bool sample_rate_shading;
2271    uint32_t sample_mask;
2272 
2273    bool negative_one_to_one;
2274 
2275    /* Indexed by vertex binding. */
2276    struct v3dv_pipeline_vertex_binding {
2277       uint32_t instance_divisor;
2278    } vb[MAX_VBS];
2279    uint32_t vb_count;
2280 
2281    /* Note that a lot of info from VkVertexInputAttributeDescription is
2282     * already prepacked, so here we are only storing the fields that need to be
2283     * rechecked later. The array must be indexed by driver location, since that is the
2284     * order in which we need to emit the attributes.
2285     */
2286    struct v3dv_pipeline_vertex_attrib {
2287       uint32_t binding;
2288       uint32_t offset;
2289       VkFormat vk_format;
2290    } va[MAX_VERTEX_ATTRIBS];
2291    uint32_t va_count;
2292 
2293    enum mesa_prim topology;
2294 
2295    bool line_smooth;
2296 
2297    struct v3dv_pipeline_shared_data *shared_data;
2298 
2299    /* The combined hash of the stage sha1s, the layout sha1 and the pipeline key sha1. */
2300    unsigned char sha1[20];
2301 
2302    /* In general we can reuse v3dv_device->default_attribute_float, so note
2303     * that the following can be NULL. In 7.x this is not used, so it will
2304     * always be NULL.
2305     *
2306     * FIXME: the content of this BO will be small, so it could be improved to
2307     * be uploaded to a common BO. But since in most cases it will be NULL, it is
2308     * not a priority.
2309     */
2310    struct v3dv_bo *default_attribute_values;
2311 
2312    struct vpm_config vpm_cfg;
2313    struct vpm_config vpm_cfg_bin;
2314 
2315    /* If the pipeline should emit any of the stencil configuration packets */
2316    bool emit_stencil_cfg[2];
2317 
2318    /* Blend state */
2319    struct {
2320       /* Per-RT bit mask with blend enables */
2321       uint8_t enables;
2322       /* Per-RT prepacked blend config packets */
2323       uint8_t cfg[V3D_MAX_DRAW_BUFFERS][V3DV_BLEND_CFG_LENGTH];
2324       /* Flag indicating whether the blend factors in use require
2325        * color constants.
2326        */
2327       bool needs_color_constants;
2328       /* Mask with enabled color channels for each RT (4 bits per RT) */
2329       uint32_t color_write_masks;
2330    } blend;
2331 
2332    struct {
2333       void *mem_ctx;
2334       struct util_dynarray data; /* Array of v3dv_pipeline_executable_data */
2335    } executables;
2336 
2337    /* Packets prepacked during pipeline creation
2338     */
2339    uint8_t cfg_bits[V3DV_CFG_BITS_LENGTH];
2340    uint8_t shader_state_record[V3DV_GL_SHADER_STATE_RECORD_LENGTH];
2341    uint8_t vcm_cache_size[V3DV_VCM_CACHE_SIZE_LENGTH];
2342    uint8_t vertex_attrs[V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH *
2343                         MAX_VERTEX_ATTRIBS];
2344    uint8_t stencil_cfg[2][V3DV_STENCIL_CFG_LENGTH];
2345 };
2346 
2347 static inline bool
2348 v3dv_texture_shader_state_has_rb_swap_reverse_bits(const struct v3dv_device *device)
2349 {
2350    return device->devinfo.ver > 71 ||
2351           (device->devinfo.ver == 71 && device->devinfo.rev >= 5);
2352 }
2353 
2354 static inline VkPipelineBindPoint
2355 v3dv_pipeline_get_binding_point(struct v3dv_pipeline *pipeline)
2356 {
2357    assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ||
2358           !(pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT));
2359    return pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ?
2360       VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
2361 }
2362 
2363 static inline struct v3dv_descriptor_state*
2364 v3dv_cmd_buffer_get_descriptor_state(struct v3dv_cmd_buffer *cmd_buffer,
2365                                      struct v3dv_pipeline *pipeline)
2366 {
2367    if (v3dv_pipeline_get_binding_point(pipeline) == VK_PIPELINE_BIND_POINT_COMPUTE)
2368       return &cmd_buffer->state.compute.descriptor_state;
2369    else
2370       return &cmd_buffer->state.gfx.descriptor_state;
2371 }
2372 
2373 const nir_shader_compiler_options *v3dv_pipeline_get_nir_options(const struct v3d_device_info *devinfo);
2374 
2375 uint32_t v3dv_physical_device_vendor_id(const struct v3dv_physical_device *dev);
2376 uint32_t v3dv_physical_device_device_id(const struct v3dv_physical_device *dev);
2377 
2378 const uint8_t *v3dv_get_format_swizzle(struct v3dv_device *device, VkFormat f,
2379                                        uint8_t plane);
2380 const struct v3dv_format *
2381 v3dv_get_compatible_tfu_format(struct v3dv_device *device,
2382                                uint32_t bpp, VkFormat *out_vk_format);
2383 bool v3dv_buffer_format_supports_features(struct v3dv_device *device,
2384                                           VkFormat vk_format,
2385                                           VkFormatFeatureFlags2 features);
2386 
2387 struct v3dv_cl_reloc v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
2388                                          struct v3dv_pipeline *pipeline,
2389                                          struct v3dv_shader_variant *variant);
2390 
2391 struct v3dv_cl_reloc v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
2392                                                     struct v3dv_pipeline *pipeline,
2393                                                     struct v3dv_shader_variant *variant,
2394                                                     uint32_t **wg_count_offsets);
2395 
2396 struct v3dv_shader_variant *
2397 v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage,
2398                         struct v3dv_pipeline_cache *cache,
2399                         struct v3d_key *key,
2400                         size_t key_size,
2401                         const VkAllocationCallbacks *pAllocator,
2402                         VkResult *out_vk_result);
2403 
2404 struct v3dv_shader_variant *
2405 v3dv_shader_variant_create(struct v3dv_device *device,
2406                            enum broadcom_shader_stage stage,
2407                            struct v3d_prog_data *prog_data,
2408                            uint32_t prog_data_size,
2409                            uint32_t assembly_offset,
2410                            uint64_t *qpu_insts,
2411                            uint32_t qpu_insts_size,
2412                            VkResult *out_vk_result);
2413 
2414 void
2415 v3dv_shader_variant_destroy(struct v3dv_device *device,
2416                             struct v3dv_shader_variant *variant);
2417 
2418 static inline void
2419 v3dv_pipeline_shared_data_ref(struct v3dv_pipeline_shared_data *shared_data)
2420 {
2421    assert(shared_data && shared_data->ref_cnt >= 1);
2422    p_atomic_inc(&shared_data->ref_cnt);
2423 }
2424 
2425 void
2426 v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,
2427                                   struct v3dv_pipeline_shared_data *shared_data);
2428 
2429 static inline void
2430 v3dv_pipeline_shared_data_unref(struct v3dv_device *device,
2431                                 struct v3dv_pipeline_shared_data *shared_data)
2432 {
2433    assert(shared_data && shared_data->ref_cnt >= 1);
2434    if (p_atomic_dec_zero(&shared_data->ref_cnt))
2435       v3dv_pipeline_shared_data_destroy(device, shared_data);
2436 }
2437 
2438 struct v3dv_descriptor *
2439 v3dv_descriptor_map_get_descriptor(struct v3dv_descriptor_state *descriptor_state,
2440                                    struct v3dv_descriptor_map *map,
2441                                    struct v3dv_pipeline_layout *pipeline_layout,
2442                                    uint32_t index,
2443                                    uint32_t *dynamic_offset);
2444 
2445 struct v3dv_cl_reloc
2446 v3dv_descriptor_map_get_descriptor_bo(struct v3dv_device *device,
2447                                       struct v3dv_descriptor_state *descriptor_state,
2448                                       struct v3dv_descriptor_map *map,
2449                                       struct v3dv_pipeline_layout *pipeline_layout,
2450                                       uint32_t index,
2451                                       VkDescriptorType *out_type);
2452 
2453 const struct v3dv_sampler *
2454 v3dv_descriptor_map_get_sampler(struct v3dv_descriptor_state *descriptor_state,
2455                                 struct v3dv_descriptor_map *map,
2456                                 struct v3dv_pipeline_layout *pipeline_layout,
2457                                 uint32_t index);
2458 
2459 struct v3dv_cl_reloc
2460 v3dv_descriptor_map_get_sampler_state(struct v3dv_device *device,
2461                                       struct v3dv_descriptor_state *descriptor_state,
2462                                       struct v3dv_descriptor_map *map,
2463                                       struct v3dv_pipeline_layout *pipeline_layout,
2464                                       uint32_t index);
2465 
2466 struct v3dv_cl_reloc
2467 v3dv_descriptor_map_get_texture_shader_state(struct v3dv_device *device,
2468                                              struct v3dv_descriptor_state *descriptor_state,
2469                                              struct v3dv_descriptor_map *map,
2470                                              struct v3dv_pipeline_layout *pipeline_layout,
2471                                              uint32_t index);
2472 
2473 struct v3dv_bo*
2474 v3dv_descriptor_map_get_texture_bo(struct v3dv_descriptor_state *descriptor_state,
2475                                    struct v3dv_descriptor_map *map,
2476                                    struct v3dv_pipeline_layout *pipeline_layout,
2477                                    uint32_t index);
2478 
2479 static inline const struct v3dv_sampler *
2480 v3dv_immutable_samplers(const struct v3dv_descriptor_set_layout *set,
2481                         const struct v3dv_descriptor_set_binding_layout *binding)
2482 {
2483    assert(binding->immutable_samplers_offset);
2484    return (const struct v3dv_sampler *) ((const char *) set + binding->immutable_samplers_offset);
2485 }
2486 
2487 void v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
2488                               struct v3dv_device *device,
2489                               VkPipelineCacheCreateFlags,
2490                               bool cache_enabled);
2491 
2492 void v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache);
2493 
2494 void v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
2495                                     struct v3dv_pipeline_cache *cache,
2496                                     nir_shader *nir,
2497                                     unsigned char sha1_key[20]);
2498 
2499 nir_shader* v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
2500                                                struct v3dv_pipeline_cache *cache,
2501                                                const nir_shader_compiler_options *nir_options,
2502                                                unsigned char sha1_key[20]);
2503 
2504 struct v3dv_pipeline_shared_data *
2505 v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
2506                                         unsigned char sha1_key[20],
2507                                         bool *cache_hit);
2508 
2509 void
2510 v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
2511                                     struct v3dv_pipeline_cache *cache);
2512 
2513 VkResult
2514 v3dv_create_compute_pipeline_from_nir(struct v3dv_device *device,
2515                                       nir_shader *nir,
2516                                       VkPipelineLayout pipeline_layout,
2517                                       VkPipeline *pipeline);
2518 
2519 #define V3DV_FROM_HANDLE(__v3dv_type, __name, __handle)			\
2520    VK_FROM_HANDLE(__v3dv_type, __name, __handle)
2521 
2522 VK_DEFINE_HANDLE_CASTS(v3dv_cmd_buffer, vk.base, VkCommandBuffer,
2523                        VK_OBJECT_TYPE_COMMAND_BUFFER)
2524 VK_DEFINE_HANDLE_CASTS(v3dv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
2525 VK_DEFINE_HANDLE_CASTS(v3dv_instance, vk.base, VkInstance,
2526                        VK_OBJECT_TYPE_INSTANCE)
2527 VK_DEFINE_HANDLE_CASTS(v3dv_physical_device, vk.base, VkPhysicalDevice,
2528                        VK_OBJECT_TYPE_PHYSICAL_DEVICE)
2529 VK_DEFINE_HANDLE_CASTS(v3dv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
2530 
2531 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer, base, VkBuffer,
2532                                VK_OBJECT_TYPE_BUFFER)
2533 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer_view, base, VkBufferView,
2534                                VK_OBJECT_TYPE_BUFFER_VIEW)
2535 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_device_memory, vk.base, VkDeviceMemory,
2536                                VK_OBJECT_TYPE_DEVICE_MEMORY)
2537 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_pool, base, VkDescriptorPool,
2538                                VK_OBJECT_TYPE_DESCRIPTOR_POOL)
2539 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set, base, VkDescriptorSet,
2540                                VK_OBJECT_TYPE_DESCRIPTOR_SET)
2541 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set_layout, base,
2542                                VkDescriptorSetLayout,
2543                                VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
2544 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
2545 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_framebuffer, base, VkFramebuffer,
2546                                VK_OBJECT_TYPE_FRAMEBUFFER)
2547 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image, vk.base, VkImage,
2548                                VK_OBJECT_TYPE_IMAGE)
2549 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image_view, vk.base, VkImageView,
2550                                VK_OBJECT_TYPE_IMAGE_VIEW)
2551 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline, base, VkPipeline,
2552                                VK_OBJECT_TYPE_PIPELINE)
2553 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_cache, base, VkPipelineCache,
2554                                VK_OBJECT_TYPE_PIPELINE_CACHE)
2555 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_layout, base, VkPipelineLayout,
2556                                VK_OBJECT_TYPE_PIPELINE_LAYOUT)
2557 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_query_pool, base, VkQueryPool,
2558                                VK_OBJECT_TYPE_QUERY_POOL)
2559 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_render_pass, base, VkRenderPass,
2560                                VK_OBJECT_TYPE_RENDER_PASS)
2561 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_sampler, base, VkSampler,
2562                                VK_OBJECT_TYPE_SAMPLER)
2563 
2564 static inline int
2565 v3dv_ioctl(int fd, unsigned long request, void *arg)
2566 {
2567 #if USE_V3D_SIMULATOR
2568    return v3d_simulator_ioctl(fd, request, arg);
2569 #else
2570    return drmIoctl(fd, request, arg);
2571 #endif
2572 }
2573 
2574 /* Flags OOM conditions in command buffer state.
2575  *
2576  * Note that no-op jobs don't have a command buffer reference.
2577  */
2578 static inline void
2579 v3dv_flag_oom(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_job *job)
2580 {
2581    if (cmd_buffer) {
2582       cmd_buffer->state.oom = true;
2583    } else {
2584       assert(job);
2585       if (job->cmd_buffer)
2586          job->cmd_buffer->state.oom = true;
2587    }
2588 }
2589 
2590 #define v3dv_return_if_oom(_cmd_buffer, _job) do {                  \
2591    const struct v3dv_cmd_buffer *__cmd_buffer = _cmd_buffer;        \
2592    if (__cmd_buffer && __cmd_buffer->state.oom)                     \
2593       return;                                                       \
2594    const struct v3dv_job *__job = _job;                             \
2595    if (__job && __job->cmd_buffer && __job->cmd_buffer->state.oom)  \
2596       return;                                                       \
2597 } while(0)                                                          \
2598 
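/* Illustrative usage of the OOM helpers (a sketch, not a verbatim driver
 * snippet): code that fails to allocate flags the condition, and later
 * commands bail out early through the macro:
 *
 *    if (!ptr) {
 *       v3dv_flag_oom(cmd_buffer, NULL);
 *       return;
 *    }
 *    ...
 *    v3dv_return_if_oom(cmd_buffer, NULL);
 */
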
2599 static inline uint32_t
2600 u64_hash(const void *key)
2601 {
2602    return _mesa_hash_data(key, sizeof(uint64_t));
2603 }
2604 
2605 static inline bool
2606 u64_compare(const void *key1, const void *key2)
2607 {
2608    return memcmp(key1, key2, sizeof(uint64_t)) == 0;
2609 }
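
/* These helpers are meant for hash tables keyed by 64-bit values; an
 * illustrative (non-driver) use with the util hash table API would be:
 *
 *    struct hash_table *ht =
 *       _mesa_hash_table_create(NULL, u64_hash, u64_compare);
 */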
2610 
2611 /* Helper to call HW-version-specific functions */
2612 #define v3dv_X(device, thing) ({                      \
2613    __typeof(&v3d42_##thing) v3d_X_thing;              \
2614    switch (device->devinfo.ver) {                     \
2615    case 42:                                           \
2616       v3d_X_thing = &v3d42_##thing;                   \
2617       break;                                          \
2618    case 71:                                           \
2619       v3d_X_thing = &v3d71_##thing;                   \
2620       break;                                          \
2621    default:                                           \
2622       unreachable("Unsupported hardware generation"); \
2623    }                                                  \
2624    v3d_X_thing;                                       \
2625 })
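
/* Illustrative call through the per-version dispatch above (the function name
 * below is hypothetical, only showing the expected shape):
 *
 *    v3dv_X(device, some_emit_helper)(job, args);
 *
 * which resolves to v3d42_some_emit_helper() or v3d71_some_emit_helper()
 * depending on device->devinfo.ver.
 */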
2626 
2627 /* v3d_macros from common requires v3dX and V3DX definitions. Below we need to
2628  * define v3dX for each version supported, because when we compile code that
2629  * is not version-specific, all version-specific macros need to be already
2630  * defined.
2631  */
2632 #ifdef v3dX
2633 #  include "v3dvx_private.h"
2634 #else
2635 #  define v3dX(x) v3d42_##x
2636 #  include "v3dvx_private.h"
2637 #  undef v3dX
2638 
2639 #  define v3dX(x) v3d71_##x
2640 #  include "v3dvx_private.h"
2641 #  undef v3dX
2642 #endif
2643 
2644 VkResult
2645 v3dv_update_image_layout(struct v3dv_device *device,
2646                          struct v3dv_image *image,
2647                          uint64_t modifier,
2648                          bool disjoint,
2649                          const VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_mod_info);
2650 
2651 float
2652 v3dv_get_aa_line_width(struct v3dv_pipeline *pipeline,
2653                        struct v3dv_cmd_buffer *buffer);
2654 
2655 
2656 void
2657 v3dv_compute_ez_state(struct vk_dynamic_graphics_state *dyn,
2658                       struct v3dv_pipeline *pipeline,
2659                       enum v3dv_ez_state *ez_state,
2660                       bool *incompatible_ez_test);
2661 
2662 uint32_t v3dv_pipeline_primitive(VkPrimitiveTopology vk_prim);
2663 
2664 #endif /* V3DV_PRIVATE_H */
2665