xref: /aosp_15_r20/external/mesa3d/src/imagination/vulkan/pvr_private.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2022 Imagination Technologies Ltd.
3  *
4  * based in part on anv driver which is:
5  * Copyright © 2015 Intel Corporation
6  *
7  * based in part on radv driver which is:
8  * Copyright © 2016 Red Hat.
9  * Copyright © 2016 Bas Nieuwenhuizen
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a copy
12  * of this software and associated documentation files (the "Software"), to deal
13  * in the Software without restriction, including without limitation the rights
14  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15  * copies of the Software, and to permit persons to whom the Software is
16  * furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice (including the next
19  * paragraph) shall be included in all copies or substantial portions of the
20  * Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28  * SOFTWARE.
29  */
30 
31 #ifndef PVR_PRIVATE_H
32 #define PVR_PRIVATE_H
33 
34 #include <assert.h>
35 #include <stdbool.h>
36 #include <stdint.h>
37 #include <vulkan/vulkan.h>
38 
39 #include "compiler/shader_enums.h"
40 #include "hwdef/rogue_hw_defs.h"
41 #include "pvr_border.h"
42 #include "pvr_clear.h"
43 #include "pvr_common.h"
44 #include "pvr_csb.h"
45 #include "pvr_device_info.h"
46 #include "pvr_entrypoints.h"
47 #include "pvr_hw_pass.h"
48 #include "pvr_job_render.h"
49 #include "pvr_limits.h"
50 #include "pvr_pds.h"
51 #include "pvr_shader_factory.h"
52 #include "pvr_spm.h"
53 #include "pvr_types.h"
54 #include "pvr_winsys.h"
55 #include "rogue/rogue.h"
56 #include "util/bitscan.h"
57 #include "util/format/u_format.h"
58 #include "util/log.h"
59 #include "util/macros.h"
60 #include "util/simple_mtx.h"
61 #include "util/u_dynarray.h"
62 #include "util/u_math.h"
63 #include "vk_buffer.h"
64 #include "vk_command_buffer.h"
65 #include "vk_device.h"
66 #include "vk_enum_to_str.h"
67 #include "vk_graphics_state.h"
68 #include "vk_image.h"
69 #include "vk_instance.h"
70 #include "vk_log.h"
71 #include "vk_physical_device.h"
72 #include "vk_queue.h"
73 #include "vk_sync.h"
74 #include "wsi_common.h"
75 
#ifdef HAVE_VALGRIND
#   include <valgrind/valgrind.h>
#   include <valgrind/memcheck.h>
/* VG(x): emit x only when built with Valgrind client-request support;
 * compiles to a no-op otherwise.
 */
#   define VG(x) x
#else
#   define VG(x) ((void)0)
#endif
83 
84 struct pvr_bo;
85 struct pvr_bo_store;
86 struct pvr_compute_ctx;
87 struct pvr_compute_pipeline;
88 struct pvr_free_list;
89 struct pvr_graphics_pipeline;
90 struct pvr_instance;
91 struct pvr_render_ctx;
92 struct rogue_compiler;
93 
/* Vulkan physical device: one PowerVR GPU, with its winsys connection,
 * hardware info and shader compiler instance.
 */
struct pvr_physical_device {
   struct vk_physical_device vk;

   /* Back-pointer to instance */
   struct pvr_instance *instance;

   /* NOTE(review): presumably filesystem paths of the render and display
    * device nodes — confirm against the winsys open code.
    */
   char *render_path;
   char *display_path;

   struct pvr_winsys *ws;
   struct pvr_device_info dev_info;
   struct pvr_device_runtime_info dev_runtime_info;

   /* Memory heaps/types reported to the application. */
   VkPhysicalDeviceMemoryProperties memory;

   /* NOTE(review): looks like a running total of heap bytes in use —
    * confirm units and updater.
    */
   uint64_t heap_used;

   struct wsi_device wsi_device;

   struct rogue_compiler *compiler;
};
115 
/* Vulkan instance object. */
struct pvr_instance {
   struct vk_instance vk;

   /* Number of logical devices currently alive under this instance. */
   uint32_t active_device_count;
};
121 
/* Vulkan queue: owns one hardware context per job type it can submit. */
struct pvr_queue {
   struct vk_queue vk;

   struct pvr_device *device;

   /* Hardware contexts used for submissions from this queue. */
   struct pvr_render_ctx *gfx_ctx;
   struct pvr_compute_ctx *compute_ctx;
   struct pvr_compute_ctx *query_ctx;
   struct pvr_transfer_ctx *transfer_ctx;

   /* Per job type: sync signaled by the most recently submitted job. */
   struct vk_sync *last_job_signal_sync[PVR_JOB_TYPE_MAX];
   /* Per job type: sync the next submitted job must wait on. */
   struct vk_sync *next_job_wait_sync[PVR_JOB_TYPE_MAX];
};
135 
/* One bound vertex buffer (buffer + byte offset), as set by
 * vkCmdBindVertexBuffers().
 */
struct pvr_vertex_binding {
   struct pvr_buffer *buffer;
   VkDeviceSize offset;
};
140 
/* A PDS program uploaded to device memory: the backing BO plus the
 * heap-relative offsets and sizes of its data and code segments.
 */
struct pvr_pds_upload {
   struct pvr_suballoc_bo *pvr_bo;
   /* Offset from the pds heap base address. */
   uint32_t data_offset;
   /* Offset from the pds heap base address. */
   uint32_t code_offset;

   /* data_size + code_size = program_size. */
   uint32_t data_size;
   uint32_t code_size;
};
152 
/* A compute shader used internally for query operations: its USC binary
 * plus the primary and secondary PDS programs that launch it.
 */
struct pvr_compute_query_shader {
   struct pvr_suballoc_bo *usc_bo;

   struct pvr_pds_upload pds_prim_code;
   /* Primary PDS program data segment size, in dwords. */
   uint32_t primary_data_size_dw;
   uint32_t primary_num_temps;

   struct pvr_pds_info info;
   struct pvr_pds_upload pds_sec_code;
};
163 
/* Vulkan logical device: owns the queues, suballocators, and all
 * device-global helper programs (clears, queries, barriers, SPM, ...).
 */
struct pvr_device {
   struct vk_device vk;
   struct pvr_instance *instance;
   struct pvr_physical_device *pdevice;

   struct pvr_winsys *ws;
   struct pvr_winsys_heaps heaps;

   struct pvr_free_list *global_free_list;

   struct pvr_queue *queues;
   uint32_t queue_count;

   /* Running count of the number of job submissions across all queues. */
   uint32_t global_cmd_buffer_submit_count;

   /* Running count of the number of presentations across all queues. */
   uint32_t global_queue_present_count;

   uint32_t pixel_event_data_size_in_dwords;

   /* NOTE(review): presumably a pre-packed sampler state word used when
    * reading input attachments — confirm against descriptor setup.
    */
   uint64_t input_attachment_sampler;

   struct pvr_pds_upload pds_compute_fence_program;
   struct pvr_pds_upload pds_compute_empty_program;

   /* Compute shaders for queries. */
   struct pvr_compute_query_shader availability_shader;
   struct pvr_compute_query_shader *copy_results_shaders;
   struct pvr_compute_query_shader *reset_queries_shaders;

   /* Suballocators, one per device memory heap usage. */
   struct pvr_suballocator suballoc_general;
   struct pvr_suballocator suballoc_pds;
   struct pvr_suballocator suballoc_transfer;
   struct pvr_suballocator suballoc_usc;
   struct pvr_suballocator suballoc_vis_test;

   /* No-op shader: USC binary plus the PDS program that launches it. */
   struct {
      struct pvr_pds_upload pds;
      struct pvr_suballoc_bo *usc;
   } nop_program;

   /* Issue Data Fence, Wait for Data Fence state. */
   struct {
      uint32_t usc_shareds;
      struct pvr_suballoc_bo *usc;

      /* Buffer in which the IDF/WDF program performs store ops. */
      struct pvr_bo *store_bo;
      /* Contains the initialization values for the shared registers. */
      struct pvr_bo *shareds_bo;

      struct pvr_pds_upload pds;
      struct pvr_pds_upload sw_compute_barrier_pds;
   } idfwdf_state;

   /* Pre-built state for clears performed via static (non-shader-patched)
    * PPP/VDM programs.
    */
   struct pvr_device_static_clear_state {
      struct pvr_suballoc_bo *usc_vertex_shader_bo;
      struct pvr_suballoc_bo *vertices_bo;
      struct pvr_pds_upload pds;

      /* Only valid if PVR_HAS_FEATURE(dev_info, gs_rta_support). */
      struct pvr_suballoc_bo *usc_multi_layer_vertex_shader_bo;

      struct pvr_static_clear_ppp_base ppp_base;
      /* Indexable using VkImageAspectFlags. */
      struct pvr_static_clear_ppp_template
         ppp_templates[PVR_STATIC_CLEAR_VARIANT_COUNT];

      const uint32_t *vdm_words;
      const uint32_t *large_clear_vdm_words;

      struct pvr_suballoc_bo *usc_clear_attachment_programs;
      struct pvr_suballoc_bo *pds_clear_attachment_programs;
      /* TODO: See if we can use PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT to save some
       * memory.
       */
      struct pvr_pds_clear_attachment_program_info {
         pvr_dev_addr_t texture_program_offset;
         pvr_dev_addr_t pixel_program_offset;

         uint32_t texture_program_pds_temps_count;
         /* Size in dwords. */
         uint32_t texture_program_data_size;
      } pds_clear_attachment_program_info
         [PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT_WITH_HOLES];
   } static_clear_state;

   /* Programs used to reload tile data after a Smart Parameter Management
    * (SPM) partial render.
    */
   struct {
      struct pvr_suballoc_bo *usc_programs;
      struct pvr_suballoc_bo *pds_programs;

      struct pvr_spm_per_load_program_state {
         pvr_dev_addr_t pds_pixel_program_offset;
         pvr_dev_addr_t pds_uniform_program_offset;

         uint32_t pds_texture_program_data_size;
         uint32_t pds_texture_program_temps_count;
      } load_program[PVR_SPM_LOAD_PROGRAM_COUNT];
   } spm_load_state;

   /* Lazily grown pool of tile buffers, guarded by mtx. */
   struct pvr_device_tile_buffer_state {
      simple_mtx_t mtx;

#define PVR_MAX_TILE_BUFFER_COUNT 7U
      struct pvr_bo *buffers[PVR_MAX_TILE_BUFFER_COUNT];
      uint32_t buffer_count;
   } tile_buffer_state;

   struct pvr_spm_scratch_buffer_store spm_scratch_buffer_store;

   struct pvr_bo_store *bo_store;

   /* NOTE(review): presumably the sink/source buffer backing out-of-bounds
    * accesses for robustBufferAccess — confirm against buffer setup.
    */
   struct pvr_bo *robustness_buffer;

   struct vk_sync *presignaled_sync;

   struct pvr_border_color_table border_color_table;
};
283 
/* VkDeviceMemory: a single winsys buffer object allocation. */
struct pvr_device_memory {
   struct vk_object_base base;
   struct pvr_winsys_bo *bo;
};
288 
/* Layout of one mip level within an image's memory. */
struct pvr_mip_level {
   /* Offset of the mip level in bytes */
   uint32_t offset;

   /* Aligned mip level size in bytes */
   uint32_t size;

   /* Aligned row length in bytes */
   uint32_t pitch;

   /* Aligned height in bytes */
   uint32_t height_pitch;
};
302 
/* VkImage: bound device memory plus the derived memory layout. */
struct pvr_image {
   struct vk_image vk;

   /* vma this image is bound to */
   struct pvr_winsys_vma *vma;

   /* Device address the image is mapped to in device virtual address space */
   pvr_dev_addr_t dev_addr;

   /* Derived and other state */
   VkExtent3D physical_extent;
   enum pvr_memlayout memlayout;
   VkDeviceSize layer_size;
   VkDeviceSize size;

   VkDeviceSize alignment;

   /* NOTE(review): 14 levels covers a max dimension of 8192
    * (log2(8192) + 1) — confirm against the device's image limits.
    */
   struct pvr_mip_level mip_levels[14];
};
322 
/* VkBuffer: bound device memory plus the derived device address. */
struct pvr_buffer {
   struct vk_buffer vk;

   /* Derived and other state */
   uint32_t alignment;
   /* vma this buffer is bound to */
   struct pvr_winsys_vma *vma;
   /* Device address the buffer is mapped to in device virtual address space */
   pvr_dev_addr_t dev_addr;
};
333 
/* VkImageView: pre-packed hardware texture state for each usage. */
struct pvr_image_view {
   struct vk_image_view vk;

   /* Prepacked Texture Image dword 0 and 1. It will be copied to the
    * descriptor info during pvr_UpdateDescriptorSets().
    *
    * We create separate texture states for sampling, storage and input
    * attachment cases.
    */
   uint64_t texture_state[PVR_TEXTURE_STATE_MAX_ENUM][2];
};
345 
/* VkBufferView: view range/format plus pre-packed texture state. */
struct pvr_buffer_view {
   struct vk_object_base base;

   /* Size of the viewed range, in bytes. */
   uint64_t range;
   VkFormat format;

   /* Prepacked Texture dword 0 and 1. It will be copied to the descriptor
    * during pvr_UpdateDescriptorSets().
    */
   uint64_t texture_state[2];
};
357 
/* Limits for a single transfer command. */
#define PVR_TRANSFER_MAX_SOURCES 10U
#define PVR_TRANSFER_MAX_CUSTOM_MAPPINGS 6U

/** A surface describes a source or destination for a transfer operation. */
struct pvr_transfer_cmd_surface {
   pvr_dev_addr_t dev_addr;

   /* Memory address for extra U/V planes. */
   pvr_dev_addr_t uv_address[2];

   /* Surface width in texels. */
   uint32_t width;

   /* Surface height in texels. */
   uint32_t height;

   /* Surface depth in texels. */
   uint32_t depth;

   /* Z position in a 3D texture. 0.0f <= z_position <= depth. */
   float z_position;

   /* Stride in texels. */
   uint32_t stride;

   VkFormat vk_format;

   enum pvr_memlayout mem_layout;

   uint32_t sample_count;
};
388 
/* Maps a source rectangle onto a destination rectangle, with optional
 * horizontal/vertical flips.
 */
struct pvr_rect_mapping {
   VkRect2D src_rect;
   VkRect2D dst_rect;
   bool flip_x;
   bool flip_y;
};
395 
/* One source of a transfer command: surface, rect mappings and sampling
 * parameters.
 */
struct pvr_transfer_cmd_source {
   struct pvr_transfer_cmd_surface surface;

   /* Number of valid entries in mappings[]. */
   uint32_t mapping_count;
   struct pvr_rect_mapping mappings[PVR_TRANSFER_MAX_CUSTOM_MAPPINGS];

   /* In the case of a simple 1:1 copy, this setting does not affect the output
    * but will affect performance. Use clamp to edge when possible.
    */
   /* This is of type enum PVRX(TEXSTATE_ADDRMODE). */
   int addr_mode;

   /* Source filtering method. */
   enum pvr_filter filter;

   /* MSAA resolve operation. */
   enum pvr_resolve_op resolve_op;
};
414 
/* A single transfer (copy/blit/clear) operation recorded in a command
 * buffer's transfer sub-command.
 */
struct pvr_transfer_cmd {
   /* Node to link this cmd into the transfer_cmds list in
    * pvr_sub_cmd::transfer structure.
    */
   struct list_head link;

   uint32_t flags;

   /* Number of valid entries in sources[]. */
   uint32_t source_count;

   struct pvr_transfer_cmd_source sources[PVR_TRANSFER_MAX_SOURCES];

   /* Clear color, one float/int union per channel. */
   union fi clear_color[4];

   struct pvr_transfer_cmd_surface dst;

   VkRect2D scissor;

   /* Pointer to cmd buffer this transfer cmd belongs to. This is mainly used
    * to link buffer objects allocated during job submission into
    * cmd_buffer::bo_list head.
    */
   struct pvr_cmd_buffer *cmd_buffer;

   /* Deferred RTA clears are allocated from pvr_cmd_buffer->deferred_clears and
    * cannot be freed directly.
    */
   bool is_deferred_clear;
};
444 
/* Graphics sub-command: one hardware render job plus the state gathered
 * while recording it.
 */
struct pvr_sub_cmd_gfx {
   const struct pvr_framebuffer *framebuffer;

   struct pvr_render_job job;

   struct pvr_suballoc_bo *depth_bias_bo;
   struct pvr_suballoc_bo *scissor_bo;

   /* Tracking how the loaded depth/stencil values are being used. */
   enum pvr_depth_stencil_usage depth_usage;
   enum pvr_depth_stencil_usage stencil_usage;

   /* Tracking whether the subcommand modifies depth/stencil. */
   bool modifies_depth;
   bool modifies_stencil;

   /* Store the render to a scratch buffer. */
   bool barrier_store;
   /* Load the render (stored with a `barrier_store`) as a background to the
    * current render.
    */
   bool barrier_load;

   const struct pvr_query_pool *query_pool;
   struct util_dynarray sec_query_indices;

   /* Control stream builder object */
   struct pvr_csb control_stream;

   /* Required iff pvr_sub_cmd_gfx_requires_split_submit() returns true. */
   struct pvr_bo *terminate_ctrl_stream;

   uint32_t hw_render_idx;

   uint32_t max_tiles_in_flight;

   /* True if no draws were recorded into this sub command. */
   bool empty_cmd;

   /* True if any fragment shader used in this sub command uses atomic
    * operations.
    */
   bool frag_uses_atomic_ops;

   bool disable_compute_overlap;

   /* True if any fragment shader used in this sub command has side
    * effects.
    */
   bool frag_has_side_effects;

   /* True if any vertex shader used in this sub command contains both
    * texture reads and texture writes.
    */
   bool vertex_uses_texture_rw;

   /* True if any fragment shader used in this sub command contains
    * both texture reads and texture writes.
    */
   bool frag_uses_texture_rw;

   bool has_occlusion_query;

   bool wait_on_previous_transfer;
};
509 
/* Compute sub-command: one compute job's control stream and the shader
 * properties that affect its scheduling.
 */
struct pvr_sub_cmd_compute {
   /* Control stream builder object. */
   struct pvr_csb control_stream;

   uint32_t num_shared_regs;

   /* True if any shader used in this sub command uses atomic
    * operations.
    */
   bool uses_atomic_ops;

   bool uses_barrier;

   bool pds_sw_barrier_requires_clearing;
};
525 
/* Transfer sub-command: a list of pvr_transfer_cmd operations. */
struct pvr_sub_cmd_transfer {
   /* True if this sub command must be serialized with fragment work. */
   bool serialize_with_frag;

   /* Pointer to the actual transfer command list, allowing primary and
    * secondary sub-commands to share the same list.
    */
   struct list_head *transfer_cmds;

   /* List of pvr_transfer_cmd type structures. Do not access the list
    * directly, but always use the transfer_cmds pointer above.
    */
   struct list_head transfer_cmds_priv;
};
539 
/* Event sub-command: set/reset/wait/barrier operation. The anonymous union
 * member in use is selected by 'type'.
 */
struct pvr_sub_cmd_event {
   enum pvr_event_type type;

   union {
      struct pvr_sub_cmd_event_set_reset {
         struct pvr_event *event;
         /* Stages to wait for until the event is set or reset. */
         uint32_t wait_for_stage_mask;
      } set_reset;

      struct pvr_sub_cmd_event_wait {
         /* Number of entries in events[] and wait_at_stage_masks[]. */
         uint32_t count;
         /* Events to wait for before resuming. */
         struct pvr_event **events;
         /* Stages to wait at. */
         uint32_t *wait_at_stage_masks;
      } wait;

      struct pvr_sub_cmd_event_barrier {
         bool in_render_pass;

         /* Stages to wait for. */
         uint32_t wait_for_stage_mask;
         /* Stages to wait at. */
         uint32_t wait_at_stage_mask;
      } barrier;
   };
};
568 
/* One sub-command of a command buffer. The anonymous union member in use is
 * selected by 'type'.
 */
struct pvr_sub_cmd {
   /* This links the subcommand in pvr_cmd_buffer:sub_cmds list. */
   struct list_head link;

   enum pvr_sub_cmd_type type;

   /* True if the sub_cmd is owned by this command buffer. False if taken from
    * a secondary command buffer, in that case we are not supposed to free any
    * resources associated with the sub_cmd.
    */
   bool owned;

   union {
      struct pvr_sub_cmd_gfx gfx;
      struct pvr_sub_cmd_compute compute;
      struct pvr_sub_cmd_transfer transfer;
      struct pvr_sub_cmd_event event;
   };
};
588 
/* State of the render pass instance currently being recorded. */
struct pvr_render_pass_info {
   const struct pvr_render_pass *pass;
   struct pvr_framebuffer *framebuffer;

   struct pvr_image_view **attachments;

   uint32_t subpass_idx;
   uint32_t current_hw_subpass;

   VkRect2D render_area;

   uint32_t clear_value_count;
   VkClearValue *clear_values;

   VkPipelineBindPoint pipeline_bind_point;

   bool process_empty_tiles;
   bool enable_bg_tag;
   uint32_t isp_userpass;
};
609 
/* Shadow copy of the hardware PPP (per-primitive processing) state words,
 * tracked so only changed words need to be re-emitted.
 */
struct pvr_ppp_state {
   uint32_t header;

   struct {
      /* TODO: Can we get rid of the "control" field? */
      struct PVRX(TA_STATE_ISPCTL) control_struct;
      /* Packed copy of control_struct. */
      uint32_t control;

      /* Packed ISP state words for front and back facing primitives. */
      uint32_t front_a;
      uint32_t front_b;
      uint32_t back_a;
      uint32_t back_b;
   } isp;

   struct pvr_ppp_dbsc {
      uint16_t scissor_index;
      uint16_t depthbias_index;
   } depthbias_scissor_indices;

   /* PDS pixel shader program state words. */
   struct {
      uint32_t pixel_shader_base;
      uint32_t texture_uniform_code_base;
      uint32_t size_info1;
      uint32_t size_info2;
      uint32_t varying_base;
      uint32_t texture_state_data_base;
      uint32_t uniform_state_data_base;
   } pds;

   struct {
      uint32_t word0;
      uint32_t word1;
   } region_clipping;

   struct {
      uint32_t a0;
      uint32_t m0;
      uint32_t a1;
      uint32_t m1;
      uint32_t a2;
      uint32_t m2;
   } viewports[PVR_MAX_VIEWPORTS];

   uint32_t viewport_count;

   uint32_t output_selects;

   uint32_t varying_word[2];

   uint32_t ppp_control;
};
661 
/* Represents a control stream related command that is deferred for execution in
 * a secondary command buffer. The anonymous union member in use is selected
 * by 'type'.
 */
struct pvr_deferred_cs_command {
   enum pvr_deferred_cs_command_type type;
   union {
      struct {
         struct pvr_ppp_dbsc state;

         /* VDM state words to patch with the final dbsc indices. */
         uint32_t *vdm_state;
      } dbsc;

      struct {
         struct pvr_ppp_dbsc state;

         /* PPP control stream BO and the offset within it to patch. */
         struct pvr_suballoc_bo *ppp_cs_bo;
         uint32_t patch_offset;
      } dbsc2;
   };
};
682 
/* Parameters of the most recent draw call, tracked to detect changes that
 * require a different PDS attribute program variant.
 */
struct pvr_cmd_buffer_draw_state {
   uint32_t base_instance;
   uint32_t base_vertex;
   bool draw_indirect;
   bool draw_indexed;
};
689 
/* All mutable state of a command buffer while it is being recorded. */
struct pvr_cmd_buffer_state {
   /* Pipeline binding. */
   const struct pvr_graphics_pipeline *gfx_pipeline;

   const struct pvr_compute_pipeline *compute_pipeline;

   struct pvr_render_pass_info render_pass_info;

   struct pvr_sub_cmd *current_sub_cmd;

   struct pvr_ppp_state ppp_state;

   /* PPP header tracking which state words must be re-emitted. */
   struct PVRX(TA_STATE_HEADER) emit_header;

   struct pvr_vertex_binding vertex_bindings[PVR_MAX_VERTEX_INPUT_BINDINGS];

   struct {
      struct pvr_buffer *buffer;
      VkDeviceSize offset;
      VkIndexType type;
   } index_buffer_binding;

   struct {
      uint8_t data[PVR_MAX_PUSH_CONSTANTS_SIZE];
      VkShaderStageFlags dirty_stages;
      /* Indicates if the whole push constants buffer was uploaded. This avoids
       * having to upload the same stuff twice when the push constant range
       * covers both gfx and compute.
       */
      bool uploaded;
      pvr_dev_addr_t dev_addr;
   } push_constants;

   /* Array size of barriers_needed is based on number of sync pipeline
    * stages.
    */
   uint32_t barriers_needed[PVR_NUM_SYNC_PIPELINE_STAGES];

   struct pvr_descriptor_state gfx_desc_state;
   struct pvr_descriptor_state compute_desc_state;

   VkFormat depth_format;

   /* Dirty flags: set when the corresponding state changed since it was last
    * emitted/validated.
    */
   struct {
      bool compute_pipeline_binding : 1;
      bool compute_desc_dirty : 1;

      bool gfx_pipeline_binding : 1;
      bool gfx_desc_dirty : 1;

      bool vertex_bindings : 1;
      bool index_buffer_binding : 1;
      bool vertex_descriptors : 1;
      bool fragment_descriptors : 1;

      bool isp_userpass : 1;

      /* Some draw state needs to be tracked for changes between draw calls
       * i.e. if we get a draw with baseInstance=0, followed by a call with
       * baseInstance=1 that needs to cause us to select a different PDS
       * attrib program and update the BASE_INSTANCE PDS const. If only
       * baseInstance changes then we just have to update the data section.
       */
      bool draw_base_instance : 1;
      bool draw_variant : 1;

      bool vis_test;
   } dirty;

   struct pvr_cmd_buffer_draw_state draw_state;

   struct {
      uint32_t code_offset;
      const struct pvr_pds_info *info;
   } pds_shader;

   /* Occlusion query state: pool, whether the visibility test is active, and
    * the visibility register in use.
    */
   const struct pvr_query_pool *query_pool;
   bool vis_test_enabled;
   uint32_t vis_reg;

   struct util_dynarray query_indices;

   uint32_t max_shared_regs;

   /* Address of data segment for vertex attrib upload program. */
   uint32_t pds_vertex_attrib_offset;

   uint32_t pds_fragment_descriptor_data_offset;
   uint32_t pds_compute_descriptor_data_offset;
};
780 
/* Do not change this. This is the format used for the depth_bias_array
 * elements uploaded to the device.
 */
struct pvr_depth_bias_state {
   /* Saved information from pCreateInfo. */
   float constant_factor;
   float slope_factor;
   float clamp;
};
790 
/* Do not change this. This is the format used for the scissor_array
 * elements uploaded to the device.
 */
struct pvr_scissor_words {
   /* Contains a packed IPF_SCISSOR_WORD_0. */
   uint32_t w0;
   /* Contains a packed IPF_SCISSOR_WORD_1. */
   uint32_t w1;
};
800 
/* VkCommandBuffer: recorded sub-commands plus the arrays that back
 * device-uploaded state (depth bias, scissors, deferred work).
 */
struct pvr_cmd_buffer {
   struct vk_command_buffer vk;

   struct pvr_device *device;

   /* Buffer usage flags */
   VkCommandBufferUsageFlags usage_flags;

   /* Array of struct pvr_depth_bias_state. */
   struct util_dynarray depth_bias_array;

   /* Array of struct pvr_scissor_words. */
   struct util_dynarray scissor_array;
   struct pvr_scissor_words scissor_words;

   struct pvr_cmd_buffer_state state;

   /* List of struct pvr_deferred_cs_command control stream related commands to
    * execute in secondary command buffer.
    */
   struct util_dynarray deferred_csb_commands;
   /* List of struct pvr_transfer_cmd used to emulate RTA clears on non RTA
    * capable cores.
    */
   struct util_dynarray deferred_clears;

   /* List of pvr_bo structs associated with this cmd buffer. */
   struct list_head bo_list;

   /* List of struct pvr_sub_cmd recorded into this command buffer. */
   struct list_head sub_cmds;
};
832 
/* Per-shader-stage descriptor upload state: the PDS descriptor program and
 * any compile-time static constants.
 */
struct pvr_stage_allocation_descriptor_state {
   struct pvr_pds_upload pds_code;
   /* Since we upload the code segment separately from the data segment
    * pds_code->data_size might be 0 whilst
    * pds_info->data_size_in_dwords might be >0 in the case of this struct
    * referring to the code upload.
    */
   struct pvr_pds_info pds_info;

   /* Already setup compile time static consts. */
   struct pvr_suballoc_bo *static_consts;
};
845 
/* One uploaded PDS vertex-attribute program variant. */
struct pvr_pds_attrib_program {
   struct pvr_pds_info info;
   /* The uploaded PDS program stored here only contains the code segment,
    * meaning the data size will be 0, unlike the data size stored in the
    * 'info' member above.
    */
   struct pvr_pds_upload program;
};
854 
/* Register allocation and shader properties for one pipeline stage. */
struct pvr_pipeline_stage_state {
   uint32_t const_shared_reg_count;
   uint32_t const_shared_reg_offset;
   uint32_t pds_temps_count;

   uint32_t coefficient_size;

   /* True if this shader uses any atomic operations. */
   bool uses_atomic_ops;

   /* True if this shader uses both texture reads and texture writes. */
   bool uses_texture_rw;

   /* Only used for compute stage. */
   bool uses_barrier;

   /* True if this shader has side effects */
   bool has_side_effects;

   /* True if this shader is simply a nop.end. */
   bool empty_program;
};
877 
/* Compiled compute shader: binary plus register usage information. */
struct pvr_compute_shader_state {
   /* Pointer to a buffer object that contains the shader binary. */
   struct pvr_suballoc_bo *bo;

   bool uses_atomic_ops;
   bool uses_barrier;
   /* E.g. GLSL shader uses gl_NumWorkGroups. */
   bool uses_num_workgroups;

   uint32_t const_shared_reg_count;
   uint32_t input_register_count;
   uint32_t work_size;
   uint32_t coefficient_register_count;
};
892 
/* Compiled vertex shader: binary, varying state and the PDS programs that
 * feed it attributes.
 */
struct pvr_vertex_shader_state {
   /* Pointer to a buffer object that contains the shader binary. */
   struct pvr_suballoc_bo *bo;
   uint32_t entry_offset;

   /* 2 since we only need STATE_VARYING{0,1} state words. */
   uint32_t varying[2];

   /* One PDS attribute program per draw variant (base instance, indirect,
    * etc.).
    */
   struct pvr_pds_attrib_program
      pds_attrib_programs[PVR_PDS_VERTEX_ATTRIB_PROGRAM_COUNT];

   struct pvr_pipeline_stage_state stage_state;
   /* FIXME: Move this into stage_state? */
   struct pvr_stage_allocation_descriptor_state descriptor_state;
   uint32_t vertex_input_size;
   uint32_t vertex_output_size;
   uint32_t user_clip_planes_mask;
};
911 
/* Compiled fragment shader: binary plus the PDS coefficient and fragment
 * launch programs.
 */
struct pvr_fragment_shader_state {
   /* Pointer to a buffer object that contains the shader binary. */
   struct pvr_suballoc_bo *bo;
   uint32_t entry_offset;

   struct pvr_pipeline_stage_state stage_state;
   /* FIXME: Move this into stage_state? */
   struct pvr_stage_allocation_descriptor_state descriptor_state;
   enum PVRX(TA_PASSTYPE) pass_type;

   struct pvr_pds_upload pds_coeff_program;
   struct pvr_pds_upload pds_fragment_program;
};
925 
/* Base type shared by graphics and compute pipelines. */
struct pvr_pipeline {
   struct vk_object_base base;

   enum pvr_pipeline_type type;

   /* Saved information from pCreateInfo. */
   struct pvr_pipeline_layout *layout;
};
934 
/* VkPipeline for compute: shader state plus the PDS programs that launch
 * it, including a variant supporting a non-zero base workgroup id.
 */
struct pvr_compute_pipeline {
   struct pvr_pipeline base;

   struct pvr_compute_shader_state shader_state;

   struct {
      /* Set when the base-workgroup variant program is in use. */
      uint32_t base_workgroup : 1;
   } flags;

   struct pvr_stage_allocation_descriptor_state descriptor_state;

   struct pvr_pds_upload primary_program;
   struct pvr_pds_info primary_program_info;

   struct pvr_pds_base_workgroup_program {
      struct pvr_pds_upload code_upload;

      uint32_t *data_section;
      /* Offset within the PDS data section at which the base workgroup id
       * resides.
       */
      uint32_t base_workgroup_data_patching_offset;

      struct pvr_pds_info info;
   } primary_base_workgroup_variant_program;
};
961 
/* VkPipeline for graphics: dynamic state plus vertex/fragment shader
 * state.
 */
struct pvr_graphics_pipeline {
   struct pvr_pipeline base;

   struct vk_dynamic_graphics_state dynamic_state;

   /* Derived and other state */
   size_t stage_indices[MESA_SHADER_FRAGMENT + 1];

   struct {
      struct pvr_vertex_shader_state vertex;
      struct pvr_fragment_shader_state fragment;
   } shader_state;
};
975 
/* VkQueryPool: device buffers holding query results and availability. */
struct pvr_query_pool {
   struct vk_object_base base;

   /* Stride of result_buffer to get to the start of the results for the next
    * Phantom.
    */
   uint32_t result_stride;

   uint32_t query_count;

   struct pvr_suballoc_bo *result_buffer;
   struct pvr_suballoc_bo *availability_buffer;
};
989 
/* Internal (driver-generated) compute pipeline used for helper dispatches
 * such as query operations.
 */
struct pvr_private_compute_pipeline {
   /* Used by pvr_compute_update_kernel_private(). */
   uint32_t pds_code_offset;
   uint32_t pds_data_offset;
   uint32_t pds_data_size_dw;
   uint32_t pds_temps_used;
   uint32_t coeff_regs_count;
   uint32_t unified_store_regs_count;
   VkExtent3D workgroup_size;

   /* Used by pvr_compute_update_shared_private(). */
   uint32_t pds_shared_update_code_offset;
   uint32_t pds_shared_update_data_offset;
   uint32_t pds_shared_update_data_size_dw;

   /* Used by both pvr_compute_update_{kernel,shared}_private(). */
   uint32_t const_shared_regs_count;

   pvr_dev_addr_t const_buffer_addr;
};
1010 
/* Parameters for a query operation job. The anonymous union member in use
 * is selected by 'type'.
 */
struct pvr_query_info {
   enum pvr_query_type type;

   union {
      struct {
         uint32_t num_query_indices;
         struct pvr_suballoc_bo *index_bo;
         uint32_t num_queries;
         struct pvr_suballoc_bo *availability_bo;
      } availability_write;

      struct {
         VkQueryPool query_pool;
         uint32_t first_query;
         uint32_t query_count;
      } reset_query_pool;

      struct {
         VkQueryPool query_pool;
         uint32_t first_query;
         uint32_t query_count;
         VkBuffer dst_buffer;
         VkDeviceSize dst_offset;
         VkDeviceSize stride;
         VkQueryResultFlags flags;
      } copy_query_results;
   };
};
1039 
/* A lazily-initialized render target dataset, guarded by mutex. */
struct pvr_render_target {
   struct pvr_rt_dataset *rt_dataset;

   pthread_mutex_t mutex;

   /* True once rt_dataset has been created. */
   bool valid;
};
1047 
/* VkFramebuffer: attachments plus pre-built per-render device state. */
struct pvr_framebuffer {
   struct vk_object_base base;

   /* Saved information from pCreateInfo. */
   uint32_t width;
   uint32_t height;
   uint32_t layers;

   uint32_t attachment_count;
   struct pvr_image_view **attachments;

   /* Derived and other state. */
   struct pvr_suballoc_bo *ppp_state_bo;
   /* PPP state size in dwords. */
   size_t ppp_state_size;

   uint32_t render_targets_count;
   struct pvr_render_target *render_targets;

   /* Scratch buffer used for SPM partial renders. */
   struct pvr_spm_scratch_buffer *scratch_buffer;

   /* Per hardware render: SPM end-of-tile and background object state. */
   uint32_t render_count;
   struct pvr_spm_eot_state *spm_eot_state_per_render;
   struct pvr_spm_bgobj_state *spm_bgobj_state_per_render;
};
1073 
/* Per-attachment state of a render pass: values captured from the
 * VkAttachmentDescription plus driver-derived data.
 */
struct pvr_render_pass_attachment {
   /* Saved information from pCreateInfo. */
   VkAttachmentLoadOp load_op;

   VkAttachmentStoreOp store_op;

   VkAttachmentLoadOp stencil_load_op;

   VkAttachmentStoreOp stencil_store_op;

   VkFormat vk_format;
   uint32_t sample_count;
   VkImageLayout initial_layout;

   /* Derived and other state. */
   VkImageAspectFlags aspects;

   /* Can this surface be resolved by the PBE. */
   bool is_pbe_downscalable;

   /* Index of this attachment within the render pass' attachment array. */
   uint32_t index;
};
1096 
/* Per-subpass state of a render pass. Attachment references are stored as
 * indices into the render pass' attachment array.
 */
struct pvr_render_subpass {
   /* Saved information from pCreateInfo. */
   /* The number of samples per color attachment (or depth attachment if
    * z-only).
    */
   /* FIXME: rename to 'samples' to match struct pvr_image */
   uint32_t sample_count;

   /* Color attachment indices (color_count entries).
    * NOTE(review): resolve_attachments presumably parallels
    * color_attachments when non-NULL — confirm at the users.
    */
   uint32_t color_count;
   uint32_t *color_attachments;
   uint32_t *resolve_attachments;

   /* Input attachment indices (input_count entries). */
   uint32_t input_count;
   uint32_t *input_attachments;

   uint32_t depth_stencil_attachment;

   /* Derived and other state. */
   /* Indices of subpasses this subpass depends on (dep_count entries). */
   uint32_t dep_count;
   uint32_t *dep_list;

   /* Array with dep_count elements. flush_on_dep[x] is true if this subpass
    * and the subpass dep_list[x] can't be in the same hardware render.
    */
   bool *flush_on_dep;

   /* Index of this subpass within the render pass' subpass array. */
   uint32_t index;

   uint32_t isp_userpass;

   VkPipelineBindPoint pipeline_bind_point;
};
1129 
/* Driver object backing a VkRenderPass. */
struct pvr_render_pass {
   struct vk_object_base base;

   /* Saved information from pCreateInfo. */
   uint32_t attachment_count;

   struct pvr_render_pass_attachment *attachments;

   uint32_t subpass_count;

   struct pvr_render_subpass *subpasses;

   /* Hardware render setup derived from the subpass layout. */
   struct pvr_renderpass_hwsetup *hw_setup;

   /* Derived and other state. */
   /* FIXME: rename to 'max_samples' as we use 'samples' elsewhere */
   uint32_t max_sample_count;

   /* The maximum number of tile buffers to use in any subpass. */
   uint32_t max_tilebuffer_count;
};
1151 
/* Maximum number of render targets for the clears/loads state in a load
 * op. Doubled relative to PVR_MAX_COLOR_ATTACHMENTS to account for
 * resolve attachments alongside the color attachments.
 */
1155 #define PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS (PVR_MAX_COLOR_ATTACHMENTS * 2)
1156 
/* State for a load op: the USC/PDS programs implementing attachment clears
 * and loads, plus the information used to build them.
 */
struct pvr_load_op {
   /* NOTE(review): presumably selects which arm of the union below is
    * active (hw_render vs. subpass) — confirm at the users.
    */
   bool is_hw_object;

   /* Uploaded USC fragment program performing the clears/loads. */
   struct pvr_suballoc_bo *usc_frag_prog_bo;
   uint32_t const_shareds_count;
   uint32_t shareds_dest_offset;
   uint32_t shareds_count;

   struct pvr_pds_upload pds_frag_prog;

   struct pvr_pds_upload pds_tex_state_prog;
   uint32_t temps_count;

   union {
      const struct pvr_renderpass_hwsetup_render *hw_render;
      const struct pvr_render_subpass *subpass;
   };

   /* TODO: We might not need to keep all of this around. Some stuff might just
    * be for the compiler to ingest which we can then discard.
    */
   struct {
      /* Bit i set: render target i is cleared by this load op. */
      uint16_t rt_clear_mask;
      /* Bit i set: render target i is loaded by this load op. */
      uint16_t rt_load_mask;

      uint16_t unresolved_msaa_mask;

      /* The format to write to the output regs. */
      VkFormat dest_vk_format[PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS];

#define PVR_NO_DEPTH_CLEAR_TO_REG (-1)
      /* If >= 0, write a depth clear value to the specified pixel output. */
      int32_t depth_clear_to_reg;
   } clears_loads_state;
};
1192 
/* Compile-time check that a bitmask field is wide enough to hold one bit
 * per possible render target; used below on the pvr_load_op masks.
 */
#define CHECK_MASK_SIZE(_struct_type, _field_name, _nr_bits)               \
   static_assert(sizeof(((struct _struct_type *)NULL)->_field_name) * 8 >= \
                    _nr_bits,                                              \
                 #_field_name " mask of struct " #_struct_type " too small")

CHECK_MASK_SIZE(pvr_load_op,
                clears_loads_state.rt_clear_mask,
                PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
CHECK_MASK_SIZE(pvr_load_op,
                clears_loads_state.rt_load_mask,
                PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
CHECK_MASK_SIZE(pvr_load_op,
                clears_loads_state.unresolved_msaa_mask,
                PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);

#undef CHECK_MASK_SIZE
1209 
1210 uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
1211    const struct pvr_device_info *dev_info,
1212    const struct pvr_device_runtime_info *dev_runtime_info,
1213    uint32_t fs_common_size,
1214    uint32_t min_tiles_in_flight);
1215 
1216 VkResult pvr_wsi_init(struct pvr_physical_device *pdevice);
1217 void pvr_wsi_finish(struct pvr_physical_device *pdevice);
1218 
1219 VkResult pvr_queues_create(struct pvr_device *device,
1220                            const VkDeviceCreateInfo *pCreateInfo);
1221 void pvr_queues_destroy(struct pvr_device *device);
1222 
1223 VkResult pvr_bind_memory(struct pvr_device *device,
1224                          struct pvr_device_memory *mem,
1225                          VkDeviceSize offset,
1226                          VkDeviceSize size,
1227                          VkDeviceSize alignment,
1228                          struct pvr_winsys_vma **const vma_out,
1229                          pvr_dev_addr_t *const dev_addr_out);
1230 void pvr_unbind_memory(struct pvr_device *device, struct pvr_winsys_vma *vma);
1231 VkResult pvr_gpu_upload(struct pvr_device *device,
1232                         struct pvr_winsys_heap *heap,
1233                         const void *data,
1234                         size_t size,
1235                         uint64_t alignment,
1236                         struct pvr_suballoc_bo **const pvr_bo_out);
1237 VkResult pvr_gpu_upload_pds(struct pvr_device *device,
1238                             const uint32_t *data,
1239                             uint32_t data_size_dwords,
1240                             uint32_t data_alignment,
1241                             const uint32_t *code,
1242                             uint32_t code_size_dwords,
1243                             uint32_t code_alignment,
1244                             uint64_t min_alignment,
1245                             struct pvr_pds_upload *const pds_upload_out);
1246 VkResult pvr_gpu_upload_usc(struct pvr_device *device,
1247                             const void *code,
1248                             size_t code_size,
1249                             uint64_t code_alignment,
1250                             struct pvr_suballoc_bo **const pvr_bo_out);
1251 
1252 VkResult pvr_cmd_buffer_add_transfer_cmd(struct pvr_cmd_buffer *cmd_buffer,
1253                                          struct pvr_transfer_cmd *transfer_cmd);
1254 
1255 VkResult pvr_cmd_buffer_alloc_mem(struct pvr_cmd_buffer *cmd_buffer,
1256                                   struct pvr_winsys_heap *heap,
1257                                   uint64_t size,
1258                                   struct pvr_suballoc_bo **const pvr_bo_out);
1259 
1260 void pvr_calculate_vertex_cam_size(const struct pvr_device_info *dev_info,
1261                                    const uint32_t vs_output_size,
1262                                    const bool raster_enable,
1263                                    uint32_t *const cam_size_out,
1264                                    uint32_t *const vs_max_instances_out);
1265 
1266 void pvr_get_image_subresource_layout(const struct pvr_image *image,
1267                                       const VkImageSubresource *subresource,
1268                                       VkSubresourceLayout *layout);
1269 
1270 static inline struct pvr_compute_pipeline *
to_pvr_compute_pipeline(struct pvr_pipeline * pipeline)1271 to_pvr_compute_pipeline(struct pvr_pipeline *pipeline)
1272 {
1273    assert(pipeline->type == PVR_PIPELINE_TYPE_COMPUTE);
1274    return container_of(pipeline, struct pvr_compute_pipeline, base);
1275 }
1276 
1277 static inline struct pvr_graphics_pipeline *
to_pvr_graphics_pipeline(struct pvr_pipeline * pipeline)1278 to_pvr_graphics_pipeline(struct pvr_pipeline *pipeline)
1279 {
1280    assert(pipeline->type == PVR_PIPELINE_TYPE_GRAPHICS);
1281    return container_of(pipeline, struct pvr_graphics_pipeline, base);
1282 }
1283 
1284 static inline const struct pvr_image *
vk_to_pvr_image(const struct vk_image * image)1285 vk_to_pvr_image(const struct vk_image *image)
1286 {
1287    return container_of(image, const struct pvr_image, vk);
1288 }
1289 
1290 static inline const struct pvr_image *
pvr_image_view_get_image(const struct pvr_image_view * const iview)1291 pvr_image_view_get_image(const struct pvr_image_view *const iview)
1292 {
1293    return vk_to_pvr_image(iview->vk.image);
1294 }
1295 
1296 static enum pvr_pipeline_stage_bits
pvr_stage_mask(VkPipelineStageFlags2 stage_mask)1297 pvr_stage_mask(VkPipelineStageFlags2 stage_mask)
1298 {
1299    enum pvr_pipeline_stage_bits stages = 0;
1300 
1301    if (stage_mask & VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)
1302       return PVR_PIPELINE_STAGE_ALL_BITS;
1303 
1304    if (stage_mask & (VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT))
1305       stages |= PVR_PIPELINE_STAGE_ALL_GRAPHICS_BITS;
1306 
1307    if (stage_mask & (VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
1308                      VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
1309                      VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
1310                      VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
1311                      VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
1312                      VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT)) {
1313       stages |= PVR_PIPELINE_STAGE_GEOM_BIT;
1314    }
1315 
1316    if (stage_mask & (VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
1317                      VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
1318                      VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
1319                      VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) {
1320       stages |= PVR_PIPELINE_STAGE_FRAG_BIT;
1321    }
1322 
1323    if (stage_mask & (VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
1324                      VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT)) {
1325       stages |= PVR_PIPELINE_STAGE_COMPUTE_BIT;
1326    }
1327 
1328    if (stage_mask & (VK_PIPELINE_STAGE_TRANSFER_BIT))
1329       stages |= PVR_PIPELINE_STAGE_TRANSFER_BIT;
1330 
1331    return stages;
1332 }
1333 
1334 static inline enum pvr_pipeline_stage_bits
pvr_stage_mask_src(VkPipelineStageFlags2 stage_mask)1335 pvr_stage_mask_src(VkPipelineStageFlags2 stage_mask)
1336 {
1337    /* If the source is bottom of pipe, all stages will need to be waited for. */
1338    if (stage_mask & VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
1339       return PVR_PIPELINE_STAGE_ALL_BITS;
1340 
1341    return pvr_stage_mask(stage_mask);
1342 }
1343 
1344 static inline enum pvr_pipeline_stage_bits
pvr_stage_mask_dst(VkPipelineStageFlags2 stage_mask)1345 pvr_stage_mask_dst(VkPipelineStageFlags2 stage_mask)
1346 {
1347    /* If the destination is top of pipe, all stages should be blocked by prior
1348     * commands.
1349     */
1350    if (stage_mask & VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)
1351       return PVR_PIPELINE_STAGE_ALL_BITS;
1352 
1353    return pvr_stage_mask(stage_mask);
1354 }
1355 
pvr_sub_cmd_gfx_requires_split_submit(const struct pvr_sub_cmd_gfx * const sub_cmd)1356 static inline bool pvr_sub_cmd_gfx_requires_split_submit(
1357    const struct pvr_sub_cmd_gfx *const sub_cmd)
1358 {
1359    return sub_cmd->job.run_frag && sub_cmd->framebuffer->layers > 1;
1360 }
1361 
1362 /* This function is intended to be used when the error being set has been
1363  * returned from a function call, i.e. the error happened further down the
1364  * stack. `vk_command_buffer_set_error()` should be used at the point an error
1365  * occurs, i.e. VK_ERROR_* is being passed in.
1366  * This ensures we only ever get the error printed once.
1367  */
1368 static inline VkResult
pvr_cmd_buffer_set_error_unwarned(struct pvr_cmd_buffer * cmd_buffer,VkResult error)1369 pvr_cmd_buffer_set_error_unwarned(struct pvr_cmd_buffer *cmd_buffer,
1370                                   VkResult error)
1371 {
1372    assert(error != VK_SUCCESS);
1373 
1374    if (cmd_buffer->vk.record_result == VK_SUCCESS)
1375       cmd_buffer->vk.record_result = error;
1376 
1377    return error;
1378 }
1379 
1380 VkResult pvr_pds_fragment_program_create_and_upload(
1381    struct pvr_device *device,
1382    const VkAllocationCallbacks *allocator,
1383    const struct pvr_suballoc_bo *fragment_shader_bo,
1384    uint32_t fragment_temp_count,
1385    enum rogue_msaa_mode msaa_mode,
1386    bool has_phase_rate_change,
1387    struct pvr_pds_upload *const pds_upload_out);
1388 
1389 VkResult pvr_pds_unitex_state_program_create_and_upload(
1390    struct pvr_device *device,
1391    const VkAllocationCallbacks *allocator,
1392    uint32_t texture_kicks,
1393    uint32_t uniform_kicks,
1394    struct pvr_pds_upload *const pds_upload_out);
1395 
1396 VkResult pvr_device_tile_buffer_ensure_cap(struct pvr_device *device,
1397                                            uint32_t capacity,
1398                                            uint32_t size_in_bytes);
1399 
1400 VkResult
1401 pvr_cmd_buffer_upload_general(struct pvr_cmd_buffer *const cmd_buffer,
1402                               const void *const data,
1403                               const size_t size,
1404                               struct pvr_suballoc_bo **const pvr_bo_out);
1405 VkResult pvr_cmd_buffer_upload_pds(struct pvr_cmd_buffer *const cmd_buffer,
1406                                    const uint32_t *data,
1407                                    uint32_t data_size_dwords,
1408                                    uint32_t data_alignment,
1409                                    const uint32_t *code,
1410                                    uint32_t code_size_dwords,
1411                                    uint32_t code_alignment,
1412                                    uint64_t min_alignment,
1413                                    struct pvr_pds_upload *const pds_upload_out);
1414 
1415 VkResult pvr_cmd_buffer_start_sub_cmd(struct pvr_cmd_buffer *cmd_buffer,
1416                                       enum pvr_sub_cmd_type type);
1417 VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer);
1418 
1419 void pvr_compute_generate_fence(struct pvr_cmd_buffer *cmd_buffer,
1420                                 struct pvr_sub_cmd_compute *const sub_cmd,
1421                                 bool deallocate_shareds);
1422 void pvr_compute_update_shared_private(
1423    struct pvr_cmd_buffer *cmd_buffer,
1424    struct pvr_sub_cmd_compute *const sub_cmd,
1425    struct pvr_private_compute_pipeline *pipeline);
1426 void pvr_compute_update_kernel_private(
1427    struct pvr_cmd_buffer *cmd_buffer,
1428    struct pvr_sub_cmd_compute *const sub_cmd,
1429    struct pvr_private_compute_pipeline *pipeline,
1430    const uint32_t global_workgroup_size[static const PVR_WORKGROUP_DIMENSIONS]);
1431 
1432 size_t pvr_pds_get_max_descriptor_upload_const_map_size_in_bytes(void);
1433 
1434 VkResult pvr_pds_compute_shader_create_and_upload(
1435    struct pvr_device *device,
1436    struct pvr_pds_compute_shader_program *program,
1437    struct pvr_pds_upload *const pds_upload_out);
1438 
1439 VkResult pvr_device_create_compute_query_programs(struct pvr_device *device);
1440 void pvr_device_destroy_compute_query_programs(struct pvr_device *device);
1441 
1442 VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
1443                                const struct pvr_query_info *query_info);
1444 
1445 void pvr_reset_graphics_dirty_state(struct pvr_cmd_buffer *const cmd_buffer,
1446                                     bool start_geom);
1447 
1448 const struct pvr_renderpass_hwsetup_subpass *
1449 pvr_get_hw_subpass(const struct pvr_render_pass *pass, const uint32_t subpass);
1450 
1451 void pvr_descriptor_size_info_init(
1452    const struct pvr_device *device,
1453    VkDescriptorType type,
1454    struct pvr_descriptor_size_info *const size_info_out);
1455 
1456 #define PVR_FROM_HANDLE(__pvr_type, __name, __handle) \
1457    VK_FROM_HANDLE(__pvr_type, __name, __handle)
1458 
1459 VK_DEFINE_HANDLE_CASTS(pvr_cmd_buffer,
1460                        vk.base,
1461                        VkCommandBuffer,
1462                        VK_OBJECT_TYPE_COMMAND_BUFFER)
1463 VK_DEFINE_HANDLE_CASTS(pvr_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
1464 VK_DEFINE_HANDLE_CASTS(pvr_instance,
1465                        vk.base,
1466                        VkInstance,
1467                        VK_OBJECT_TYPE_INSTANCE)
1468 VK_DEFINE_HANDLE_CASTS(pvr_physical_device,
1469                        vk.base,
1470                        VkPhysicalDevice,
1471                        VK_OBJECT_TYPE_PHYSICAL_DEVICE)
1472 VK_DEFINE_HANDLE_CASTS(pvr_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
1473 
1474 VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_device_memory,
1475                                base,
1476                                VkDeviceMemory,
1477                                VK_OBJECT_TYPE_DEVICE_MEMORY)
1478 VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)
1479 VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_buffer,
1480                                vk.base,
1481                                VkBuffer,
1482                                VK_OBJECT_TYPE_BUFFER)
1483 VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_image_view,
1484                                vk.base,
1485                                VkImageView,
1486                                VK_OBJECT_TYPE_IMAGE_VIEW)
1487 VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_buffer_view,
1488                                base,
1489                                VkBufferView,
1490                                VK_OBJECT_TYPE_BUFFER_VIEW)
1491 VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_descriptor_set_layout,
1492                                base,
1493                                VkDescriptorSetLayout,
1494                                VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
1495 VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_descriptor_set,
1496                                base,
1497                                VkDescriptorSet,
1498                                VK_OBJECT_TYPE_DESCRIPTOR_SET)
1499 VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
1500 VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_descriptor_pool,
1501                                base,
1502                                VkDescriptorPool,
1503                                VK_OBJECT_TYPE_DESCRIPTOR_POOL)
1504 VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_sampler,
1505                                vk.base,
1506                                VkSampler,
1507                                VK_OBJECT_TYPE_SAMPLER)
1508 VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_pipeline_layout,
1509                                base,
1510                                VkPipelineLayout,
1511                                VK_OBJECT_TYPE_PIPELINE_LAYOUT)
1512 VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_pipeline,
1513                                base,
1514                                VkPipeline,
1515                                VK_OBJECT_TYPE_PIPELINE)
1516 VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_query_pool,
1517                                base,
1518                                VkQueryPool,
1519                                VK_OBJECT_TYPE_QUERY_POOL)
1520 VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_framebuffer,
1521                                base,
1522                                VkFramebuffer,
1523                                VK_OBJECT_TYPE_FRAMEBUFFER)
1524 VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_render_pass,
1525                                base,
1526                                VkRenderPass,
1527                                VK_OBJECT_TYPE_RENDER_PASS)
1528 
/* Entry guard for vkCmd* implementations that return void: if the command
 * buffer is not currently recording, or a previous recording error has
 * been latched on it, log the problem and return early from the caller.
 *
 * NOTE: only usable inside functions returning void (the macro plainly
 * `return`s).
 */
#define PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer)                  \
   do {                                                                      \
      struct pvr_cmd_buffer *const _cmd_buffer = (cmd_buffer);               \
      const VkResult _record_result =                                        \
         vk_command_buffer_get_record_result(&_cmd_buffer->vk);              \
                                                                             \
      if (_cmd_buffer->vk.state != MESA_VK_COMMAND_BUFFER_STATE_RECORDING) { \
         vk_errorf(_cmd_buffer,                                              \
                   VK_ERROR_OUT_OF_DEVICE_MEMORY,                            \
                   "Command buffer is not in recording state");              \
         return;                                                             \
      } else if (_record_result < VK_SUCCESS) {                              \
         vk_errorf(_cmd_buffer,                                              \
                   _record_result,                                           \
                   "Skipping function as command buffer has "                \
                   "previous build error");                                  \
         return;                                                             \
      }                                                                      \
   } while (0)
1548 
1549 /**
1550  * Print a FINISHME message, including its source location.
1551  */
1552 #define pvr_finishme(format, ...)              \
1553    do {                                        \
1554       static bool reported = false;            \
1555       if (!reported) {                         \
1556          mesa_logw("%s:%d: FINISHME: " format, \
1557                    __FILE__,                   \
1558                    __LINE__,                   \
1559                    ##__VA_ARGS__);             \
1560          reported = true;                      \
1561       }                                        \
1562    } while (false)
1563 
/* Store _value into _buffer with bounds/alignment checks (debug builds).
 *
 * _buffer's element type must match _value's type (static_assert'ed).
 * _offset and _max are expressed in dwords; the offset must be a multiple
 * of the value's size in dwords, and the write must end at or before
 * _max. The buffer is indexed in _value-sized elements, hence the
 * division of the dword offset by __nr_dwords.
 */
#define PVR_WRITE(_buffer, _value, _offset, _max)                \
   do {                                                          \
      __typeof__(_value) __value = _value;                       \
      uint64_t __offset = _offset;                               \
      uint32_t __nr_dwords = sizeof(__value) / sizeof(uint32_t); \
      static_assert(__same_type(*_buffer, __value),              \
                    "Buffer and value type mismatch");           \
      assert((__offset + __nr_dwords) <= (_max));                \
      assert((__offset % __nr_dwords) == 0U);                    \
      _buffer[__offset / __nr_dwords] = __value;                 \
   } while (0)
1575 
/* A non-fatal assert. Useful for debugging.
 *
 * Unlike assert(), a failed condition only logs an error (in MESA_DEBUG
 * builds) and execution continues; release builds expand to nothing.
 */
#if MESA_DEBUG
#   define pvr_assert(x)                                           \
      ({                                                           \
         if (unlikely(!(x)))                                       \
            mesa_loge("%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \
      })
#else
#   define pvr_assert(x)
#endif
1586 
1587 #endif /* PVR_PRIVATE_H */
1588