xref: /aosp_15_r20/external/mesa3d/src/amd/common/ac_nir_meta.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2024 Advanced Micro Devices, Inc.
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #ifndef AC_NIR_META_H
8 #define AC_NIR_META_H
9 
10 #include "ac_gpu_info.h"
11 #include "nir.h"
12 #include "util/box.h"
13 
14 union ac_ps_resolve_key {
15    struct {
16       bool use_aco:1;
17       bool src_is_array:1;
18       uint8_t log_samples:2;
19       uint8_t last_src_channel:2; /* this shouldn't be greater than last_dst_channel */
20       uint8_t last_dst_channel:2;
21       bool x_clamp_to_edge:1;
22       bool y_clamp_to_edge:1;
23       bool a16:1;
24       bool d16:1;
25    };
26    uint64_t key; /* use with hash_table_u64 */
27 };
28 
29 /* Only immutable settings. */
30 struct ac_ps_resolve_options {
31    const nir_shader_compiler_options *nir_options;
32    const struct radeon_info *info;
33    bool use_aco;     /* global driver setting */
34    bool no_fmask;    /* FMASK disabled by a debug option, ignored on GFX11+ */
35    bool print_key;   /* print ac_ps_resolve_key into stderr */
36 };
37 
38 nir_shader *
39 ac_create_resolve_ps(const struct ac_ps_resolve_options *options,
40                       const union ac_ps_resolve_key *key);
41 
42 /* Universal optimized compute shader for image blits and clears. */
43 #define SI_MAX_COMPUTE_BLIT_LANE_SIZE  16
44 #define SI_MAX_COMPUTE_BLIT_SAMPLES    8
45 
46 /* This describes all possible variants of the compute blit shader. */
47 union ac_cs_blit_key {
48    struct {
49       bool use_aco:1;
50       /* Workgroup settings. */
51       uint8_t wg_dim:2; /* 1, 2, or 3 */
52       bool has_start_xyz:1;
53       /* The size of a block of pixels that a single thread will process. */
54       uint8_t log_lane_width:3;
55       uint8_t log_lane_height:2;
56       uint8_t log_lane_depth:2;
57       /* Declaration modifiers. */
58       bool is_clear:1;
59       bool src_is_1d:1;
60       bool dst_is_1d:1;
61       bool src_is_msaa:1;
62       bool dst_is_msaa:1;
63       bool src_has_z:1;
64       bool dst_has_z:1;
65       bool a16:1;
66       bool d16:1;
67       uint8_t log_samples:2;
68       bool sample0_only:1; /* src is MSAA, dst is not MSAA, log2_samples is ignored */
69       /* Source coordinate modifiers. */
70       bool x_clamp_to_edge:1;
71       bool y_clamp_to_edge:1;
72       bool flip_x:1;
73       bool flip_y:1;
74       /* Output modifiers. */
75       bool sint_to_uint:1;
76       bool uint_to_sint:1;
77       bool dst_is_srgb:1;
78       bool use_integer_one:1;
79       uint8_t last_src_channel:2; /* this shouldn't be greater than last_dst_channel */
80       uint8_t last_dst_channel:2;
81    };
82    uint64_t key;
83 };
84 
85 struct ac_cs_blit_options {
86    /* Global options. */
87    const nir_shader_compiler_options *nir_options;
88    const struct radeon_info *info;
89    bool use_aco;        /* global driver setting */
90    bool no_fmask;       /* FMASK disabled by a global debug option, ignored on GFX11+ */
91    bool print_key;      /* print ac_ps_resolve_key into stderr */
92    bool fail_if_slow;   /* fail if a gfx blit is faster, set to false on compute queues */
93 
94    bool is_nested;      /* for internal use, don't set */
95 };
96 
97 struct ac_cs_blit_description
98 {
99    struct {
100       struct radeon_surf *surf;
101       uint8_t dim;            /* 1 = 1D texture, 2 = 2D texture, 3 = 3D texture */
102       bool is_array;          /* array or cube texture */
103       unsigned width0;        /* level 0 width */
104       unsigned height0;       /* level 0 height */
105       uint8_t num_samples;
106       uint8_t level;
107       struct pipe_box box;       /* negative width, height only legal for src */
108       enum pipe_format format;   /* format reinterpretation */
109    } dst, src;
110 
111    bool is_gfx_queue;
112    bool dst_has_dcc;
113    bool sample0_only;                  /* copy sample 0 instead of resolving */
114    union pipe_color_union clear_color; /* if src.surf == NULL, this is the clear color */
115 };
116 
117 /* Dispatch parameters generated by the blit. */
118 struct ac_cs_blit_dispatch {
119    union ac_cs_blit_key shader_key;
120    uint32_t user_data[8];        /* for nir_intrinsic_load_user_data_amd */
121 
122    unsigned wg_size[3];          /* variable workgroup size (NUM_THREAD_FULL) */
123    unsigned last_wg_size[3];     /* workgroup size of the last workgroup (NUM_THREAD_PARTIAL) */
124    unsigned num_workgroups[3];   /* DISPATCH_DIRECT parameters */
125 };
126 
127 struct ac_cs_blit_dispatches {
128    unsigned num_dispatches;
129    struct ac_cs_blit_dispatch dispatches[7];
130 };
131 
132 nir_shader *
133 ac_create_blit_cs(const struct ac_cs_blit_options *options, const union ac_cs_blit_key *key);
134 
135 bool
136 ac_prepare_compute_blit(const struct ac_cs_blit_options *options,
137                         const struct ac_cs_blit_description *blit,
138                         struct ac_cs_blit_dispatches *dispatches);
139 
140 /* clear_buffer/copy_buffer compute shader. */
141 union ac_cs_clear_copy_buffer_key {
142    struct {
143       bool is_clear:1;
144       unsigned dwords_per_thread:3; /* 1..4 allowed */
145       bool clear_value_size_is_12:1;
146       bool src_is_sparse:1;
147       /* Unaligned clears and copies. */
148       unsigned src_align_offset:2; /* how much is the source address unaligned */
149       unsigned dst_align_offset:4; /* the first thread shouldn't write this many bytes */
150       unsigned dst_last_thread_bytes:4; /* if non-zero, the last thread should write this many bytes */
151       bool dst_single_thread_unaligned:1; /* only 1 thread executes, both previous fields apply */
152       bool has_start_thread:1; /* whether the first few threads should be skipped, making later
153                                   waves start on a 256B boundary */
154    };
155    uint64_t key;
156 };
157 
158 struct ac_cs_clear_copy_buffer_options {
159    const nir_shader_compiler_options *nir_options;
160    const struct radeon_info *info;
161    bool print_key;      /* print the shader key into stderr */
162    bool fail_if_slow;   /* fail if a gfx blit is faster, set to false on compute queues */
163 };
164 
165 struct ac_cs_clear_copy_buffer_info {
166    unsigned dst_offset;
167    unsigned src_offset;
168    unsigned size;
169    unsigned clear_value_size;
170    uint32_t clear_value[4];
171    unsigned dwords_per_thread;   /* Set to 0 to let the code choose the optimal value. */
172    bool render_condition_enabled;
173    bool dst_is_vram;
174    bool src_is_vram;
175    bool src_is_sparse;
176 };
177 
178 struct ac_cs_clear_copy_buffer_dispatch {
179    union ac_cs_clear_copy_buffer_key shader_key;
180    uint32_t user_data[6];        /* for nir_intrinsic_load_user_data_amd */
181    unsigned num_ssbos;
182    unsigned workgroup_size;
183    unsigned num_threads;
184 
185    struct {
186       unsigned offset;
187       unsigned size;
188    } ssbo[2];
189 };
190 
191 nir_shader *
192 ac_create_clear_copy_buffer_cs(struct ac_cs_clear_copy_buffer_options *options,
193                                union ac_cs_clear_copy_buffer_key *key);
194 
195 bool
196 ac_prepare_cs_clear_copy_buffer(const struct ac_cs_clear_copy_buffer_options *options,
197                                 const struct ac_cs_clear_copy_buffer_info *info,
198                                 struct ac_cs_clear_copy_buffer_dispatch *out);
199 
200 #endif
201