xref: /aosp_15_r20/external/mesa3d/src/freedreno/vulkan/tu_util.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2020 Valve Corporation
3  * SPDX-License-Identifier: MIT
4  *
5  * Authors:
6  *    Jonathan Marek <[email protected]>
7  */
8 
9 #ifndef TU_UTIL_H
10 #define TU_UTIL_H
11 
12 #include "tu_common.h"
13 
14 #include "util/macros.h"
15 #include "util/u_math.h"
16 #include "util/format/u_format_pack.h"
17 #include "util/format/u_format_zs.h"
18 #include "compiler/shader_enums.h"
19 
20 #include "vk_util.h"
21 
/* True when the TU_DEBUG_<name> bit is set in tu_env.debug. The test is
 * wrapped in unlikely(), i.e. debug flags are expected to be off.
 */
#define TU_DEBUG(name) unlikely((tu_env.debug & TU_DEBUG_##name) != 0)
23 
/**
 * Single-bit flags that TU_DEBUG(name) tests against tu_env.debug.
 *
 * Every flag occupies its own bit; bit 2 is not assigned to any flag here.
 */
enum tu_debug_flags {
   TU_DEBUG_STARTUP               = 1 << 0,
   TU_DEBUG_NIR                   = 1 << 1,
   TU_DEBUG_NOBIN                 = 1 << 3,
   TU_DEBUG_SYSMEM                = 1 << 4,
   TU_DEBUG_FORCEBIN              = 1 << 5,
   TU_DEBUG_NOUBWC                = 1 << 6,
   TU_DEBUG_NOMULTIPOS            = 1 << 7,
   TU_DEBUG_NOLRZ                 = 1 << 8,
   TU_DEBUG_PERFC                 = 1 << 9,
   TU_DEBUG_FLUSHALL              = 1 << 10,
   TU_DEBUG_SYNCDRAW              = 1 << 11,
   TU_DEBUG_PUSH_CONSTS_PER_STAGE = 1 << 12,
   TU_DEBUG_GMEM                  = 1 << 13,
   TU_DEBUG_RAST_ORDER            = 1 << 14,
   TU_DEBUG_UNALIGNED_STORE       = 1 << 15,
   TU_DEBUG_LAYOUT                = 1 << 16,
   TU_DEBUG_LOG_SKIP_GMEM_OPS     = 1 << 17,
   TU_DEBUG_PERF                  = 1 << 18,
   TU_DEBUG_NOLRZFC               = 1 << 19,
   TU_DEBUG_DYNAMIC               = 1 << 20,
   TU_DEBUG_BOS                   = 1 << 21,
   TU_DEBUG_3D_LOAD               = 1 << 22,
   TU_DEBUG_FDM                   = 1 << 23,
   TU_DEBUG_NOCONFORM             = 1 << 24,
   TU_DEBUG_RD                    = 1 << 25,
};
52 
/* Global environment state shared through the `tu_env` object below. */
struct tu_env {
   /* Bitmask that TU_DEBUG() tests against the TU_DEBUG_* flags. */
   uint32_t debug;
};

/* Declaration only; the object itself is defined outside this header. */
extern struct tu_env tu_env;

/* NOTE(review): presumably initializes tu_env; the implementation is not
 * visible from this header -- confirm against the definition.
 */
void tu_env_init(void);
61 
62 /* Whenever we generate an error, pass it through this function. Useful for
63  * debugging, where we can break on it. Only call at error site, not when
64  * propagating errors. Might be useful to plug in a stack trace here.
65  */
66 
67 VkResult
68 __vk_startup_errorf(struct tu_instance *instance,
69                     VkResult error,
70                     const char *file,
71                     int line,
72                     const char *format,
73                     ...) PRINTFLIKE(5, 6);
74 
/* Wrapper around __vk_startup_errorf() that fills in the caller's __FILE__
 * and __LINE__. Startup errors are printed if TU_DEBUG=startup is set or on a
 * debug driver build.
 */
#define vk_startup_errorf(instance, error, format, ...)                      \
   __vk_startup_errorf(instance, error, __FILE__, __LINE__,                  \
                       format, ##__VA_ARGS__)
81 
82 void
83 __tu_finishme(const char *file, int line, const char *format, ...)
84    PRINTFLIKE(3, 4);
85 
/**
 * Print a FINISHME message, including its source location.
 *
 * Each expansion owns a function-local static flag, so a given call site
 * reports at most once; the flag is set only after the message is emitted.
 */
#define tu_finishme(format, ...)                                             \
   do {                                                                      \
      static bool already_reported = false;                                  \
      if (already_reported == false) {                                       \
         __tu_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__);           \
         already_reported = true;                                            \
      }                                                                      \
   } while (0)
97 
/* Marks the enclosing function as a stub: emits a one-time FINISHME message
 * of the form "stub <function name>" through tu_finishme().
 */
#define tu_stub()                                                            \
   do {                                                                      \
      tu_finishme("stub %s", __func__);                                      \
   } while (0)
102 
/* NOTE(review): judging by the name and the non-const `fb`, this presumably
 * computes the tiling configuration of `fb` for the given device and render
 * pass; the implementation is not visible from this header.
 */
void tu_framebuffer_tiling_config(struct tu_framebuffer *fb,
                                  const struct tu_device *device,
                                  const struct tu_render_pass *pass);
107 
/* Mask with the low MESA_SHADER_STAGES bits set. */
#define TU_STAGE_MASK ((1 << MESA_SHADER_STAGES) - 1)

/* Iterate `stage` over every bit set in `stage_bits` (restricted to
 * TU_STAGE_MASK), from the lowest set bit upwards.
 *
 * The loop declares both `stage` and a scratch variable `__tmp`. The condition
 * first assigns `stage` the index of the lowest bit still set in `__tmp`
 * (__builtin_ffs() - 1) and then tests `__tmp`, so the body never runs with an
 * unassigned `stage`; the step expression clears that bit.
 */
#define tu_foreach_stage(stage, stage_bits)                                  \
   for (gl_shader_stage stage,                                               \
        __tmp = (gl_shader_stage) ((stage_bits) & TU_STAGE_MASK);            \
        stage = (gl_shader_stage) (__builtin_ffs(__tmp) - 1), __tmp;         \
        __tmp = (gl_shader_stage) (__tmp & ~(1 << (stage))))
115 
116 static inline enum a3xx_msaa_samples
tu_msaa_samples(uint32_t samples)117 tu_msaa_samples(uint32_t samples)
118 {
119    assert(__builtin_popcount(samples) == 1);
120    return (enum a3xx_msaa_samples) util_logbase2(samples);
121 }
122 
123 static inline uint32_t
tu6_stage2opcode(gl_shader_stage stage)124 tu6_stage2opcode(gl_shader_stage stage)
125 {
126    if (stage == MESA_SHADER_FRAGMENT || stage == MESA_SHADER_COMPUTE)
127       return CP_LOAD_STATE6_FRAG;
128    return CP_LOAD_STATE6_GEOM;
129 }
130 
131 static inline enum a6xx_state_block
tu6_stage2texsb(gl_shader_stage stage)132 tu6_stage2texsb(gl_shader_stage stage)
133 {
134    return (enum a6xx_state_block) (SB6_VS_TEX + stage);
135 }
136 
137 static inline enum a6xx_state_block
tu6_stage2shadersb(gl_shader_stage stage)138 tu6_stage2shadersb(gl_shader_stage stage)
139 {
140    return (enum a6xx_state_block) (SB6_VS_SHADER + stage);
141 }
142 
143 static inline enum a3xx_rop_code
tu6_rop(VkLogicOp op)144 tu6_rop(VkLogicOp op)
145 {
146    /* note: hw enum matches the VK enum, but with the 4 bits reversed */
147    static const enum a3xx_rop_code lookup[] = {
148       [VK_LOGIC_OP_CLEAR]           = ROP_CLEAR,
149       [VK_LOGIC_OP_AND]             = ROP_AND,
150       [VK_LOGIC_OP_AND_REVERSE]     = ROP_AND_REVERSE,
151       [VK_LOGIC_OP_COPY]            = ROP_COPY,
152       [VK_LOGIC_OP_AND_INVERTED]    = ROP_AND_INVERTED,
153       [VK_LOGIC_OP_NO_OP]           = ROP_NOOP,
154       [VK_LOGIC_OP_XOR]             = ROP_XOR,
155       [VK_LOGIC_OP_OR]              = ROP_OR,
156       [VK_LOGIC_OP_NOR]             = ROP_NOR,
157       [VK_LOGIC_OP_EQUIVALENT]      = ROP_EQUIV,
158       [VK_LOGIC_OP_INVERT]          = ROP_INVERT,
159       [VK_LOGIC_OP_OR_REVERSE]      = ROP_OR_REVERSE,
160       [VK_LOGIC_OP_COPY_INVERTED]   = ROP_COPY_INVERTED,
161       [VK_LOGIC_OP_OR_INVERTED]     = ROP_OR_INVERTED,
162       [VK_LOGIC_OP_NAND]            = ROP_NAND,
163       [VK_LOGIC_OP_SET]             = ROP_SET,
164    };
165    assert(op < ARRAY_SIZE(lookup));
166    return lookup[op];
167 }
168 
169 static inline bool
tu6_primtype_line(enum pc_di_primtype type)170 tu6_primtype_line(enum pc_di_primtype type)
171 {
172     switch(type) {
173     case DI_PT_LINELIST:
174     case DI_PT_LINESTRIP:
175     case DI_PT_LINE_ADJ:
176     case DI_PT_LINESTRIP_ADJ:
177        return true;
178     default:
179        return false;
180     }
181 }
182 
183 static inline bool
tu6_primtype_patches(enum pc_di_primtype type)184 tu6_primtype_patches(enum pc_di_primtype type)
185 {
186    return type >= DI_PT_PATCHES0 && type <= DI_PT_PATCHES31;
187 }
188 
189 static inline enum pc_di_primtype
tu6_primtype(VkPrimitiveTopology topology)190 tu6_primtype(VkPrimitiveTopology topology)
191 {
192    static const enum pc_di_primtype lookup[] = {
193       [VK_PRIMITIVE_TOPOLOGY_POINT_LIST]                    = DI_PT_POINTLIST,
194       [VK_PRIMITIVE_TOPOLOGY_LINE_LIST]                     = DI_PT_LINELIST,
195       [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP]                    = DI_PT_LINESTRIP,
196       [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST]                 = DI_PT_TRILIST,
197       [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP]                = DI_PT_TRISTRIP,
198       [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN]                  = DI_PT_TRIFAN,
199       [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY]      = DI_PT_LINE_ADJ,
200       [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY]     = DI_PT_LINESTRIP_ADJ,
201       [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY]  = DI_PT_TRI_ADJ,
202       [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = DI_PT_TRISTRIP_ADJ,
203       /* Return PATCH0 and update in tu_pipeline_builder_parse_tessellation */
204       [VK_PRIMITIVE_TOPOLOGY_PATCH_LIST]                    = DI_PT_PATCHES0,
205    };
206    assert(topology < ARRAY_SIZE(lookup));
207    return lookup[topology];
208 }
209 
210 static inline enum adreno_compare_func
tu6_compare_func(VkCompareOp op)211 tu6_compare_func(VkCompareOp op)
212 {
213    return (enum adreno_compare_func) op;
214 }
215 
216 static inline enum adreno_stencil_op
tu6_stencil_op(VkStencilOp op)217 tu6_stencil_op(VkStencilOp op)
218 {
219    return (enum adreno_stencil_op) op;
220 }
221 
222 static inline enum adreno_rb_blend_factor
tu6_blend_factor(VkBlendFactor factor)223 tu6_blend_factor(VkBlendFactor factor)
224 {
225    static const enum adreno_rb_blend_factor lookup[] = {
226       [VK_BLEND_FACTOR_ZERO]                    = FACTOR_ZERO,
227       [VK_BLEND_FACTOR_ONE]                     = FACTOR_ONE,
228       [VK_BLEND_FACTOR_SRC_COLOR]               = FACTOR_SRC_COLOR,
229       [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR]     = FACTOR_ONE_MINUS_SRC_COLOR,
230       [VK_BLEND_FACTOR_DST_COLOR]               = FACTOR_DST_COLOR,
231       [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR]     = FACTOR_ONE_MINUS_DST_COLOR,
232       [VK_BLEND_FACTOR_SRC_ALPHA]               = FACTOR_SRC_ALPHA,
233       [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA]     = FACTOR_ONE_MINUS_SRC_ALPHA,
234       [VK_BLEND_FACTOR_DST_ALPHA]               = FACTOR_DST_ALPHA,
235       [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA]     = FACTOR_ONE_MINUS_DST_ALPHA,
236       [VK_BLEND_FACTOR_CONSTANT_COLOR]          = FACTOR_CONSTANT_COLOR,
237       [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR]= FACTOR_ONE_MINUS_CONSTANT_COLOR,
238       [VK_BLEND_FACTOR_CONSTANT_ALPHA]          = FACTOR_CONSTANT_ALPHA,
239       [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA]= FACTOR_ONE_MINUS_CONSTANT_ALPHA,
240       [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE]      = FACTOR_SRC_ALPHA_SATURATE,
241       [VK_BLEND_FACTOR_SRC1_COLOR]              = FACTOR_SRC1_COLOR,
242       [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR]    = FACTOR_ONE_MINUS_SRC1_COLOR,
243       [VK_BLEND_FACTOR_SRC1_ALPHA]              = FACTOR_SRC1_ALPHA,
244       [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA]    = FACTOR_ONE_MINUS_SRC1_ALPHA,
245    };
246    assert(factor < ARRAY_SIZE(lookup));
247    return lookup[factor];
248 }
249 
250 static inline bool
tu_blend_factor_is_dual_src(VkBlendFactor factor)251 tu_blend_factor_is_dual_src(VkBlendFactor factor)
252 {
253    switch (factor) {
254    case VK_BLEND_FACTOR_SRC1_COLOR:
255    case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
256    case VK_BLEND_FACTOR_SRC1_ALPHA:
257    case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
258       return true;
259    default:
260       return false;
261    }
262 }
263 
264 static inline enum a3xx_rb_blend_opcode
tu6_blend_op(VkBlendOp op)265 tu6_blend_op(VkBlendOp op)
266 {
267    return (enum a3xx_rb_blend_opcode) op;
268 }
269 
270 static inline enum a6xx_tex_type
tu6_tex_type(VkImageViewType type,bool storage)271 tu6_tex_type(VkImageViewType type, bool storage)
272 {
273    switch (type) {
274    default:
275    case VK_IMAGE_VIEW_TYPE_1D:
276    case VK_IMAGE_VIEW_TYPE_1D_ARRAY:
277       return A6XX_TEX_1D;
278    case VK_IMAGE_VIEW_TYPE_2D:
279    case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
280       return A6XX_TEX_2D;
281    case VK_IMAGE_VIEW_TYPE_3D:
282       return A6XX_TEX_3D;
283    case VK_IMAGE_VIEW_TYPE_CUBE:
284    case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY:
285       return storage ? A6XX_TEX_2D : A6XX_TEX_CUBE;
286    }
287 }
288 
289 static inline enum a6xx_tex_clamp
tu6_tex_wrap(VkSamplerAddressMode address_mode)290 tu6_tex_wrap(VkSamplerAddressMode address_mode)
291 {
292    static const enum a6xx_tex_clamp lookup[] = {
293       [VK_SAMPLER_ADDRESS_MODE_REPEAT]                = A6XX_TEX_REPEAT,
294       [VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT]       = A6XX_TEX_MIRROR_REPEAT,
295       [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE]         = A6XX_TEX_CLAMP_TO_EDGE,
296       [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER]       = A6XX_TEX_CLAMP_TO_BORDER,
297       [VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE]  = A6XX_TEX_MIRROR_CLAMP,
298    };
299    assert(address_mode < ARRAY_SIZE(lookup));
300    return lookup[address_mode];
301 }
302 
303 static inline enum a6xx_tex_filter
tu6_tex_filter(VkFilter filter,unsigned aniso)304 tu6_tex_filter(VkFilter filter, unsigned aniso)
305 {
306    switch (filter) {
307    case VK_FILTER_NEAREST:
308       return A6XX_TEX_NEAREST;
309    case VK_FILTER_LINEAR:
310       return aniso ? A6XX_TEX_ANISO : A6XX_TEX_LINEAR;
311    case VK_FILTER_CUBIC_EXT:
312       return A6XX_TEX_CUBIC;
313    default:
314       unreachable("illegal texture filter");
315       break;
316    }
317 }
318 
319 static inline enum a6xx_reduction_mode
tu6_reduction_mode(VkSamplerReductionMode reduction_mode)320 tu6_reduction_mode(VkSamplerReductionMode reduction_mode)
321 {
322    return (enum a6xx_reduction_mode) reduction_mode;
323 }
324 
325 static inline enum a6xx_depth_format
tu6_pipe2depth(VkFormat format)326 tu6_pipe2depth(VkFormat format)
327 {
328    switch (format) {
329    case VK_FORMAT_D16_UNORM:
330       return DEPTH6_16;
331    case VK_FORMAT_X8_D24_UNORM_PACK32:
332    case VK_FORMAT_D24_UNORM_S8_UINT:
333       return DEPTH6_24_8;
334    case VK_FORMAT_D32_SFLOAT:
335    case VK_FORMAT_D32_SFLOAT_S8_UINT:
336    case VK_FORMAT_S8_UINT:
337       return DEPTH6_32;
338    default:
339       return DEPTH6_NONE;
340    }
341 }
342 
343 static inline enum a6xx_polygon_mode
tu6_polygon_mode(VkPolygonMode mode)344 tu6_polygon_mode(VkPolygonMode mode)
345 {
346    switch (mode) {
347    case VK_POLYGON_MODE_POINT:
348       return POLYMODE6_POINTS;
349    case VK_POLYGON_MODE_LINE:
350       return POLYMODE6_LINES;
351    case VK_POLYGON_MODE_FILL:
352       return POLYMODE6_TRIANGLES;
353    default:
354       unreachable("bad polygon mode");
355    }
356 }
357 
/* One border-color entry: the same color stored in several packed
 * representations, filled in by tu6_pack_border_color().
 *
 * The struct is 128-byte aligned and the explicit padding brings its size to
 * exactly 128 bytes; both properties are checked at compile time below so a
 * field edit cannot silently change the layout.
 */
struct bcolor_entry {
   alignas(128) uint32_t fp32[4]; /* raw copy of the four 32-bit components */
   uint64_t ui16;                 /* r16g16b16a16_unorm */
   uint64_t si16;                 /* r16g16b16a16_snorm */
   uint64_t fp16;                 /* r16g16b16a16_float, or 16-bit ints */
   uint16_t rgb565;               /* r5g6b5_unorm */
   uint16_t rgb5a1;               /* r5g5b5a1_unorm */
   uint16_t rgba4;                /* r4g4b4a4_unorm */
   uint8_t __pad0[2];
   uint32_t ui8;                  /* r8g8b8a8_unorm */
   uint32_t si8;                  /* r8g8b8a8_snorm */
   uint32_t rgb10a2;              /* r10g10b10a2_unorm */
   uint32_t z24; /* also s8? */
   uint64_t srgb;                 /* currently packed as r16g16b16a16_float */
   uint8_t  __pad1[56];           /* pads the entry out to 128 bytes */
};
static_assert(alignof(struct bcolor_entry) == 128, "");
static_assert(sizeof(struct bcolor_entry) == 128, "");
375 
/* Vulkan does not want integer clear values to be clamped, which differs from
 * u_format; see the spec for VkClearColorValue.
 */
/* Pack the low 8 bits of each of the four components in `val` into one 32-bit
 * word, component 0 in the least significant byte. Higher bits of each
 * component are discarded (truncation, no clamping).
 */
static inline void
pack_int8(uint32_t *dst, const uint32_t *val)
{
   uint32_t packed = 0;

   for (unsigned comp = 0; comp < 4; comp++)
      packed |= (val[comp] & 0xff) << (8 * comp);

   *dst = packed;
}
387 
/* Pack four components into a 10:10:10:2 word: the low 10 bits of components
 * 0..2 at bit offsets 0, 10 and 20, and the low 2 bits of component 3 at bit
 * offset 30. Excess bits are discarded (truncation, no clamping).
 */
static inline void
pack_int10_2(uint32_t *dst, const uint32_t *val)
{
   const uint32_t c0 = val[0] & 0x3ff;
   const uint32_t c1 = val[1] & 0x3ff;
   const uint32_t c2 = val[2] & 0x3ff;
   const uint32_t c3 = val[3] & 0x3;

   *dst = c0 | (c1 << 10) | (c2 << 20) | (c3 << 30);
}
396 
/* Pack the low 16 bits of four components into two 32-bit words: dst[0] holds
 * components 0 (low half) and 1 (high half), dst[1] holds components 2 and 3.
 * Excess bits are discarded (truncation, no clamping).
 */
static inline void
pack_int16(uint32_t *dst, const uint32_t *val)
{
   const uint32_t low16 = 0xffff;

   /* dst[0] is stored before val[2]/val[3] are read, as in the original. */
   dst[0] = ((val[1] & low16) << 16) | (val[0] & low16);
   dst[1] = ((val[3] & low16) << 16) | (val[2] & low16);
}
405 
406 static inline void
tu6_pack_border_color(struct bcolor_entry * bcolor,const VkClearColorValue * val,bool is_int)407 tu6_pack_border_color(struct bcolor_entry *bcolor, const VkClearColorValue *val, bool is_int)
408 {
409    memcpy(bcolor->fp32, val, 4 * sizeof(float));
410    if (is_int) {
411       pack_int16((uint32_t*) &bcolor->fp16, val->uint32);
412       return;
413    }
414 #define PACK_F(x, type) util_format_##type##_pack_rgba_float \
415    ( (uint8_t*) (&bcolor->x), 0, val->float32, 0, 1, 1)
416    PACK_F(ui16, r16g16b16a16_unorm);
417    PACK_F(si16, r16g16b16a16_snorm);
418    PACK_F(fp16, r16g16b16a16_float);
419    PACK_F(rgb565, r5g6b5_unorm);
420    PACK_F(rgb5a1, r5g5b5a1_unorm);
421    PACK_F(rgba4, r4g4b4a4_unorm);
422    PACK_F(ui8, r8g8b8a8_unorm);
423    PACK_F(si8, r8g8b8a8_snorm);
424    PACK_F(rgb10a2, r10g10b10a2_unorm);
425    util_format_z24x8_unorm_pack_z_float((uint8_t*) &bcolor->z24,
426                                         0, val->float32, 0, 1, 1);
427    PACK_F(srgb, r16g16b16a16_float); /* TODO: clamp? */
428 #undef PACK_F
429 }
430 
/* NOTE(review): judging by the name, this presumably logs skipped GMEM
 * load/store operations for `device`; the implementation is not visible from
 * this header.
 */
void tu_dbg_log_gmem_load_store_skips(struct tu_device *device);
433 
/* Log a printf-style warning through mesa_log() when the TU_DEBUG_PERF flag
 * is set. The `device` argument is accepted but not used by the expansion.
 */
#define perf_debug(device, fmt, ...)                                         \
   do {                                                                      \
      if (TU_DEBUG(PERF)) {                                                  \
         mesa_log(MESA_LOG_WARN, (MESA_LOG_TAG), (fmt), ##__VA_ARGS__);      \
      }                                                                      \
   } while (0)
438 
/* Size in bytes of member `field` of type `s`. The null pointer is only an
 * operand of sizeof and is never evaluated.
 */
#define sizeof_field(s, field) sizeof(((s *) NULL)->field)

/* Byte offset of element `idx` of the array member `field` within type `s`:
 * the member's offset plus `idx` times the size of one element.
 */
#define offsetof_arr(s, field, idx)                                          \
   ((idx) * sizeof_field(s, field[0]) + offsetof(s, field))
443 
444 #endif /* TU_UTIL_H */
445