xref: /aosp_15_r20/external/mesa3d/src/amd/common/ac_binary.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker  * Copyright 2014 Advanced Micro Devices, Inc.
3*61046927SAndroid Build Coastguard Worker  *
4*61046927SAndroid Build Coastguard Worker  * SPDX-License-Identifier: MIT
5*61046927SAndroid Build Coastguard Worker  */
6*61046927SAndroid Build Coastguard Worker 
7*61046927SAndroid Build Coastguard Worker #include "ac_binary.h"
8*61046927SAndroid Build Coastguard Worker 
9*61046927SAndroid Build Coastguard Worker #include "ac_gpu_info.h"
10*61046927SAndroid Build Coastguard Worker #include "util/u_math.h"
11*61046927SAndroid Build Coastguard Worker #include "util/u_memory.h"
12*61046927SAndroid Build Coastguard Worker 
13*61046927SAndroid Build Coastguard Worker #include <sid.h>
14*61046927SAndroid Build Coastguard Worker #include <stdio.h>
15*61046927SAndroid Build Coastguard Worker 
16*61046927SAndroid Build Coastguard Worker #define SPILLED_SGPRS 0x4
17*61046927SAndroid Build Coastguard Worker #define SPILLED_VGPRS 0x8
18*61046927SAndroid Build Coastguard Worker 
19*61046927SAndroid Build Coastguard Worker /* Parse configuration data in .AMDGPU.config section format. */
ac_parse_shader_binary_config(const char * data,size_t nbytes,unsigned wave_size,const struct radeon_info * info,struct ac_shader_config * conf)20*61046927SAndroid Build Coastguard Worker void ac_parse_shader_binary_config(const char *data, size_t nbytes, unsigned wave_size,
21*61046927SAndroid Build Coastguard Worker                                    const struct radeon_info *info, struct ac_shader_config *conf)
22*61046927SAndroid Build Coastguard Worker {
23*61046927SAndroid Build Coastguard Worker    for (size_t i = 0; i < nbytes; i += 8) {
24*61046927SAndroid Build Coastguard Worker       unsigned reg = util_le32_to_cpu(*(uint32_t *)(data + i));
25*61046927SAndroid Build Coastguard Worker       unsigned value = util_le32_to_cpu(*(uint32_t *)(data + i + 4));
26*61046927SAndroid Build Coastguard Worker       switch (reg) {
27*61046927SAndroid Build Coastguard Worker       case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
28*61046927SAndroid Build Coastguard Worker       case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
29*61046927SAndroid Build Coastguard Worker       case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
30*61046927SAndroid Build Coastguard Worker       case R_00B848_COMPUTE_PGM_RSRC1:
31*61046927SAndroid Build Coastguard Worker       case R_00B428_SPI_SHADER_PGM_RSRC1_HS:
32*61046927SAndroid Build Coastguard Worker          if (wave_size == 32 || info->wave64_vgpr_alloc_granularity == 8)
33*61046927SAndroid Build Coastguard Worker             conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 8);
34*61046927SAndroid Build Coastguard Worker          else
35*61046927SAndroid Build Coastguard Worker             conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
36*61046927SAndroid Build Coastguard Worker 
37*61046927SAndroid Build Coastguard Worker          conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
38*61046927SAndroid Build Coastguard Worker          /* TODO: LLVM doesn't set FLOAT_MODE for non-compute shaders */
39*61046927SAndroid Build Coastguard Worker          conf->float_mode = G_00B028_FLOAT_MODE(value);
40*61046927SAndroid Build Coastguard Worker          conf->rsrc1 = value;
41*61046927SAndroid Build Coastguard Worker          break;
42*61046927SAndroid Build Coastguard Worker       case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
43*61046927SAndroid Build Coastguard Worker          conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
44*61046927SAndroid Build Coastguard Worker          /* TODO: LLVM doesn't set SHARED_VGPR_CNT for all shader types */
45*61046927SAndroid Build Coastguard Worker          conf->num_shared_vgprs = G_00B02C_SHARED_VGPR_CNT(value);
46*61046927SAndroid Build Coastguard Worker          conf->rsrc2 = value;
47*61046927SAndroid Build Coastguard Worker          break;
48*61046927SAndroid Build Coastguard Worker       case R_00B12C_SPI_SHADER_PGM_RSRC2_VS:
49*61046927SAndroid Build Coastguard Worker          conf->num_shared_vgprs = G_00B12C_SHARED_VGPR_CNT(value);
50*61046927SAndroid Build Coastguard Worker          conf->rsrc2 = value;
51*61046927SAndroid Build Coastguard Worker          break;
52*61046927SAndroid Build Coastguard Worker       case R_00B22C_SPI_SHADER_PGM_RSRC2_GS:
53*61046927SAndroid Build Coastguard Worker          conf->num_shared_vgprs = G_00B22C_SHARED_VGPR_CNT(value);
54*61046927SAndroid Build Coastguard Worker          conf->rsrc2 = value;
55*61046927SAndroid Build Coastguard Worker          break;
56*61046927SAndroid Build Coastguard Worker       case R_00B42C_SPI_SHADER_PGM_RSRC2_HS:
57*61046927SAndroid Build Coastguard Worker          conf->num_shared_vgprs = G_00B42C_SHARED_VGPR_CNT(value);
58*61046927SAndroid Build Coastguard Worker          conf->rsrc2 = value;
59*61046927SAndroid Build Coastguard Worker          break;
60*61046927SAndroid Build Coastguard Worker       case R_00B84C_COMPUTE_PGM_RSRC2:
61*61046927SAndroid Build Coastguard Worker          conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value));
62*61046927SAndroid Build Coastguard Worker          conf->rsrc2 = value;
63*61046927SAndroid Build Coastguard Worker          break;
64*61046927SAndroid Build Coastguard Worker       case R_00B8A0_COMPUTE_PGM_RSRC3:
65*61046927SAndroid Build Coastguard Worker          conf->num_shared_vgprs = G_00B8A0_SHARED_VGPR_CNT(value);
66*61046927SAndroid Build Coastguard Worker          conf->rsrc3 = value;
67*61046927SAndroid Build Coastguard Worker          break;
68*61046927SAndroid Build Coastguard Worker       case R_02865C_SPI_PS_INPUT_ENA:
69*61046927SAndroid Build Coastguard Worker       case R_0286CC_SPI_PS_INPUT_ENA:
70*61046927SAndroid Build Coastguard Worker          conf->spi_ps_input_ena = value;
71*61046927SAndroid Build Coastguard Worker          break;
72*61046927SAndroid Build Coastguard Worker       case R_028660_SPI_PS_INPUT_ADDR:
73*61046927SAndroid Build Coastguard Worker       case R_0286D0_SPI_PS_INPUT_ADDR:
74*61046927SAndroid Build Coastguard Worker          conf->spi_ps_input_addr = value;
75*61046927SAndroid Build Coastguard Worker          break;
76*61046927SAndroid Build Coastguard Worker       case R_0286E8_SPI_TMPRING_SIZE:
77*61046927SAndroid Build Coastguard Worker       case R_00B860_COMPUTE_TMPRING_SIZE:
78*61046927SAndroid Build Coastguard Worker          if (info->gfx_level >= GFX11)
79*61046927SAndroid Build Coastguard Worker             conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(value) * 256;
80*61046927SAndroid Build Coastguard Worker          else
81*61046927SAndroid Build Coastguard Worker             conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(value) * 1024;
82*61046927SAndroid Build Coastguard Worker          break;
83*61046927SAndroid Build Coastguard Worker       case SPILLED_SGPRS:
84*61046927SAndroid Build Coastguard Worker          conf->spilled_sgprs = value;
85*61046927SAndroid Build Coastguard Worker          break;
86*61046927SAndroid Build Coastguard Worker       case SPILLED_VGPRS:
87*61046927SAndroid Build Coastguard Worker          conf->spilled_vgprs = value;
88*61046927SAndroid Build Coastguard Worker          break;
89*61046927SAndroid Build Coastguard Worker       default: {
90*61046927SAndroid Build Coastguard Worker          static bool printed;
91*61046927SAndroid Build Coastguard Worker 
92*61046927SAndroid Build Coastguard Worker          if (!printed) {
93*61046927SAndroid Build Coastguard Worker             fprintf(stderr,
94*61046927SAndroid Build Coastguard Worker                     "Warning: LLVM emitted unknown "
95*61046927SAndroid Build Coastguard Worker                     "config register: 0x%x\n",
96*61046927SAndroid Build Coastguard Worker                     reg);
97*61046927SAndroid Build Coastguard Worker             printed = true;
98*61046927SAndroid Build Coastguard Worker          }
99*61046927SAndroid Build Coastguard Worker       } break;
100*61046927SAndroid Build Coastguard Worker       }
101*61046927SAndroid Build Coastguard Worker    }
102*61046927SAndroid Build Coastguard Worker 
103*61046927SAndroid Build Coastguard Worker    if (!conf->spi_ps_input_addr)
104*61046927SAndroid Build Coastguard Worker       conf->spi_ps_input_addr = conf->spi_ps_input_ena;
105*61046927SAndroid Build Coastguard Worker 
106*61046927SAndroid Build Coastguard Worker    /* Enable 64-bit and 16-bit denormals, because there is no performance
107*61046927SAndroid Build Coastguard Worker     * cost.
108*61046927SAndroid Build Coastguard Worker     *
109*61046927SAndroid Build Coastguard Worker     * Don't enable denormals for 32-bit floats, because:
110*61046927SAndroid Build Coastguard Worker     * - denormals disable output modifiers
111*61046927SAndroid Build Coastguard Worker     * - denormals break v_mad_f32
112*61046927SAndroid Build Coastguard Worker     * - GFX6 & GFX7 would be very slow
113*61046927SAndroid Build Coastguard Worker     */
114*61046927SAndroid Build Coastguard Worker    conf->float_mode &= ~V_00B028_FP_32_DENORMS;
115*61046927SAndroid Build Coastguard Worker    conf->float_mode |= V_00B028_FP_16_64_DENORMS;
116*61046927SAndroid Build Coastguard Worker }
117*61046927SAndroid Build Coastguard Worker 
ac_align_shader_binary_for_prefetch(const struct radeon_info * info,unsigned size)118*61046927SAndroid Build Coastguard Worker unsigned ac_align_shader_binary_for_prefetch(const struct radeon_info *info, unsigned size)
119*61046927SAndroid Build Coastguard Worker {
120*61046927SAndroid Build Coastguard Worker    /* The SQ fetches up to N cache lines of 16 dwords
121*61046927SAndroid Build Coastguard Worker     * ahead of the PC, configurable by SH_MEM_CONFIG and
122*61046927SAndroid Build Coastguard Worker     * S_INST_PREFETCH. This can cause two issues:
123*61046927SAndroid Build Coastguard Worker     *
124*61046927SAndroid Build Coastguard Worker     * (1) Crossing a page boundary to an unmapped page. The logic
125*61046927SAndroid Build Coastguard Worker     *     does not distinguish between a required fetch and a "mere"
126*61046927SAndroid Build Coastguard Worker     *     prefetch and will fault.
127*61046927SAndroid Build Coastguard Worker     *
128*61046927SAndroid Build Coastguard Worker     * (2) Prefetching instructions that will be changed for a
129*61046927SAndroid Build Coastguard Worker     *     different shader.
130*61046927SAndroid Build Coastguard Worker     *
131*61046927SAndroid Build Coastguard Worker     * (2) is not currently an issue because we flush the I$ at IB
132*61046927SAndroid Build Coastguard Worker     * boundaries, but (1) needs to be addressed. Due to buffer
133*61046927SAndroid Build Coastguard Worker     * suballocation, we just play it safe.
134*61046927SAndroid Build Coastguard Worker     */
135*61046927SAndroid Build Coastguard Worker    unsigned prefetch_distance = 0;
136*61046927SAndroid Build Coastguard Worker 
137*61046927SAndroid Build Coastguard Worker    if (!info->has_graphics && info->family >= CHIP_MI200)
138*61046927SAndroid Build Coastguard Worker       prefetch_distance = 16;
139*61046927SAndroid Build Coastguard Worker    else if (info->gfx_level >= GFX10)
140*61046927SAndroid Build Coastguard Worker       prefetch_distance = 3;
141*61046927SAndroid Build Coastguard Worker 
142*61046927SAndroid Build Coastguard Worker    if (prefetch_distance) {
143*61046927SAndroid Build Coastguard Worker       if (info->gfx_level >= GFX11)
144*61046927SAndroid Build Coastguard Worker          size = align(size + prefetch_distance * 64, 128);
145*61046927SAndroid Build Coastguard Worker       else
146*61046927SAndroid Build Coastguard Worker          size = align(size + prefetch_distance * 64, 64);
147*61046927SAndroid Build Coastguard Worker    }
148*61046927SAndroid Build Coastguard Worker 
149*61046927SAndroid Build Coastguard Worker    return size;
150*61046927SAndroid Build Coastguard Worker }
151