/* xref: /aosp_15_r20/external/mesa3d/src/asahi/lib/agx_tilebuffer.c
 * (revision 6104692788411f58d303aa86923a9ff6ecaded22)
 */
1 /*
2  * Copyright 2022 Alyssa Rosenzweig
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "agx_tilebuffer.h"
7 #include <assert.h>
8 #include "util/bitscan.h"
9 #include "util/format/u_format.h"
10 #include "agx_usc.h"
11 #include "layout.h"
12 
13 /* Maximum number of bytes per tile on G13G. This may change in future versions
14  * of the architecture.
15  */
16 #define MAX_BYTES_PER_TILE (32768 - 1)
17 
18 /* Maximum bytes per sample in the tilebuffer. Greater allocations require
19  * spilling render targets to memory.
20  */
21 #define MAX_BYTES_PER_SAMPLE (64)
22 
23 /* Minimum tile size in pixels, architectural. */
24 #define MIN_TILE_SIZE_PX (16 * 16)
25 
26 /* Select the largest tile size that fits */
27 static struct agx_tile_size
agx_select_tile_size(unsigned bytes_per_pixel)28 agx_select_tile_size(unsigned bytes_per_pixel)
29 {
30    /* clang-format off */
31    struct agx_tile_size sizes[] = {
32       { 32, 32 },
33       { 32, 16 },
34       { 16, 16 }
35    };
36    /* clang-format on */
37 
38    for (unsigned i = 0; i < ARRAY_SIZE(sizes); ++i) {
39       struct agx_tile_size size = sizes[i];
40 
41       if ((bytes_per_pixel * size.width * size.height) <= MAX_BYTES_PER_TILE)
42          return size;
43    }
44 
45    unreachable("No supported tile size meets the bytes per pixel requirement");
46 }
47 
48 static unsigned
agx_shared_layout_from_tile_size(struct agx_tile_size t)49 agx_shared_layout_from_tile_size(struct agx_tile_size t)
50 {
51    if (t.width == 32 && t.height == 32)
52       return AGX_SHARED_LAYOUT_32X32;
53    else if (t.width == 32 && t.height == 16)
54       return AGX_SHARED_LAYOUT_32X16;
55    else if (t.width == 16 && t.height == 16)
56       return AGX_SHARED_LAYOUT_16X16;
57    else
58       unreachable("Invalid tile size");
59 }
60 
/* Build a tilebuffer layout for the given framebuffer configuration.
 *
 * Walks the colour buffers in order, packing each render target into the
 * per-sample tilebuffer allocation at its natural alignment. Render targets
 * that would exceed either the per-sample byte budget or the per-tile byte
 * budget are marked spilled (written to memory instead); processing continues
 * so smaller targets later in the list can still be packed. Finally selects
 * the largest tile size that fits the resulting per-pixel footprint and packs
 * the USC shared-memory words.
 *
 * formats:    logical pipe formats per render target (PIPE_FORMAT_NONE = gap)
 * nr_cbufs:   number of entries in formats
 * nr_samples: samples per pixel
 * layered:    whether the framebuffer is layered (stored, not used here)
 */
struct agx_tilebuffer_layout
agx_build_tilebuffer_layout(const enum pipe_format *formats, uint8_t nr_cbufs,
                            uint8_t nr_samples, bool layered)
{
   struct agx_tilebuffer_layout tib = {
      .nr_samples = nr_samples,
      .layered = layered,
   };

   /* Running per-sample offset of the next allocation, in bytes. */
   uint32_t offset_B = 0;

   for (unsigned rt = 0; rt < nr_cbufs; ++rt) {
      tib.logical_format[rt] = formats[rt];

      /* If there are gaps in the layout, don't allocate holes. Obscure,
       * PIPE_FORMAT_NONE has a size of 1, not 0.
       */
      if (formats[rt] == PIPE_FORMAT_NONE)
         continue;

      /* Require natural alignment for tilebuffer allocations. This could be
       * optimized, but this shouldn't be a problem in practice.
       */
      enum pipe_format physical_fmt = agx_tilebuffer_physical_format(&tib, rt);
      unsigned align_B = util_format_get_blocksize(physical_fmt);
      assert(util_is_power_of_two_nonzero(align_B) &&
             util_is_power_of_two_nonzero(MAX_BYTES_PER_SAMPLE) &&
             align_B < MAX_BYTES_PER_SAMPLE &&
             "max bytes per sample divisible by alignment");

      offset_B = ALIGN_POT(offset_B, align_B);
      assert(offset_B <= MAX_BYTES_PER_SAMPLE && "loop invariant + above");

      /* Determine the size, if we were to allocate this render target to the
       * tilebuffer as desired.
       *
       * NOTE(review): when the physical format has a single component, the
       * allocation is replicated per logical component (nr copies of the
       * block size); otherwise one block covers the whole pixel — presumably
       * matching how single-channel physical formats are stored per
       * component. Confirm against the pixel-format table.
       */
      unsigned nr = util_format_get_nr_components(physical_fmt) == 1
                       ? util_format_get_nr_components(formats[rt])
                       : 1;

      unsigned size_B = align_B * nr;
      unsigned new_offset_B = offset_B + size_B;

      /* If allocating this render target would exceed any tilebuffer limits, we
       * need to spill it to memory. We continue processing in case there are
       * smaller render targets after that would still fit. Otherwise, we
       * allocate it to the tilebuffer.
       *
       * TODO: Suboptimal, we might be able to reorder render targets to
       * avoid fragmentation causing spilling.
       *
       * The ALIGN_POT(..., 8) mirrors the final sample_size_B rounding below,
       * so the per-tile check uses the stride the hardware will actually see
       * at the minimum tile size.
       */
      bool fits = (new_offset_B <= MAX_BYTES_PER_SAMPLE) &&
                  (ALIGN_POT(new_offset_B, 8) * MIN_TILE_SIZE_PX *
                   nr_samples) <= MAX_BYTES_PER_TILE;

      if (fits) {
         tib._offset_B[rt] = offset_B;
         offset_B = new_offset_B;
      } else {
         tib.spilled[rt] = true;
      }
   }

   assert(offset_B <= MAX_BYTES_PER_SAMPLE && "loop invariant");

   /* Multisampling needs a nonempty allocation.
    * XXX: Check this against hw
    */
   if (nr_samples > 1)
      offset_B = MAX2(offset_B, 1);

   /* Sample stride is expressed to hardware in units of 8 bytes (see
    * agx_tilebuffer_pack_usc), so round up accordingly.
    */
   tib.sample_size_B = ALIGN_POT(offset_B, 8);

   tib.tile_size = agx_select_tile_size(tib.sample_size_B * nr_samples);

   agx_tilebuffer_pack_usc(&tib);
   return tib;
}
139 
140 enum pipe_format
agx_tilebuffer_physical_format(struct agx_tilebuffer_layout * tib,unsigned rt)141 agx_tilebuffer_physical_format(struct agx_tilebuffer_layout *tib, unsigned rt)
142 {
143    return ail_pixel_format[tib->logical_format[rt]].renderable;
144 }
145 
146 bool
agx_tilebuffer_supports_mask(struct agx_tilebuffer_layout * tib,unsigned rt)147 agx_tilebuffer_supports_mask(struct agx_tilebuffer_layout *tib, unsigned rt)
148 {
149    /* We don't bother support masking with spilled render targets. This might be
150     * optimized in the future but spilling is so rare anyway it's not worth it.
151     */
152    if (tib->spilled[rt])
153       return false;
154 
155    enum pipe_format fmt = agx_tilebuffer_physical_format(tib, rt);
156    return ail_isa_format_supports_mask((enum ail_isa_format)fmt);
157 }
158 
159 uint32_t
agx_tilebuffer_total_size(struct agx_tilebuffer_layout * tib)160 agx_tilebuffer_total_size(struct agx_tilebuffer_layout *tib)
161 {
162    return tib->sample_size_B * tib->nr_samples * tib->tile_size.width *
163           tib->tile_size.height;
164 }
165 
/* Pack the USC shared-memory configuration words for this tilebuffer layout
 * into tib->usc.
 */
void
agx_tilebuffer_pack_usc(struct agx_tilebuffer_layout *tib)
{
   agx_pack(&tib->usc, USC_SHARED, cfg) {
      if (tib->nr_samples > 0) {
         /* Fragment rendering: shared memory holds the tilebuffer itself. */
         cfg.uses_shared_memory = true;
         cfg.layout = agx_shared_layout_from_tile_size(tib->tile_size);
         /* Hardware expresses the per-sample stride in 8-byte units;
          * sample_size_B is already 8-byte aligned by the layout builder.
          */
         cfg.sample_stride_in_8_bytes = tib->sample_size_B / 8;
         cfg.sample_count = tib->nr_samples;
         cfg.bytes_per_threadgroup = agx_tilebuffer_total_size(tib);
      } else {
         /* No samples: vertex/compute layout with a fixed 64KiB threadgroup
          * allocation. NOTE(review): 65536 presumably reflects the hardware's
          * shared-memory size for non-fragment work — confirm against hw docs.
          */
         cfg.layout = AGX_SHARED_LAYOUT_VERTEX_COMPUTE;
         cfg.bytes_per_threadgroup = 65536;
      }
   }
}
182