/*
 * Copyright 2022 Alyssa Rosenzweig
 * SPDX-License-Identifier: MIT
 */

#include "agx_tilebuffer.h"
#include <assert.h>
#include "util/bitscan.h"
#include "util/format/u_format.h"
#include "agx_usc.h"
#include "layout.h"

/* Maximum number of bytes per tile on G13G. This may change in future versions
 * of the architecture.
 */
#define MAX_BYTES_PER_TILE (32768 - 1)

/* Maximum bytes per sample in the tilebuffer. Greater allocations require
 * spilling render targets to memory.
 */
#define MAX_BYTES_PER_SAMPLE (64)

/* Minimum tile size in pixels, architectural. */
#define MIN_TILE_SIZE_PX (16 * 16)

/* Select the largest tile size that fits */
static struct agx_tile_size
agx_select_tile_size(unsigned bytes_per_pixel)
{
   /* clang-format off */
   struct agx_tile_size sizes[] = {
      { 32, 32 },
      { 32, 16 },
      { 16, 16 }
   };
   /* clang-format on */

   for (unsigned i = 0; i < ARRAY_SIZE(sizes); ++i) {
      struct agx_tile_size size = sizes[i];

      if ((bytes_per_pixel * size.width * size.height) <= MAX_BYTES_PER_TILE)
         return size;
   }

   unreachable("No supported tile size meets the bytes per pixel requirement");
}
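
/* Under the 32767-byte tile budget above, a 32x32 tile supports up to 31 bytes
 * per pixel, 32x16 up to 63, and 16x16 up to 127. For example, 40 bytes per
 * pixel overflows 32x32 (40 * 1024 = 40960) but fits 32x16 (40 * 512 = 20480).
 */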

static unsigned
agx_shared_layout_from_tile_size(struct agx_tile_size t)
{
   if (t.width == 32 && t.height == 32)
      return AGX_SHARED_LAYOUT_32X32;
   else if (t.width == 32 && t.height == 16)
      return AGX_SHARED_LAYOUT_32X16;
   else if (t.width == 16 && t.height == 16)
      return AGX_SHARED_LAYOUT_16X16;
   else
      unreachable("Invalid tile size");
}

struct agx_tilebuffer_layout
agx_build_tilebuffer_layout(const enum pipe_format *formats, uint8_t nr_cbufs,
                            uint8_t nr_samples, bool layered)
{
   struct agx_tilebuffer_layout tib = {
      .nr_samples = nr_samples,
      .layered = layered,
   };

   uint32_t offset_B = 0;

   for (unsigned rt = 0; rt < nr_cbufs; ++rt) {
      tib.logical_format[rt] = formats[rt];

      /* If there are gaps in the layout, skip over them rather than allocating
       * holes. Note the subtlety: PIPE_FORMAT_NONE has a block size of 1, not
       * 0, so we cannot rely on the size alone.
       */
      if (formats[rt] == PIPE_FORMAT_NONE)
         continue;

      /* Require natural alignment for tilebuffer allocations. This could be
       * optimized, but this shouldn't be a problem in practice.
       */
      enum pipe_format physical_fmt = agx_tilebuffer_physical_format(&tib, rt);
      unsigned align_B = util_format_get_blocksize(physical_fmt);
      assert(util_is_power_of_two_nonzero(align_B) &&
             util_is_power_of_two_nonzero(MAX_BYTES_PER_SAMPLE) &&
             align_B < MAX_BYTES_PER_SAMPLE &&
             "max bytes per sample divisible by alignment");

      offset_B = ALIGN_POT(offset_B, align_B);
      assert(offset_B <= MAX_BYTES_PER_SAMPLE && "loop invariant + above");

      /* Determine the size this render target would occupy if we allocated it
       * to the tilebuffer as desired. Single-channel physical formats are laid
       * out with one block per logical channel, so multiply by the logical
       * channel count; multi-channel physical formats already cover the whole
       * pixel in their block size.
       */
      unsigned nr = util_format_get_nr_components(physical_fmt) == 1
                       ? util_format_get_nr_components(formats[rt])
                       : 1;

      unsigned size_B = align_B * nr;
      unsigned new_offset_B = offset_B + size_B;

      /* If allocating this render target would exceed any tilebuffer limit, we
       * need to spill it to memory. We keep processing the remaining render
       * targets, since smaller ones later on might still fit. Otherwise, we
       * allocate it to the tilebuffer.
       *
       * TODO: Suboptimal, we might be able to reorder render targets to
       * avoid fragmentation causing spilling.
       */
      bool fits = (new_offset_B <= MAX_BYTES_PER_SAMPLE) &&
                  (ALIGN_POT(new_offset_B, 8) * MIN_TILE_SIZE_PX *
                   nr_samples) <= MAX_BYTES_PER_TILE;
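
      /* For instance, at 4x MSAA the second condition requires
       * ALIGN_POT(new_offset_B, 8) * 256 * 4 <= 32767, so at most 24 bytes per
       * sample can stay resident before further render targets spill.
       */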

      if (fits) {
         tib._offset_B[rt] = offset_B;
         offset_B = new_offset_B;
      } else {
         tib.spilled[rt] = true;
      }
   }

   assert(offset_B <= MAX_BYTES_PER_SAMPLE && "loop invariant");

   /* Multisampling needs a nonempty allocation.
    * XXX: Check this against hw
    */
   if (nr_samples > 1)
      offset_B = MAX2(offset_B, 1);

   tib.sample_size_B = ALIGN_POT(offset_B, 8);

   tib.tile_size = agx_select_tile_size(tib.sample_size_B * nr_samples);

   agx_tilebuffer_pack_usc(&tib);
   return tib;
}
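
/* Illustrative use (not part of this file's API surface): a driver with a
 * single RGBA8 colour attachment at 4x MSAA might build its layout as
 *
 *    enum pipe_format formats[1] = {PIPE_FORMAT_R8G8B8A8_UNORM};
 *    struct agx_tilebuffer_layout tib =
 *       agx_build_tilebuffer_layout(formats, 1, 4, false);
 *
 * and then consume tib.tile_size and the packed tib.usc when emitting state.
 */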

enum pipe_format
agx_tilebuffer_physical_format(struct agx_tilebuffer_layout *tib, unsigned rt)
{
   return ail_pixel_format[tib->logical_format[rt]].renderable;
}

bool
agx_tilebuffer_supports_mask(struct agx_tilebuffer_layout *tib, unsigned rt)
{
   /* We don't bother supporting masking with spilled render targets. This
    * might be optimized in the future, but spilling is rare enough that it is
    * not worth it.
    */
   if (tib->spilled[rt])
      return false;

   enum pipe_format fmt = agx_tilebuffer_physical_format(tib, rt);
   return ail_isa_format_supports_mask((enum ail_isa_format)fmt);
}

uint32_t
agx_tilebuffer_total_size(struct agx_tilebuffer_layout *tib)
{
   return tib->sample_size_B * tib->nr_samples * tib->tile_size.width *
          tib->tile_size.height;
}

void
agx_tilebuffer_pack_usc(struct agx_tilebuffer_layout *tib)
{
   agx_pack(&tib->usc, USC_SHARED, cfg) {
      if (tib->nr_samples > 0) {
         cfg.uses_shared_memory = true;
         cfg.layout = agx_shared_layout_from_tile_size(tib->tile_size);
         cfg.sample_stride_in_8_bytes = tib->sample_size_B / 8;
         cfg.sample_count = tib->nr_samples;
         cfg.bytes_per_threadgroup = agx_tilebuffer_total_size(tib);
      } else {
         cfg.layout = AGX_SHARED_LAYOUT_VERTEX_COMPUTE;
         cfg.bytes_per_threadgroup = 65536;
      }
   }
}