1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker * Copyright 2024 Alyssa Rosenzweig
3*61046927SAndroid Build Coastguard Worker * SPDX-License-Identifier: MIT
4*61046927SAndroid Build Coastguard Worker */
5*61046927SAndroid Build Coastguard Worker
6*61046927SAndroid Build Coastguard Worker #include "agx_linker.h"
7*61046927SAndroid Build Coastguard Worker #include <stddef.h>
8*61046927SAndroid Build Coastguard Worker #include <stdint.h>
9*61046927SAndroid Build Coastguard Worker #include "util/ralloc.h"
10*61046927SAndroid Build Coastguard Worker #include "agx_compile.h"
11*61046927SAndroid Build Coastguard Worker #include "agx_device.h"
12*61046927SAndroid Build Coastguard Worker #include "agx_pack.h"
13*61046927SAndroid Build Coastguard Worker #include "agx_scratch.h"
14*61046927SAndroid Build Coastguard Worker
/*
 * When sample shading is used with a non-monolithic fragment shader, we
 * fast-link a program with the following structure:
 *
 *    Fragment prolog;
 *
 *    for (u16 sample_bit = 1; sample_bit < (1 << # of samples);
 *         sample_bit <<= 1) {
 *       API fragment shader;
 *       Fragment epilog;
 *    }
 *
 * This means the prolog runs per-pixel but the fragment shader and epilog run
 * per-sample. To do this, we need to generate the loop on the fly. The
 * following binary sequences form the relevant loop: a header that initializes
 * the sample bit, and a footer that advances it and branches back.
 */
30*61046927SAndroid Build Coastguard Worker
31*61046927SAndroid Build Coastguard Worker /* clang-format off */
/*
 * Pre-assembled instruction copied into the linked program right after the
 * prolog (and before the main shader) whenever nr_samples_shaded is nonzero,
 * including the single-sample case where no loop footer follows.
 *
 * It loads the immediate 0x10000 into r0. NOTE(review): presumably r0l/r0h are
 * the 16-bit halves of r0, so this leaves r0l = 0 and r0h = 1 (the first
 * sample bit) -- confirm against the ISA documentation.
 */
static const uint8_t sample_loop_header[] = {
   /* mov_imm r0, 0x10000, 0b0 */
   0x62, 0x01, 0x00, 0x00, 0x01, 0x00,
};
36*61046927SAndroid Build Coastguard Worker
/*
 * Comma-terminated byte list for use inside an array initializer: a 2-byte
 * stop encoding followed by eight repetitions of the 2-byte trap encoding
 * (18 bytes total). It is expanded both into the standalone `stop` array and
 * at the end of sample_loop_footer.
 *
 * NOTE(review): the traps presumably guard against execution or prefetch
 * running past the stop -- confirm.
 */
#define STOP                                                                   \
   /* stop */                                                                  \
   0x88, 0x00,                                                                 \
                                                                               \
   /* trap */                                                                  \
   0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00,                             \
   0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00,

/*
 * Program terminator appended by agx_fast_link() when exactly one sample is
 * shaded, in place of the looping footer.
 */
static const uint8_t stop[] = {STOP};
46*61046927SAndroid Build Coastguard Worker
/*
 * Template for the tail of the per-sample loop, appended after the main
 * shader and epilog when more than one sample is shaded. agx_fast_link()
 * copies it and patches two fields before emitting it: the comparison
 * immediate of the while_icmp and the branch target of the jmp_exec_any (see
 * the SAMPLE_LOOP_FOOTER_* offsets).
 *
 * Byte layout: iadd at 0 (8 bytes), while_icmp at 8 (6 bytes), jmp_exec_any at
 * 14 (6 bytes), pop_exec at 20 (6 bytes), then the STOP sequence.
 */
static const uint8_t sample_loop_footer[] = {
   /* iadd r0h, 0, r0h, lsl 1 */
   0x0e, 0x02, 0x00, 0x10, 0x84, 0x00, 0x00, 0x00,

   /* while_icmp r0l, ult, r0h, 0, 1 */
   0x52, 0x2c, 0x41, 0x00, 0x00, 0x00,

   /* jmp_exec_any */
   0x00, 0xc0, 0x00, 0x00, 0x00, 0x00,

   /* pop_exec r0l, 1 */
   0x52, 0x0e, 0x00, 0x00, 0x00, 0x00,

   STOP
};
62*61046927SAndroid Build Coastguard Worker
/* Offset in sample_loop_footer to the jmp_exec_any's target. This is 2 bytes
 * into the jmp_exec_any instruction; agx_fast_link() stores a 32-bit signed
 * byte offset here.
 */
#define SAMPLE_LOOP_FOOTER_JMP_PATCH_OFFS (16)

/* Offset of the jmp_exec_any, for calculating the PC offsets. Equal to the
 * combined size of the preceding iadd (8 bytes) and while_icmp (6 bytes).
 */
#define SAMPLE_LOOP_FOOTER_JMP_OFFS (14)

/* Offset in sample_loop_footer to the while_icmp's sample count immediate. Bit
 * position in the byte given by the shift. The byte is the fourth byte of the
 * while_icmp (which starts at offset 8) and must be zero in the template; the
 * patched value lands in its upper four bits.
 */
#define SAMPLE_LOOP_FOOTER_COUNT_PATCH_OFFS (11)
#define SAMPLE_LOOP_FOOTER_COUNT_SHIFT      (4)
74*61046927SAndroid Build Coastguard Worker /* clang-format on */
75*61046927SAndroid Build Coastguard Worker
76*61046927SAndroid Build Coastguard Worker void
agx_fast_link(struct agx_linked_shader * linked,struct agx_device * dev,bool fragment,struct agx_shader_part * main,struct agx_shader_part * prolog,struct agx_shader_part * epilog,unsigned nr_samples_shaded)77*61046927SAndroid Build Coastguard Worker agx_fast_link(struct agx_linked_shader *linked, struct agx_device *dev,
78*61046927SAndroid Build Coastguard Worker bool fragment, struct agx_shader_part *main,
79*61046927SAndroid Build Coastguard Worker struct agx_shader_part *prolog, struct agx_shader_part *epilog,
80*61046927SAndroid Build Coastguard Worker unsigned nr_samples_shaded)
81*61046927SAndroid Build Coastguard Worker {
82*61046927SAndroid Build Coastguard Worker size_t size = 0;
83*61046927SAndroid Build Coastguard Worker unsigned nr_gprs = 0, scratch_size = 0;
84*61046927SAndroid Build Coastguard Worker bool reads_tib = false, writes_sample_mask = false,
85*61046927SAndroid Build Coastguard Worker disable_tri_merging = false, tag_write_disable = true;
86*61046927SAndroid Build Coastguard Worker
87*61046927SAndroid Build Coastguard Worker if (nr_samples_shaded) {
88*61046927SAndroid Build Coastguard Worker size += sizeof(sample_loop_header);
89*61046927SAndroid Build Coastguard Worker
90*61046927SAndroid Build Coastguard Worker if (nr_samples_shaded > 1)
91*61046927SAndroid Build Coastguard Worker size += sizeof(sample_loop_footer);
92*61046927SAndroid Build Coastguard Worker else
93*61046927SAndroid Build Coastguard Worker size += sizeof(stop);
94*61046927SAndroid Build Coastguard Worker }
95*61046927SAndroid Build Coastguard Worker
96*61046927SAndroid Build Coastguard Worker struct agx_shader_part *parts[] = {prolog, main, epilog};
97*61046927SAndroid Build Coastguard Worker
98*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < ARRAY_SIZE(parts); ++i) {
99*61046927SAndroid Build Coastguard Worker struct agx_shader_part *part = parts[i];
100*61046927SAndroid Build Coastguard Worker if (!part)
101*61046927SAndroid Build Coastguard Worker continue;
102*61046927SAndroid Build Coastguard Worker
103*61046927SAndroid Build Coastguard Worker assert(part->info.main_offset == 0);
104*61046927SAndroid Build Coastguard Worker size += part->info.main_size;
105*61046927SAndroid Build Coastguard Worker
106*61046927SAndroid Build Coastguard Worker nr_gprs = MAX2(nr_gprs, part->info.nr_gprs);
107*61046927SAndroid Build Coastguard Worker scratch_size = MAX2(scratch_size, part->info.scratch_size);
108*61046927SAndroid Build Coastguard Worker reads_tib |= part->info.reads_tib;
109*61046927SAndroid Build Coastguard Worker writes_sample_mask |= part->info.writes_sample_mask;
110*61046927SAndroid Build Coastguard Worker disable_tri_merging |= part->info.disable_tri_merging;
111*61046927SAndroid Build Coastguard Worker linked->uses_base_param |= part->info.uses_base_param;
112*61046927SAndroid Build Coastguard Worker linked->uses_txf |= part->info.uses_txf;
113*61046927SAndroid Build Coastguard Worker tag_write_disable &= part->info.tag_write_disable;
114*61046927SAndroid Build Coastguard Worker }
115*61046927SAndroid Build Coastguard Worker
116*61046927SAndroid Build Coastguard Worker assert(size > 0 && "must stop");
117*61046927SAndroid Build Coastguard Worker
118*61046927SAndroid Build Coastguard Worker linked->bo = agx_bo_create(dev, size, 0, AGX_BO_EXEC | AGX_BO_LOW_VA,
119*61046927SAndroid Build Coastguard Worker "Linked executable");
120*61046927SAndroid Build Coastguard Worker
121*61046927SAndroid Build Coastguard Worker size_t offset = 0;
122*61046927SAndroid Build Coastguard Worker
123*61046927SAndroid Build Coastguard Worker /* FS prolog happens per-pixel, outside the sample loop */
124*61046927SAndroid Build Coastguard Worker if (prolog) {
125*61046927SAndroid Build Coastguard Worker size_t sz = prolog->info.main_size;
126*61046927SAndroid Build Coastguard Worker memcpy((uint8_t *)linked->bo->map + offset, prolog->binary, sz);
127*61046927SAndroid Build Coastguard Worker offset += sz;
128*61046927SAndroid Build Coastguard Worker }
129*61046927SAndroid Build Coastguard Worker
130*61046927SAndroid Build Coastguard Worker if (nr_samples_shaded) {
131*61046927SAndroid Build Coastguard Worker memcpy((uint8_t *)linked->bo->map + offset, sample_loop_header,
132*61046927SAndroid Build Coastguard Worker sizeof(sample_loop_header));
133*61046927SAndroid Build Coastguard Worker offset += sizeof(sample_loop_header);
134*61046927SAndroid Build Coastguard Worker }
135*61046927SAndroid Build Coastguard Worker
136*61046927SAndroid Build Coastguard Worker size_t sample_loop_begin = offset;
137*61046927SAndroid Build Coastguard Worker
138*61046927SAndroid Build Coastguard Worker /* Main shader and epilog happen in the sample loop, so start from i=1 */
139*61046927SAndroid Build Coastguard Worker for (unsigned i = 1; i < ARRAY_SIZE(parts); ++i) {
140*61046927SAndroid Build Coastguard Worker struct agx_shader_part *part = parts[i];
141*61046927SAndroid Build Coastguard Worker if (!part)
142*61046927SAndroid Build Coastguard Worker continue;
143*61046927SAndroid Build Coastguard Worker
144*61046927SAndroid Build Coastguard Worker size_t sz = part->info.main_size;
145*61046927SAndroid Build Coastguard Worker memcpy((uint8_t *)linked->bo->map + offset, part->binary, sz);
146*61046927SAndroid Build Coastguard Worker offset += sz;
147*61046927SAndroid Build Coastguard Worker }
148*61046927SAndroid Build Coastguard Worker
149*61046927SAndroid Build Coastguard Worker if (nr_samples_shaded > 1) {
150*61046927SAndroid Build Coastguard Worker assert(sample_loop_footer[SAMPLE_LOOP_FOOTER_COUNT_PATCH_OFFS] == 0);
151*61046927SAndroid Build Coastguard Worker
152*61046927SAndroid Build Coastguard Worker /* Make a stack copy of the footer so we can efficiently patch it */
153*61046927SAndroid Build Coastguard Worker uint8_t footer[sizeof(sample_loop_footer)];
154*61046927SAndroid Build Coastguard Worker memcpy(footer, sample_loop_footer, sizeof(footer));
155*61046927SAndroid Build Coastguard Worker
156*61046927SAndroid Build Coastguard Worker /* Patch in sample end */
157*61046927SAndroid Build Coastguard Worker uint8_t end = (1u << nr_samples_shaded) - 1;
158*61046927SAndroid Build Coastguard Worker footer[SAMPLE_LOOP_FOOTER_COUNT_PATCH_OFFS] =
159*61046927SAndroid Build Coastguard Worker end << SAMPLE_LOOP_FOOTER_COUNT_SHIFT;
160*61046927SAndroid Build Coastguard Worker
161*61046927SAndroid Build Coastguard Worker /* Patch in the branch target */
162*61046927SAndroid Build Coastguard Worker int32_t loop_size = offset - sample_loop_begin;
163*61046927SAndroid Build Coastguard Worker int32_t branch_offs = -(SAMPLE_LOOP_FOOTER_JMP_OFFS + loop_size);
164*61046927SAndroid Build Coastguard Worker int32_t *target = (int32_t *)(footer + SAMPLE_LOOP_FOOTER_JMP_PATCH_OFFS);
165*61046927SAndroid Build Coastguard Worker *target = branch_offs;
166*61046927SAndroid Build Coastguard Worker
167*61046927SAndroid Build Coastguard Worker /* Copy in the patched footer */
168*61046927SAndroid Build Coastguard Worker memcpy((uint8_t *)linked->bo->map + offset, footer, sizeof(footer));
169*61046927SAndroid Build Coastguard Worker offset += sizeof(footer);
170*61046927SAndroid Build Coastguard Worker } else if (nr_samples_shaded) {
171*61046927SAndroid Build Coastguard Worker /* Just end after the first sample, no need to loop for a single sample */
172*61046927SAndroid Build Coastguard Worker memcpy((uint8_t *)linked->bo->map + offset, stop, sizeof(stop));
173*61046927SAndroid Build Coastguard Worker offset += sizeof(stop);
174*61046927SAndroid Build Coastguard Worker }
175*61046927SAndroid Build Coastguard Worker
176*61046927SAndroid Build Coastguard Worker assert(offset == size);
177*61046927SAndroid Build Coastguard Worker
178*61046927SAndroid Build Coastguard Worker agx_pack(&linked->shader, USC_SHADER, cfg) {
179*61046927SAndroid Build Coastguard Worker cfg.code = agx_usc_addr(dev, linked->bo->va->addr);
180*61046927SAndroid Build Coastguard Worker cfg.unk_2 = fragment ? 2 : 3;
181*61046927SAndroid Build Coastguard Worker
182*61046927SAndroid Build Coastguard Worker if (fragment)
183*61046927SAndroid Build Coastguard Worker cfg.loads_varyings = linked->cf.nr_bindings > 0;
184*61046927SAndroid Build Coastguard Worker }
185*61046927SAndroid Build Coastguard Worker
186*61046927SAndroid Build Coastguard Worker agx_pack(&linked->regs, USC_REGISTERS, cfg) {
187*61046927SAndroid Build Coastguard Worker cfg.register_count = nr_gprs;
188*61046927SAndroid Build Coastguard Worker cfg.unk_1 = fragment;
189*61046927SAndroid Build Coastguard Worker cfg.spill_size = scratch_size ? agx_scratch_get_bucket(scratch_size) : 0;
190*61046927SAndroid Build Coastguard Worker }
191*61046927SAndroid Build Coastguard Worker
192*61046927SAndroid Build Coastguard Worker if (fragment) {
193*61046927SAndroid Build Coastguard Worker agx_pack(&linked->fragment_props, USC_FRAGMENT_PROPERTIES, cfg) {
194*61046927SAndroid Build Coastguard Worker cfg.early_z_testing = !writes_sample_mask;
195*61046927SAndroid Build Coastguard Worker cfg.unk_4 = 0x2;
196*61046927SAndroid Build Coastguard Worker cfg.unk_5 = 0x0;
197*61046927SAndroid Build Coastguard Worker }
198*61046927SAndroid Build Coastguard Worker
199*61046927SAndroid Build Coastguard Worker agx_pack(&linked->fragment_control, FRAGMENT_CONTROL, cfg) {
200*61046927SAndroid Build Coastguard Worker cfg.tag_write_disable = tag_write_disable;
201*61046927SAndroid Build Coastguard Worker cfg.disable_tri_merging = disable_tri_merging;
202*61046927SAndroid Build Coastguard Worker
203*61046927SAndroid Build Coastguard Worker if (reads_tib && writes_sample_mask)
204*61046927SAndroid Build Coastguard Worker cfg.pass_type = AGX_PASS_TYPE_TRANSLUCENT_PUNCH_THROUGH;
205*61046927SAndroid Build Coastguard Worker else if (reads_tib)
206*61046927SAndroid Build Coastguard Worker cfg.pass_type = AGX_PASS_TYPE_TRANSLUCENT;
207*61046927SAndroid Build Coastguard Worker else if (writes_sample_mask)
208*61046927SAndroid Build Coastguard Worker cfg.pass_type = AGX_PASS_TYPE_PUNCH_THROUGH;
209*61046927SAndroid Build Coastguard Worker else
210*61046927SAndroid Build Coastguard Worker cfg.pass_type = AGX_PASS_TYPE_OPAQUE;
211*61046927SAndroid Build Coastguard Worker }
212*61046927SAndroid Build Coastguard Worker
213*61046927SAndroid Build Coastguard Worker /* Merge the CF binding lists from the prolog to handle cull distance */
214*61046927SAndroid Build Coastguard Worker memcpy(&linked->cf, &main->info.varyings.fs,
215*61046927SAndroid Build Coastguard Worker sizeof(struct agx_varyings_fs));
216*61046927SAndroid Build Coastguard Worker
217*61046927SAndroid Build Coastguard Worker struct agx_varyings_fs *prolog_vary =
218*61046927SAndroid Build Coastguard Worker prolog ? &prolog->info.varyings.fs : NULL;
219*61046927SAndroid Build Coastguard Worker
220*61046927SAndroid Build Coastguard Worker if (prolog_vary && prolog_vary->nr_bindings) {
221*61046927SAndroid Build Coastguard Worker assert(!prolog_vary->reads_z);
222*61046927SAndroid Build Coastguard Worker linked->cf.nr_cf = MAX2(linked->cf.nr_cf, prolog_vary->nr_cf);
223*61046927SAndroid Build Coastguard Worker
224*61046927SAndroid Build Coastguard Worker assert(linked->cf.nr_bindings + prolog_vary->nr_bindings <=
225*61046927SAndroid Build Coastguard Worker ARRAY_SIZE(linked->cf.bindings) &&
226*61046927SAndroid Build Coastguard Worker "bounded by # of coeff registers");
227*61046927SAndroid Build Coastguard Worker
228*61046927SAndroid Build Coastguard Worker memcpy(linked->cf.bindings + linked->cf.nr_bindings,
229*61046927SAndroid Build Coastguard Worker prolog_vary->bindings,
230*61046927SAndroid Build Coastguard Worker sizeof(struct agx_cf_binding) * prolog_vary->nr_bindings);
231*61046927SAndroid Build Coastguard Worker
232*61046927SAndroid Build Coastguard Worker linked->cf.nr_bindings += prolog_vary->nr_bindings;
233*61046927SAndroid Build Coastguard Worker }
234*61046927SAndroid Build Coastguard Worker
235*61046927SAndroid Build Coastguard Worker agx_pack(&linked->osel, OUTPUT_SELECT, cfg) {
236*61046927SAndroid Build Coastguard Worker cfg.varyings = linked->cf.nr_bindings > 0;
237*61046927SAndroid Build Coastguard Worker cfg.frag_coord_z = linked->cf.reads_z;
238*61046927SAndroid Build Coastguard Worker }
239*61046927SAndroid Build Coastguard Worker }
240*61046927SAndroid Build Coastguard Worker }
241