xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/freedreno/a5xx/fd5_emit.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2016 Rob Clark <[email protected]>
3  * SPDX-License-Identifier: MIT
4  *
5  * Authors:
6  *    Rob Clark <[email protected]>
7  */
8 
9 #include "pipe/p_state.h"
10 #include "util/format/u_format.h"
11 #include "util/u_helpers.h"
12 #include "util/u_memory.h"
13 #include "util/u_string.h"
14 #include "util/u_viewport.h"
15 
16 #include "freedreno_query_hw.h"
17 #include "freedreno_resource.h"
18 
19 #include "fd5_blend.h"
20 #include "fd5_blitter.h"
21 #include "fd5_context.h"
22 #include "fd5_emit.h"
23 #include "fd5_format.h"
24 #include "fd5_image.h"
25 #include "fd5_program.h"
26 #include "fd5_rasterizer.h"
27 #include "fd5_screen.h"
28 #include "fd5_texture.h"
29 #include "fd5_zsa.h"
30 
31 #define emit_const_user fd5_emit_const_user
32 #define emit_const_bo   fd5_emit_const_bo
33 #include "ir3_const.h"
34 
35 /* regid:          base const register
36  * prsc or dwords: buffer containing constant values
37  * sizedwords:     size of const value buffer
38  */
39 static void
fd5_emit_const_user(struct fd_ringbuffer * ring,const struct ir3_shader_variant * v,uint32_t regid,uint32_t sizedwords,const uint32_t * dwords)40 fd5_emit_const_user(struct fd_ringbuffer *ring,
41                     const struct ir3_shader_variant *v, uint32_t regid,
42                     uint32_t sizedwords, const uint32_t *dwords)
43 {
44    emit_const_asserts(ring, v, regid, sizedwords);
45 
46    OUT_PKT7(ring, CP_LOAD_STATE4, 3 + sizedwords);
47    OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(regid / 4) |
48                      CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
49                      CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(v->type)) |
50                      CP_LOAD_STATE4_0_NUM_UNIT(sizedwords / 4));
51    OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) |
52                      CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS));
53    OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
54    for (int i = 0; i < sizedwords; i++)
55       OUT_RING(ring, ((uint32_t *)dwords)[i]);
56 }
57 
58 static void
fd5_emit_const_bo(struct fd_ringbuffer * ring,const struct ir3_shader_variant * v,uint32_t regid,uint32_t offset,uint32_t sizedwords,struct fd_bo * bo)59 fd5_emit_const_bo(struct fd_ringbuffer *ring,
60                   const struct ir3_shader_variant *v, uint32_t regid,
61                   uint32_t offset, uint32_t sizedwords, struct fd_bo *bo)
62 {
63    uint32_t dst_off = regid / 4;
64    assert(dst_off % 4 == 0);
65    uint32_t num_unit = sizedwords / 4;
66    assert(num_unit % 4 == 0);
67 
68    emit_const_asserts(ring, v, regid, sizedwords);
69 
70    OUT_PKT7(ring, CP_LOAD_STATE4, 3);
71    OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(dst_off) |
72                      CP_LOAD_STATE4_0_STATE_SRC(SS4_INDIRECT) |
73                      CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(v->type)) |
74                      CP_LOAD_STATE4_0_NUM_UNIT(num_unit));
75    OUT_RELOC(ring, bo, offset, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS), 0);
76 }
77 
/* Emit an array of UBO pointers as an inline CP_LOAD_STATE4 payload.
 * Each entry is a 64-bit GPU address (2 dwords, emitted via reloc);
 * the entry count is padded to even since NUM_UNIT is in vec4
 * (4-dword == 2-pointer) units.
 */
static void
fd5_emit_const_ptrs(struct fd_ringbuffer *ring, gl_shader_stage type,
                    uint32_t regid, uint32_t num, struct fd_bo **bos,
                    uint32_t *offsets)
{
   uint32_t anum = align(num, 2); /* pad to a whole number of vec4s */
   uint32_t i;

   assert((regid % 4) == 0);

   OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * anum));
   OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(regid / 4) |
                     CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
                     CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(type)) |
                     CP_LOAD_STATE4_0_NUM_UNIT(anum / 2));
   OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) |
                     CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS));
   OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));

   for (i = 0; i < num; i++) {
      if (bos[i]) {
         OUT_RELOC(ring, bos[i], offsets[i], 0, 0);
      } else {
         /* recognizable poison value (with the slot index encoded) for
          * unbound buffers, to make bad dereferences easy to spot:
          */
         OUT_RING(ring, 0xbad00000 | (i << 16));
         OUT_RING(ring, 0xbad00000 | (i << 16));
      }
   }

   /* fill out the alignment padding entries: */
   for (; i < anum; i++) {
      OUT_RING(ring, 0xffffffff);
      OUT_RING(ring, 0xffffffff);
   }
}
111 
/* Required by the shared ir3_const.h template: a5xx never emits consts
 * into CP_SET_DRAW_STATE state objects, always directly into the ring,
 * so this is unconditionally false.
 */
static bool
is_stateobj(struct fd_ringbuffer *ring)
{
   return false;
}
117 
118 static void
emit_const_ptrs(struct fd_ringbuffer * ring,const struct ir3_shader_variant * v,uint32_t dst_offset,uint32_t num,struct fd_bo ** bos,uint32_t * offsets)119 emit_const_ptrs(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
120                 uint32_t dst_offset, uint32_t num, struct fd_bo **bos,
121                 uint32_t *offsets)
122 {
123    /* TODO inline this */
124    assert(dst_offset + num <= v->constlen * 4);
125    fd5_emit_const_ptrs(ring, v->type, dst_offset, num, bos, offsets);
126 }
127 
/* Emit compute-shader constant state.  All the work is done by the
 * shared ir3 const-emit logic (instantiated via ir3_const.h above with
 * the fd5_emit_const_* helpers plugged in).
 */
void
fd5_emit_cs_consts(const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, struct fd_context *ctx,
                   const struct pipe_grid_info *info)
{
   ir3_emit_cs_consts(v, ring, ctx, info);
}
135 
136 /* Border color layout is diff from a4xx/a5xx.. if it turns out to be
137  * the same as a6xx then move this somewhere common ;-)
138  *
139  * Entry layout looks like (total size, 0x60 bytes):
140  */
141 
/* One border-color table entry: the same color pre-converted into every
 * layout the hw may sample it as (see setup_border_colors()).
 */
struct PACKED bcolor_entry {
   uint32_t fp32[4]; /* fp32 color, or raw integer bits for pure-int formats */
   uint16_t ui16[4]; /* unorm16 */
   int16_t si16[4];  /* snorm16 */

   uint16_t fp16[4]; /* half-float, or clamped value for pure-int formats */
   uint16_t rgb565;
   uint16_t rgb5a1;
   uint16_t rgba4;
   uint8_t __pad0[2];
   uint8_t ui8[4]; /* unorm8 */
   int8_t si8[4];  /* snorm8 */
   uint32_t rgb10a2;
   uint32_t z24; /* also s8? */

   uint16_t
      srgb[4]; /* appears to duplicate fp16[], but clamped, used for srgb */
   uint8_t __pad1[56]; /* pad entry out to FD5_BORDER_COLOR_SIZE (0x80) */
};
161 
162 #define FD5_BORDER_COLOR_SIZE 0x80
163 #define FD5_BORDER_COLOR_UPLOAD_SIZE                                           \
164    (2 * PIPE_MAX_SAMPLERS * FD5_BORDER_COLOR_SIZE)
165 
/* Fill in one bcolor_entry per sampler, converting the sampler's border
 * color into every format layout in the entry.  Channels are placed
 * according to the border_color_format's swizzle/channel description.
 */
static void
setup_border_colors(struct fd_texture_stateobj *tex,
                    struct bcolor_entry *entries)
{
   unsigned i, j;
   STATIC_ASSERT(sizeof(struct bcolor_entry) == FD5_BORDER_COLOR_SIZE);

   for (i = 0; i < tex->num_samplers; i++) {
      struct bcolor_entry *e = &entries[i];
      struct pipe_sampler_state *sampler = tex->samplers[i];
      union pipe_color_union *bc;

      if (!sampler)
         continue;

      bc = &sampler->border_color;

      enum pipe_format format = sampler->border_color_format;
      const struct util_format_description *desc =
         util_format_description(format);

      /* the packed formats are accumulated per-channel with |= below,
       * so they must start out zeroed:
       */
      e->rgb565 = 0;
      e->rgb5a1 = 0;
      e->rgba4 = 0;
      e->rgb10a2 = 0;
      e->z24 = 0;

      for (j = 0; j < 4; j++) {
         int c = desc->swizzle[j];  /* source channel to read */
         int cd = c;                /* destination slot to write */

         /*
          * HACK: for PIPE_FORMAT_X24S8_UINT we end up w/ the
          * stencil border color value in bc->ui[0] but according
          * to desc->swizzle and desc->channel, the .x component
          * is NONE and the stencil value is in the y component.
          * Meanwhile the hardware wants this in the .x component.
          */
         if ((format == PIPE_FORMAT_X24S8_UINT) ||
             (format == PIPE_FORMAT_X32_S8X24_UINT)) {
            if (j == 0) {
               c = 1;
               cd = 0;
            } else {
               continue;
            }
         }

         /* skip NONE/0/1 swizzles (anything past the w channel): */
         if (c >= 4)
            continue;

         if (desc->channel[c].pure_integer) {
            /* pure-int: store raw bits in the fp32 slot and a value
             * clamped to the channel's representable range in fp16:
             */
            uint16_t clamped;
            switch (desc->channel[c].size) {
            case 2:
               assert(desc->channel[c].type == UTIL_FORMAT_TYPE_UNSIGNED);
               clamped = CLAMP(bc->ui[j], 0, 0x3);
               break;
            case 8:
               if (desc->channel[c].type == UTIL_FORMAT_TYPE_SIGNED)
                  clamped = CLAMP(bc->i[j], -128, 127);
               else
                  clamped = CLAMP(bc->ui[j], 0, 255);
               break;
            case 10:
               assert(desc->channel[c].type == UTIL_FORMAT_TYPE_UNSIGNED);
               clamped = CLAMP(bc->ui[j], 0, 0x3ff);
               break;
            case 16:
               if (desc->channel[c].type == UTIL_FORMAT_TYPE_SIGNED)
                  clamped = CLAMP(bc->i[j], -32768, 32767);
               else
                  clamped = CLAMP(bc->ui[j], 0, 65535);
               break;
            default:
               unreachable("Unexpected bit size");
            case 32:
               clamped = 0;
               break;
            }
            e->fp32[cd] = bc->ui[j];
            e->fp16[cd] = clamped;
         } else {
            /* float: derive unorm (f_u) and snorm (f_s) clamped values
             * and convert into every non-integer layout:
             */
            float f = bc->f[j];
            float f_u = CLAMP(f, 0, 1);
            float f_s = CLAMP(f, -1, 1);

            e->fp32[c] = fui(f);
            e->fp16[c] = _mesa_float_to_half(f);
            e->srgb[c] = _mesa_float_to_half(f_u);
            e->ui16[c] = f_u * 0xffff;
            e->si16[c] = f_s * 0x7fff;
            e->ui8[c] = f_u * 0xff;
            e->si8[c] = f_s * 0x7f;
            /* 565: g gets 6 bits at shift 5, r/b get 5 bits at 0/11: */
            if (c == 1)
               e->rgb565 |= (int)(f_u * 0x3f) << 5;
            else if (c < 3)
               e->rgb565 |= (int)(f_u * 0x1f) << (c ? 11 : 0);
            if (c == 3)
               e->rgb5a1 |= (f_u > 0.5f) ? 0x8000 : 0;
            else
               e->rgb5a1 |= (int)(f_u * 0x1f) << (c * 5);
            if (c == 3)
               e->rgb10a2 |= (int)(f_u * 0x3) << 30;
            else
               e->rgb10a2 |= (int)(f_u * 0x3ff) << (c * 10);
            e->rgba4 |= (int)(f_u * 0xf) << (c * 4);
            if (c == 0)
               e->z24 = f_u * 0xffffff;
         }
      }

#if MESA_DEBUG
      /* zero the padding so debug builds have deterministic contents: */
      memset(&e->__pad0, 0, sizeof(e->__pad0));
      memset(&e->__pad1, 0, sizeof(e->__pad1));
#endif
   }
}
284 
/* Upload the border-color tables for the VS and FS samplers into a
 * freshly-allocated chunk of the border-color uploader and point
 * TPL1_TP_BORDER_COLOR_BASE_ADDR at it.  FS entries follow directly
 * after the VS entries, matching the bcolor_offset computed in
 * emit_textures().
 */
static void
emit_border_color(struct fd_context *ctx, struct fd_ringbuffer *ring) assert_dt
{
   struct fd5_context *fd5_ctx = fd5_context(ctx);
   struct bcolor_entry *entries;
   unsigned off;
   void *ptr;

   STATIC_ASSERT(sizeof(struct bcolor_entry) == FD5_BORDER_COLOR_SIZE);

   /* NOTE(review): alignment is the next pow2 of the full upload size --
    * presumably a hw addressing requirement for the base addr; confirm.
    */
   const unsigned int alignment =
      util_next_power_of_two(FD5_BORDER_COLOR_UPLOAD_SIZE);
   u_upload_alloc(fd5_ctx->border_color_uploader, 0,
                  FD5_BORDER_COLOR_UPLOAD_SIZE, alignment,
                  &off, &fd5_ctx->border_color_buf, &ptr);

   entries = ptr;

   /* VS entries first, FS entries appended after them: */
   setup_border_colors(&ctx->tex[PIPE_SHADER_VERTEX], &entries[0]);
   setup_border_colors(&ctx->tex[PIPE_SHADER_FRAGMENT],
                       &entries[ctx->tex[PIPE_SHADER_VERTEX].num_samplers]);

   OUT_PKT4(ring, REG_A5XX_TPL1_TP_BORDER_COLOR_BASE_ADDR_LO, 2);
   OUT_RELOC(ring, fd_resource(fd5_ctx->border_color_buf)->bo, off, 0, 0);

   u_upload_unmap(fd5_ctx->border_color_uploader);
}
312 
/* Emit sampler and texture-view state for one shader stage (selected by
 * the state block, sb).  Returns true if any bound sampler uses border
 * colors, i.e. the caller needs to (re)upload the border-color table via
 * emit_border_color().
 */
static bool
emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
              enum a4xx_state_block sb,
              struct fd_texture_stateobj *tex) assert_dt
{
   bool needs_border = false;
   /* FS sampler bcolor entries are stored after the VS ones (see
    * emit_border_color()), so offset the FS indices accordingly:
    */
   unsigned bcolor_offset =
      (sb == SB4_FS_TEX) ? ctx->tex[PIPE_SHADER_VERTEX].num_samplers : 0;
   unsigned i;

   if (tex->num_samplers > 0) {
      /* output sampler state: 4 dwords per sampler, inline payload */
      OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (4 * tex->num_samplers));
      OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
                        CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
                        CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
                        CP_LOAD_STATE4_0_NUM_UNIT(tex->num_samplers));
      OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER) |
                        CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
      OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
      for (i = 0; i < tex->num_samplers; i++) {
         /* unbound slots get an all-zeros dummy sampler: */
         static const struct fd5_sampler_stateobj dummy_sampler = {};
         const struct fd5_sampler_stateobj *sampler =
            tex->samplers[i] ? fd5_sampler_stateobj(tex->samplers[i])
                             : &dummy_sampler;
         OUT_RING(ring, sampler->texsamp0);
         OUT_RING(ring, sampler->texsamp1);
         OUT_RING(ring, sampler->texsamp2 |
                           A5XX_TEX_SAMP_2_BCOLOR_OFFSET(bcolor_offset + i));
         OUT_RING(ring, sampler->texsamp3);

         needs_border |= sampler->needs_border;
      }
   }

   if (tex->num_textures > 0) {
      unsigned num_textures = tex->num_textures;

      /* emit texture state: 12 dwords per view, inline payload */
      OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (12 * num_textures));
      OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
                        CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
                        CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
                        CP_LOAD_STATE4_0_NUM_UNIT(num_textures));
      OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS) |
                        CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
      OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
      for (i = 0; i < tex->num_textures; i++) {
         /* unbound slots get an all-zeros dummy view: */
         static const struct fd5_pipe_sampler_view dummy_view = {};
         const struct fd5_pipe_sampler_view *view =
            tex->textures[i] ? fd5_pipe_sampler_view(tex->textures[i])
                             : &dummy_view;
         enum a5xx_tile_mode tile_mode = TILE5_LINEAR;

         if (view->base.texture)
            tile_mode = fd_resource(view->base.texture)->layout.tile_mode;

         OUT_RING(ring,
                  view->texconst0 | A5XX_TEX_CONST_0_TILE_MODE(tile_mode));
         OUT_RING(ring, view->texconst1);
         OUT_RING(ring, view->texconst2);
         OUT_RING(ring, view->texconst3);
         if (view->base.texture) {
            struct fd_resource *rsc = fd_resource(view->base.texture);
            /* stencil-only views sample from the separate stencil rsc: */
            if (view->base.format == PIPE_FORMAT_X32_S8X24_UINT)
               rsc = rsc->stencil;
            /* reloc covers texconst4/5 (addr lo/hi); texconst5 bits are
             * or'd into the high dword:
             */
            OUT_RELOC(ring, rsc->bo, view->offset,
                      (uint64_t)view->texconst5 << 32, 0);
         } else {
            OUT_RING(ring, 0x00000000);
            OUT_RING(ring, view->texconst5);
         }
         OUT_RING(ring, view->texconst6);
         OUT_RING(ring, view->texconst7);
         OUT_RING(ring, view->texconst8);
         OUT_RING(ring, view->texconst9);
         OUT_RING(ring, view->texconst10);
         OUT_RING(ring, view->texconst11);
      }
   }

   return needs_border;
}
396 
/* Emit SSBO state for one shader stage.  Two CP_LOAD_STATE4 packets are
 * used: the first (ST4_CONSTANTS) carries per-buffer sizes, the second
 * (ST4_UBO) carries the buffer GPU addresses.
 */
static void
emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring,
           enum a4xx_state_block sb, struct fd_shaderbuf_stateobj *so,
           const struct ir3_shader_variant *v)
{
   /* highest enabled slot + 1; disabled slots below that still get
    * entries emitted (zeroed addresses below):
    */
   unsigned count = util_last_bit(so->enabled_mask);

   if (count == 0)
      return;

   /* sizes, 2 dwords per buffer: */
   OUT_PKT7(ring, CP_LOAD_STATE4, 3 + 2 * count);
   OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
                     CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
                     CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
                     CP_LOAD_STATE4_0_NUM_UNIT(count));
   OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS) |
                     CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
   OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));

   for (unsigned i = 0; i < count; i++) {
      struct pipe_shader_buffer *buf = &so->sb[i];
      unsigned sz = buf->buffer_size;

      /* Unlike a6xx, SSBO size is in bytes.  Split across the WIDTH
       * (low 16 bits) and HEIGHT (remaining bits) fields:
       */
      OUT_RING(ring, A5XX_SSBO_1_0_WIDTH(sz & MASK(16)));
      OUT_RING(ring, A5XX_SSBO_1_1_HEIGHT(sz >> 16));
   }

   /* addresses, 2 dwords (64-bit reloc) per buffer: */
   OUT_PKT7(ring, CP_LOAD_STATE4, 3 + 2 * count);
   OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
                     CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
                     CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
                     CP_LOAD_STATE4_0_NUM_UNIT(count));
   OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_UBO) |
                     CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
   OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
   for (unsigned i = 0; i < count; i++) {
      struct pipe_shader_buffer *buf = &so->sb[i];

      if (buf->buffer) {
         struct fd_resource *rsc = fd_resource(buf->buffer);
         OUT_RELOC(ring, rsc->bo, buf->buffer_offset, 0, 0);
      } else {
         /* unbound slot -> null address: */
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
      }
   }
}
445 
/* Emit VFD fetch/decode state for each vertex-shader input that actually
 * consumes a vertex element (sysvals and unused inputs are skipped).
 * j counts the hw fetch slots actually programmed.
 */
void
fd5_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd5_emit *emit)
{
   int32_t i, j;
   const struct fd_vertex_state *vtx = emit->vtx;
   const struct ir3_shader_variant *vp = fd5_emit_get_vp(emit);

   /* NOTE(review): the '<=' bound runs the index one past inputs_count;
    * the compmask check makes the extra iteration a no-op for zeroed
    * entries, but confirm against the vp->inputs[] array sizing.
    */
   for (i = 0, j = 0; i <= vp->inputs_count; i++) {
      if (vp->inputs[i].sysval)
         continue;
      if (vp->inputs[i].compmask) {
         struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
         const struct pipe_vertex_buffer *vb =
            &vtx->vertexbuf.vb[elem->vertex_buffer_index];
         struct fd_resource *rsc = fd_resource(vb->buffer.resource);
         enum pipe_format pfmt = elem->src_format;
         enum a5xx_vtx_fmt fmt = fd5_pipe2vtx(pfmt);
         bool isint = util_format_is_pure_integer(pfmt);
         uint32_t off = vb->buffer_offset + elem->src_offset;
         /* fetchable size: from the element's start to the end of the BO */
         uint32_t size = vb->buffer.resource->width0 - off;
         assert(fmt != VFMT5_NONE);

         OUT_PKT4(ring, REG_A5XX_VFD_FETCH(j), 4);
         OUT_RELOC(ring, rsc->bo, off, 0, 0);
         OUT_RING(ring, size);       /* VFD_FETCH[j].SIZE */
         OUT_RING(ring, elem->src_stride); /* VFD_FETCH[j].STRIDE */

         OUT_PKT4(ring, REG_A5XX_VFD_DECODE(j), 2);
         OUT_RING(
            ring,
            A5XX_VFD_DECODE_INSTR_IDX(j) | A5XX_VFD_DECODE_INSTR_FORMAT(fmt) |
               COND(elem->instance_divisor, A5XX_VFD_DECODE_INSTR_INSTANCED) |
               A5XX_VFD_DECODE_INSTR_SWAP(fd5_pipe2swap(pfmt)) |
               A5XX_VFD_DECODE_INSTR_UNK30 |
               COND(!isint, A5XX_VFD_DECODE_INSTR_FLOAT));
         OUT_RING(
            ring,
            MAX2(1, elem->instance_divisor)); /* VFD_DECODE[j].STEP_RATE */

         OUT_PKT4(ring, REG_A5XX_VFD_DEST_CNTL(j), 1);
         OUT_RING(ring,
                  A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK(vp->inputs[i].compmask) |
                     A5XX_VFD_DEST_CNTL_INSTR_REGID(vp->inputs[i].regid));

         j++;
      }
   }

   /* total number of fetch slots in use: */
   OUT_PKT4(ring, REG_A5XX_VFD_CONTROL_0, 1);
   OUT_RING(ring, A5XX_VFD_CONTROL_0_VTXCNT(j));
}
497 
498 void
fd5_emit_state(struct fd_context * ctx,struct fd_ringbuffer * ring,struct fd5_emit * emit)499 fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
500                struct fd5_emit *emit)
501 {
502    struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
503    const struct ir3_shader_variant *vp = fd5_emit_get_vp(emit);
504    const struct ir3_shader_variant *fp = fd5_emit_get_fp(emit);
505    const enum fd_dirty_3d_state dirty = emit->dirty;
506    bool needs_border = false;
507 
508    emit_marker5(ring, 5);
509 
510    if ((dirty & FD_DIRTY_FRAMEBUFFER) && !emit->binning_pass) {
511       unsigned char mrt_comp[A5XX_MAX_RENDER_TARGETS] = {0};
512 
513       for (unsigned i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
514          mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0;
515       }
516 
517       OUT_PKT4(ring, REG_A5XX_RB_RENDER_COMPONENTS, 1);
518       OUT_RING(ring, A5XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
519                         A5XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
520                         A5XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
521                         A5XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
522                         A5XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
523                         A5XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
524                         A5XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
525                         A5XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
526    }
527 
528    if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_FRAMEBUFFER)) {
529       struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa);
530       uint32_t rb_alpha_control = zsa->rb_alpha_control;
531 
532       if (util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0])))
533          rb_alpha_control &= ~A5XX_RB_ALPHA_CONTROL_ALPHA_TEST;
534 
535       OUT_PKT4(ring, REG_A5XX_RB_ALPHA_CONTROL, 1);
536       OUT_RING(ring, rb_alpha_control);
537 
538       OUT_PKT4(ring, REG_A5XX_RB_STENCIL_CONTROL, 1);
539       OUT_RING(ring, zsa->rb_stencil_control);
540    }
541 
542    if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_BLEND | FD_DIRTY_PROG)) {
543       struct fd5_blend_stateobj *blend = fd5_blend_stateobj(ctx->blend);
544       struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa);
545 
546       if (pfb->zsbuf) {
547          struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
548          uint32_t gras_lrz_cntl = zsa->gras_lrz_cntl;
549 
550          if (emit->no_lrz_write || !rsc->lrz || !rsc->lrz_valid)
551             gras_lrz_cntl = 0;
552          else if (emit->binning_pass && blend->lrz_write && zsa->lrz_write)
553             gras_lrz_cntl |= A5XX_GRAS_LRZ_CNTL_LRZ_WRITE;
554 
555          OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
556          OUT_RING(ring, gras_lrz_cntl);
557       }
558    }
559 
560    if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
561       struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa);
562       struct pipe_stencil_ref *sr = &ctx->stencil_ref;
563 
564       OUT_PKT4(ring, REG_A5XX_RB_STENCILREFMASK, 2);
565       OUT_RING(ring, zsa->rb_stencilrefmask |
566                         A5XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
567       OUT_RING(ring, zsa->rb_stencilrefmask_bf |
568                         A5XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
569    }
570 
571    if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
572       struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa);
573       bool fragz = fp->no_earlyz || fp->has_kill || zsa->base.alpha_enabled ||
574                    fp->writes_pos;
575 
576       OUT_PKT4(ring, REG_A5XX_RB_DEPTH_CNTL, 1);
577       OUT_RING(ring, zsa->rb_depth_cntl);
578 
579       OUT_PKT4(ring, REG_A5XX_RB_DEPTH_PLANE_CNTL, 1);
580       OUT_RING(ring, COND(fragz, A5XX_RB_DEPTH_PLANE_CNTL_FRAG_WRITES_Z) |
581                         COND(fragz && fp->fragcoord_compmask != 0,
582                              A5XX_RB_DEPTH_PLANE_CNTL_UNK1));
583 
584       OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_PLANE_CNTL, 1);
585       OUT_RING(ring, COND(fragz, A5XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z) |
586                         COND(fragz && fp->fragcoord_compmask != 0,
587                              A5XX_GRAS_SU_DEPTH_PLANE_CNTL_UNK1));
588    }
589 
590    /* NOTE: scissor enabled bit is part of rasterizer state: */
591    if (dirty & (FD_DIRTY_SCISSOR | FD_DIRTY_RASTERIZER)) {
592       struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
593 
594       OUT_PKT4(ring, REG_A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0, 2);
595       OUT_RING(ring, A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(scissor->minx) |
596                         A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(scissor->miny));
597       OUT_RING(ring, A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(scissor->maxx) |
598                         A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(scissor->maxy));
599 
600       OUT_PKT4(ring, REG_A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0, 2);
601       OUT_RING(ring, A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(scissor->minx) |
602                         A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(scissor->miny));
603       OUT_RING(ring,
604                A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(scissor->maxx) |
605                   A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(scissor->maxy));
606 
607       ctx->batch->max_scissor.minx =
608          MIN2(ctx->batch->max_scissor.minx, scissor->minx);
609       ctx->batch->max_scissor.miny =
610          MIN2(ctx->batch->max_scissor.miny, scissor->miny);
611       ctx->batch->max_scissor.maxx =
612          MAX2(ctx->batch->max_scissor.maxx, scissor->maxx);
613       ctx->batch->max_scissor.maxy =
614          MAX2(ctx->batch->max_scissor.maxy, scissor->maxy);
615    }
616 
617    if (dirty & FD_DIRTY_VIEWPORT) {
618       struct pipe_viewport_state *vp = & ctx->viewport[0];
619 
620       fd_wfi(ctx->batch, ring);
621 
622       OUT_PKT4(ring, REG_A5XX_GRAS_CL_VPORT_XOFFSET_0, 6);
623       OUT_RING(ring, A5XX_GRAS_CL_VPORT_XOFFSET_0(vp->translate[0]));
624       OUT_RING(ring, A5XX_GRAS_CL_VPORT_XSCALE_0(vp->scale[0]));
625       OUT_RING(ring, A5XX_GRAS_CL_VPORT_YOFFSET_0(vp->translate[1]));
626       OUT_RING(ring, A5XX_GRAS_CL_VPORT_YSCALE_0(vp->scale[1]));
627       OUT_RING(ring, A5XX_GRAS_CL_VPORT_ZOFFSET_0(vp->translate[2]));
628       OUT_RING(ring, A5XX_GRAS_CL_VPORT_ZSCALE_0(vp->scale[2]));
629    }
630 
631    if (dirty & FD_DIRTY_PROG)
632       fd5_program_emit(ctx, ring, emit);
633 
634    if (dirty & FD_DIRTY_RASTERIZER) {
635       struct fd5_rasterizer_stateobj *rasterizer =
636          fd5_rasterizer_stateobj(ctx->rasterizer);
637 
638       OUT_PKT4(ring, REG_A5XX_GRAS_SU_CNTL, 1);
639       OUT_RING(ring, rasterizer->gras_su_cntl |
640                         A5XX_GRAS_SU_CNTL_LINE_MODE(pfb->samples > 1 ?
641                                                     RECTANGULAR : BRESENHAM));
642 
643       OUT_PKT4(ring, REG_A5XX_GRAS_SU_POINT_MINMAX, 2);
644       OUT_RING(ring, rasterizer->gras_su_point_minmax);
645       OUT_RING(ring, rasterizer->gras_su_point_size);
646 
647       OUT_PKT4(ring, REG_A5XX_GRAS_SU_POLY_OFFSET_SCALE, 3);
648       OUT_RING(ring, rasterizer->gras_su_poly_offset_scale);
649       OUT_RING(ring, rasterizer->gras_su_poly_offset_offset);
650       OUT_RING(ring, rasterizer->gras_su_poly_offset_clamp);
651 
652       OUT_PKT4(ring, REG_A5XX_PC_RASTER_CNTL, 1);
653       OUT_RING(ring, rasterizer->pc_raster_cntl);
654 
655       OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
656       OUT_RING(ring, rasterizer->gras_cl_clip_cntl);
657    }
658 
659    /* note: must come after program emit.. because there is some overlap
660     * in registers, ex. PC_PRIMITIVE_CNTL and we rely on some cached
661     * values from fd5_program_emit() to avoid having to re-emit the prog
662     * every time rast state changes.
663     *
664     * Since the primitive restart state is not part of a tracked object, we
665     * re-emit this register every time.
666     */
667    if (emit->info && ctx->rasterizer) {
668       struct fd5_rasterizer_stateobj *rasterizer =
669          fd5_rasterizer_stateobj(ctx->rasterizer);
670       unsigned max_loc = fd5_context(ctx)->max_loc;
671 
672       OUT_PKT4(ring, REG_A5XX_PC_PRIMITIVE_CNTL, 1);
673       OUT_RING(ring,
674                rasterizer->pc_primitive_cntl |
675                   A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC(max_loc) |
676                   COND(emit->info->primitive_restart && emit->info->index_size,
677                        A5XX_PC_PRIMITIVE_CNTL_PRIMITIVE_RESTART));
678    }
679 
680    if (dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
681       uint32_t posz_regid = ir3_find_output_regid(fp, FRAG_RESULT_DEPTH);
682       unsigned nr = pfb->nr_cbufs;
683 
684       if (emit->binning_pass)
685          nr = 0;
686       else if (ctx->rasterizer->rasterizer_discard)
687          nr = 0;
688 
689       OUT_PKT4(ring, REG_A5XX_RB_FS_OUTPUT_CNTL, 1);
690       OUT_RING(ring,
691                A5XX_RB_FS_OUTPUT_CNTL_MRT(nr) |
692                   COND(fp->writes_pos, A5XX_RB_FS_OUTPUT_CNTL_FRAG_WRITES_Z));
693 
694       OUT_PKT4(ring, REG_A5XX_SP_FS_OUTPUT_CNTL, 1);
695       OUT_RING(ring, A5XX_SP_FS_OUTPUT_CNTL_MRT(nr) |
696                         A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID(posz_regid) |
697                         A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID(regid(63, 0)));
698    }
699 
700    ir3_emit_vs_consts(vp, ring, ctx, emit->info, emit->indirect, emit->draw);
701    if (!emit->binning_pass)
702       ir3_emit_fs_consts(fp, ring, ctx);
703 
704    const struct ir3_stream_output_info *info = &vp->stream_output;
705    if (info->num_outputs) {
706       struct fd_streamout_stateobj *so = &ctx->streamout;
707 
708       for (unsigned i = 0; i < so->num_targets; i++) {
709          struct fd_stream_output_target *target =
710             fd_stream_output_target(so->targets[i]);
711 
712          if (!target)
713             continue;
714 
715          OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_BASE_LO(i), 3);
716          /* VPC_SO[i].BUFFER_BASE_LO: */
717          OUT_RELOC(ring, fd_resource(target->base.buffer)->bo, 0, 0, 0);
718          OUT_RING(ring, target->base.buffer_size + target->base.buffer_offset);
719 
720          struct fd_bo *offset_bo = fd_resource(target->offset_buf)->bo;
721 
722          if (so->reset & (1 << i)) {
723             assert(so->offsets[i] == 0);
724 
725             OUT_PKT7(ring, CP_MEM_WRITE, 3);
726             OUT_RELOC(ring, offset_bo, 0, 0, 0);
727             OUT_RING(ring, target->base.buffer_offset);
728 
729             OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_OFFSET(i), 1);
730             OUT_RING(ring, target->base.buffer_offset);
731          } else {
732             OUT_PKT7(ring, CP_MEM_TO_REG, 3);
733             OUT_RING(ring,
734                      CP_MEM_TO_REG_0_REG(REG_A5XX_VPC_SO_BUFFER_OFFSET(i)) |
735                         CP_MEM_TO_REG_0_SHIFT_BY_2 | CP_MEM_TO_REG_0_UNK31 |
736                         CP_MEM_TO_REG_0_CNT(0));
737             OUT_RELOC(ring, offset_bo, 0, 0, 0);
738          }
739 
740          // After a draw HW would write the new offset to offset_bo
741          OUT_PKT4(ring, REG_A5XX_VPC_SO_FLUSH_BASE_LO(i), 2);
742          OUT_RELOC(ring, offset_bo, 0, 0, 0);
743 
744          so->reset &= ~(1 << i);
745 
746          emit->streamout_mask |= (1 << i);
747       }
748    }
749 
750    if (!emit->streamout_mask && info->num_outputs) {
751       OUT_PKT7(ring, CP_CONTEXT_REG_BUNCH, 4);
752       OUT_RING(ring, REG_A5XX_VPC_SO_CNTL);
753       OUT_RING(ring, 0);
754       OUT_RING(ring, REG_A5XX_VPC_SO_BUF_CNTL);
755       OUT_RING(ring, 0);
756    } else if (emit->streamout_mask && !(dirty & FD_DIRTY_PROG)) {
757       /* reemit the program (if we haven't already) to re-enable streamout.  We
758        * really should switch to setting up program state at compile time so we
759        * can separate the SO state from the rest, and not recompute all the
760        * time.
761        */
762       fd5_program_emit(ctx, ring, emit);
763    }
764 
765    if (dirty & FD_DIRTY_BLEND) {
766       struct fd5_blend_stateobj *blend = fd5_blend_stateobj(ctx->blend);
767       uint32_t i;
768 
769       for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
770          enum pipe_format format = pipe_surface_format(pfb->cbufs[i]);
771          bool is_int = util_format_is_pure_integer(format);
772          bool has_alpha = util_format_has_alpha(format);
773          uint32_t control = blend->rb_mrt[i].control;
774 
775          if (is_int) {
776             control &= A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
777             control |= A5XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY);
778          }
779 
780          if (!has_alpha) {
781             control &= ~A5XX_RB_MRT_CONTROL_BLEND2;
782          }
783 
784          OUT_PKT4(ring, REG_A5XX_RB_MRT_CONTROL(i), 1);
785          OUT_RING(ring, control);
786 
787          OUT_PKT4(ring, REG_A5XX_RB_MRT_BLEND_CONTROL(i), 1);
788          OUT_RING(ring, blend->rb_mrt[i].blend_control);
789       }
790 
791       OUT_PKT4(ring, REG_A5XX_SP_BLEND_CNTL, 1);
792       OUT_RING(ring, blend->sp_blend_cntl);
793    }
794 
795    if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_SAMPLE_MASK)) {
796       struct fd5_blend_stateobj *blend = fd5_blend_stateobj(ctx->blend);
797 
798       OUT_PKT4(ring, REG_A5XX_RB_BLEND_CNTL, 1);
799       OUT_RING(ring, blend->rb_blend_cntl |
800                         A5XX_RB_BLEND_CNTL_SAMPLE_MASK(ctx->sample_mask));
801    }
802 
803    if (dirty & FD_DIRTY_BLEND_COLOR) {
804       struct pipe_blend_color *bcolor = &ctx->blend_color;
805 
806       OUT_PKT4(ring, REG_A5XX_RB_BLEND_RED, 8);
807       OUT_RING(ring, A5XX_RB_BLEND_RED_FLOAT(bcolor->color[0]) |
808                         A5XX_RB_BLEND_RED_UINT(CLAMP(bcolor->color[0], 0.f, 1.f) * 0xff) |
809                         A5XX_RB_BLEND_RED_SINT(CLAMP(bcolor->color[0], -1.f, 1.f) * 0x7f));
810       OUT_RING(ring, A5XX_RB_BLEND_RED_F32(bcolor->color[0]));
811       OUT_RING(ring, A5XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1]) |
812                         A5XX_RB_BLEND_GREEN_UINT(CLAMP(bcolor->color[1], 0.f, 1.f) * 0xff) |
813                         A5XX_RB_BLEND_GREEN_SINT(CLAMP(bcolor->color[1], -1.f, 1.f) * 0x7f));
814       OUT_RING(ring, A5XX_RB_BLEND_RED_F32(bcolor->color[1]));
815       OUT_RING(ring, A5XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2]) |
816                         A5XX_RB_BLEND_BLUE_UINT(CLAMP(bcolor->color[2], 0.f, 1.f) * 0xff) |
817                         A5XX_RB_BLEND_BLUE_SINT(CLAMP(bcolor->color[2], -1.f, 1.f) * 0x7f));
818       OUT_RING(ring, A5XX_RB_BLEND_BLUE_F32(bcolor->color[2]));
819       OUT_RING(ring, A5XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]) |
820                         A5XX_RB_BLEND_ALPHA_UINT(CLAMP(bcolor->color[3], 0.f, 1.f) * 0xff) |
821                         A5XX_RB_BLEND_ALPHA_SINT(CLAMP(bcolor->color[3], -1.f, 1.f) * 0x7f));
822       OUT_RING(ring, A5XX_RB_BLEND_ALPHA_F32(bcolor->color[3]));
823    }
824 
825    if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX) {
826       needs_border |=
827          emit_textures(ctx, ring, SB4_VS_TEX, &ctx->tex[PIPE_SHADER_VERTEX]);
828       OUT_PKT4(ring, REG_A5XX_TPL1_VS_TEX_COUNT, 1);
829       OUT_RING(ring, ctx->tex[PIPE_SHADER_VERTEX].num_textures);
830    }
831 
832    if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX) {
833       needs_border |=
834          emit_textures(ctx, ring, SB4_FS_TEX, &ctx->tex[PIPE_SHADER_FRAGMENT]);
835    }
836 
837    OUT_PKT4(ring, REG_A5XX_TPL1_FS_TEX_COUNT, 1);
838    OUT_RING(ring, ctx->shaderimg[PIPE_SHADER_FRAGMENT].enabled_mask
839                      ? ~0
840                      : ctx->tex[PIPE_SHADER_FRAGMENT].num_textures);
841 
842    OUT_PKT4(ring, REG_A5XX_TPL1_CS_TEX_COUNT, 1);
843    OUT_RING(ring, 0);
844 
845    if (needs_border)
846       emit_border_color(ctx, ring);
847 
848    if (!emit->binning_pass) {
849       if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_SSBO)
850          emit_ssbos(ctx, ring, SB4_SSBO, &ctx->shaderbuf[PIPE_SHADER_FRAGMENT],
851                   fp);
852 
853       if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_IMAGE)
854          fd5_emit_images(ctx, ring, PIPE_SHADER_FRAGMENT, fp);
855    }
856 }
857 
858 void
fd5_emit_cs_state(struct fd_context * ctx,struct fd_ringbuffer * ring,struct ir3_shader_variant * cp)859 fd5_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
860                   struct ir3_shader_variant *cp)
861 {
862    enum fd_dirty_shader_state dirty = ctx->dirty_shader[PIPE_SHADER_COMPUTE];
863 
864    if (dirty & FD_DIRTY_SHADER_TEX) {
865       bool needs_border = false;
866       needs_border |=
867          emit_textures(ctx, ring, SB4_CS_TEX, &ctx->tex[PIPE_SHADER_COMPUTE]);
868 
869       if (needs_border)
870          emit_border_color(ctx, ring);
871 
872       OUT_PKT4(ring, REG_A5XX_TPL1_VS_TEX_COUNT, 1);
873       OUT_RING(ring, 0);
874 
875       OUT_PKT4(ring, REG_A5XX_TPL1_HS_TEX_COUNT, 1);
876       OUT_RING(ring, 0);
877 
878       OUT_PKT4(ring, REG_A5XX_TPL1_DS_TEX_COUNT, 1);
879       OUT_RING(ring, 0);
880 
881       OUT_PKT4(ring, REG_A5XX_TPL1_GS_TEX_COUNT, 1);
882       OUT_RING(ring, 0);
883 
884       OUT_PKT4(ring, REG_A5XX_TPL1_FS_TEX_COUNT, 1);
885       OUT_RING(ring, 0);
886    }
887 
888    OUT_PKT4(ring, REG_A5XX_TPL1_CS_TEX_COUNT, 1);
889    OUT_RING(ring, ctx->shaderimg[PIPE_SHADER_COMPUTE].enabled_mask
890                      ? ~0
891                      : ctx->tex[PIPE_SHADER_COMPUTE].num_textures);
892 
893    if (dirty & FD_DIRTY_SHADER_SSBO)
894       emit_ssbos(ctx, ring, SB4_CS_SSBO, &ctx->shaderbuf[PIPE_SHADER_COMPUTE],
895                  cp);
896 
897    if (dirty & FD_DIRTY_SHADER_IMAGE)
898       fd5_emit_images(ctx, ring, PIPE_SHADER_COMPUTE, cp);
899 }
900 
/* emit setup at begin of new cmdstream buffer (don't rely on previous
 * state, there could have been a context switch between ioctls):
 */
void
fd5_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   struct fd_context *ctx = batch->ctx;

   /* Start in direct/bypass rendering mode and flush caches before
    * programming the baseline register state:
    */
   fd5_set_render_mode(ctx, ring, BYPASS);
   fd5_cache_flush(batch, ring);

   /* Invalidate all HLSQ state: */
   OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
   OUT_RING(ring, 0xfffff);

   /*
   t7              opcode: CP_PERFCOUNTER_ACTION (50) (4 dwords)
   0000000500024048:               70d08003 00000000 001c5000 00000005
   t7              opcode: CP_PERFCOUNTER_ACTION (50) (4 dwords)
   0000000500024058:               70d08003 00000010 001c7000 00000005

   t7              opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
   0000000500024068:               70268000
   */

   /* Primitive-restart index for 32-bit indices: */
   OUT_PKT4(ring, REG_A5XX_PC_RESTART_INDEX, 1);
   OUT_RING(ring, 0xffffffff);

   OUT_PKT4(ring, REG_A5XX_PC_RASTER_CNTL, 1);
   OUT_RING(ring, 0x00000012);

   /* Clamp range for point sprites: */
   OUT_PKT4(ring, REG_A5XX_GRAS_SU_POINT_MINMAX, 2);
   OUT_RING(ring, A5XX_GRAS_SU_POINT_MINMAX_MIN(1.0f) |
                     A5XX_GRAS_SU_POINT_MINMAX_MAX(4092.0f));
   OUT_RING(ring, A5XX_GRAS_SU_POINT_SIZE(0.5f));

   /* Conservative rasterization off: */
   OUT_PKT4(ring, REG_A5XX_GRAS_SU_CONSERVATIVE_RAS_CNTL, 1);
   OUT_RING(ring, 0x00000000); /* GRAS_SU_CONSERVATIVE_RAS_CNTL */

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_SCREEN_SCISSOR_CNTL, 1);
   OUT_RING(ring, 0x00000000); /* GRAS_SC_SCREEN_SCISSOR_CNTL */

   OUT_PKT4(ring, REG_A5XX_SP_VS_CONFIG_MAX_CONST, 1);
   OUT_RING(ring, 0); /* SP_VS_CONFIG_MAX_CONST */

   OUT_PKT4(ring, REG_A5XX_SP_FS_CONFIG_MAX_CONST, 1);
   OUT_RING(ring, 0); /* SP_FS_CONFIG_MAX_CONST */

   OUT_PKT4(ring, REG_A5XX_UNKNOWN_E292, 2);
   OUT_RING(ring, 0x00000000); /* UNKNOWN_E292 */
   OUT_RING(ring, 0x00000000); /* UNKNOWN_E293 */

   /* NOTE: the magic *_MODE_CNTL / *_DBG_ECO_CNTL values below presumably
    * come from blob driver command-stream traces -- do not "clean up"
    * without hardware verification:
    */
   OUT_PKT4(ring, REG_A5XX_RB_MODE_CNTL, 1);
   OUT_RING(ring, 0x00000044); /* RB_MODE_CNTL */

   OUT_PKT4(ring, REG_A5XX_RB_DBG_ECO_CNTL, 1);
   OUT_RING(ring, 0x00100000); /* RB_DBG_ECO_CNTL */

   OUT_PKT4(ring, REG_A5XX_VFD_MODE_CNTL, 1);
   OUT_RING(ring, 0x00000000); /* VFD_MODE_CNTL */

   OUT_PKT4(ring, REG_A5XX_PC_MODE_CNTL, 1);
   OUT_RING(ring, 0x0000001f); /* PC_MODE_CNTL */

   OUT_PKT4(ring, REG_A5XX_SP_MODE_CNTL, 1);
   OUT_RING(ring, 0x0000001e); /* SP_MODE_CNTL */

   /* a540 wants different eco-cntl tuning than other a5xx: */
   if (ctx->screen->gpu_id == 540) {
      OUT_PKT4(ring, REG_A5XX_SP_DBG_ECO_CNTL, 1);
      OUT_RING(ring, 0x800); /* SP_DBG_ECO_CNTL */

      OUT_PKT4(ring, REG_A5XX_HLSQ_DBG_ECO_CNTL, 1);
      OUT_RING(ring, 0x0);

      OUT_PKT4(ring, REG_A5XX_VPC_DBG_ECO_CNTL, 1);
      OUT_RING(ring, 0x800400);
   } else {
      OUT_PKT4(ring, REG_A5XX_SP_DBG_ECO_CNTL, 1);
      OUT_RING(ring, 0x40000800); /* SP_DBG_ECO_CNTL */
   }

   OUT_PKT4(ring, REG_A5XX_TPL1_MODE_CNTL, 1);
   OUT_RING(ring, 0x00000544); /* TPL1_MODE_CNTL */

   OUT_PKT4(ring, REG_A5XX_HLSQ_TIMEOUT_THRESHOLD_0, 2);
   OUT_RING(ring, 0x00000080); /* HLSQ_TIMEOUT_THRESHOLD_0 */
   OUT_RING(ring, 0x00000000); /* HLSQ_TIMEOUT_THRESHOLD_1 */

   /* NOTE(review): VPC_DBG_ECO_CNTL was already programmed above in the
    * gpu_id==540 path; this write overrides it to 0x400 on a540 -- looks
    * intentional per traces, but worth confirming:
    */
   OUT_PKT4(ring, REG_A5XX_VPC_DBG_ECO_CNTL, 1);
   OUT_RING(ring, 0x00000400); /* VPC_DBG_ECO_CNTL */

   OUT_PKT4(ring, REG_A5XX_HLSQ_MODE_CNTL, 1);
   OUT_RING(ring, 0x00000001); /* HLSQ_MODE_CNTL */

   OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
   OUT_RING(ring, 0x00000000); /* VPC_MODE_CNTL */

   /* we don't use this yet.. probably best to disable.. */
   OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
   OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
                     CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
                     CP_SET_DRAW_STATE__0_GROUP_ID(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));

   /* NOTE(review): duplicate write (also programmed earlier) -- presumably
    * copied from a blob trace; harmless but redundant:
    */
   OUT_PKT4(ring, REG_A5XX_GRAS_SU_CONSERVATIVE_RAS_CNTL, 1);
   OUT_RING(ring, 0x00000000); /* GRAS_SU_CONSERVATIVE_RAS_CNTL */

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_BIN_CNTL, 1);
   OUT_RING(ring, 0x00000000); /* GRAS_SC_BIN_CNTL */

   /* NOTE(review): GRAS_SC_BIN_CNTL written twice back-to-back; mirrors
    * trace output, no known functional need for the second write:
    */
   OUT_PKT4(ring, REG_A5XX_GRAS_SC_BIN_CNTL, 1);
   OUT_RING(ring, 0x00000000); /* GRAS_SC_BIN_CNTL */

   OUT_PKT4(ring, REG_A5XX_VPC_FS_PRIMITIVEID_CNTL, 1);
   OUT_RING(ring, 0x000000ff); /* VPC_FS_PRIMITIVEID_CNTL */

   /* Disable stream-out by default; draw-time state re-enables it: */
   OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
   OUT_RING(ring, A5XX_VPC_SO_OVERRIDE_SO_DISABLE);

   /* Zero out stream-out buffer 0 state: */
   OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_BASE_LO(0), 3);
   OUT_RING(ring, 0x00000000); /* VPC_SO_BUFFER_BASE_LO_0 */
   OUT_RING(ring, 0x00000000); /* VPC_SO_BUFFER_BASE_HI_0 */
   OUT_RING(ring, 0x00000000); /* VPC_SO_BUFFER_SIZE_0 */

   OUT_PKT4(ring, REG_A5XX_VPC_SO_FLUSH_BASE_LO(0), 2);
   OUT_RING(ring, 0x00000000); /* VPC_SO_FLUSH_BASE_LO_0 */
   OUT_RING(ring, 0x00000000); /* VPC_SO_FLUSH_BASE_HI_0 */

   /* Tessellation/geometry stages unused -- zero their params: */
   OUT_PKT4(ring, REG_A5XX_PC_GS_PARAM, 1);
   OUT_RING(ring, 0x00000000); /* PC_GS_PARAM */

   OUT_PKT4(ring, REG_A5XX_PC_HS_PARAM, 1);
   OUT_RING(ring, 0x00000000); /* PC_HS_PARAM */

   OUT_PKT4(ring, REG_A5XX_TPL1_TP_FS_ROTATION_CNTL, 1);
   OUT_RING(ring, 0x00000000); /* TPL1_TP_FS_ROTATION_CNTL */

   OUT_PKT4(ring, REG_A5XX_UNKNOWN_E004, 1);
   OUT_RING(ring, 0x00000000); /* UNKNOWN_E004 */

   OUT_PKT4(ring, REG_A5XX_GRAS_SU_LAYERED, 1);
   OUT_RING(ring, 0x00000000); /* GRAS_SU_LAYERED */

   OUT_PKT4(ring, REG_A5XX_VPC_SO_BUF_CNTL, 1);
   OUT_RING(ring, 0x00000000); /* VPC_SO_BUF_CNTL */

   OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_OFFSET(0), 1);
   OUT_RING(ring, 0x00000000); /* UNKNOWN_E2AB */

   OUT_PKT4(ring, REG_A5XX_PC_GS_LAYERED, 1);
   OUT_RING(ring, 0x00000000); /* PC_GS_LAYERED */

   OUT_PKT4(ring, REG_A5XX_UNKNOWN_E5AB, 1);
   OUT_RING(ring, 0x00000000); /* UNKNOWN_E5AB */

   OUT_PKT4(ring, REG_A5XX_UNKNOWN_E5C2, 1);
   OUT_RING(ring, 0x00000000); /* UNKNOWN_E5C2 */

   /* Zero out stream-out buffers 1..3 state: */
   OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_BASE_LO(1), 3);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);

   OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_OFFSET(1), 6);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);

   OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_OFFSET(2), 6);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);

   OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_OFFSET(3), 3);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);

   OUT_PKT4(ring, REG_A5XX_UNKNOWN_E5DB, 1);
   OUT_RING(ring, 0x00000000);

   /* HS/GS shader stages disabled: */
   OUT_PKT4(ring, REG_A5XX_SP_HS_CTRL_REG0, 1);
   OUT_RING(ring, 0x00000000);

   OUT_PKT4(ring, REG_A5XX_SP_GS_CTRL_REG0, 1);
   OUT_RING(ring, 0x00000000);

   /* Zero all per-stage texture counts: */
   OUT_PKT4(ring, REG_A5XX_TPL1_VS_TEX_COUNT, 4);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);

   OUT_PKT4(ring, REG_A5XX_TPL1_FS_TEX_COUNT, 2);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);

   /* Several unknown register ranges zeroed, matching blob behavior: */
   OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7C0, 3);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);

   OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7C5, 3);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);

   OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7CA, 3);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);

   OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7CF, 3);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);

   OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7D4, 3);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);

   OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7D9, 3);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);

   OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
   OUT_RING(ring, 0x00000000);
}
1137 
1138 static void
fd5_mem_to_mem(struct fd_ringbuffer * ring,struct pipe_resource * dst,unsigned dst_off,struct pipe_resource * src,unsigned src_off,unsigned sizedwords)1139 fd5_mem_to_mem(struct fd_ringbuffer *ring, struct pipe_resource *dst,
1140                unsigned dst_off, struct pipe_resource *src, unsigned src_off,
1141                unsigned sizedwords)
1142 {
1143    struct fd_bo *src_bo = fd_resource(src)->bo;
1144    struct fd_bo *dst_bo = fd_resource(dst)->bo;
1145    unsigned i;
1146 
1147    for (i = 0; i < sizedwords; i++) {
1148       OUT_PKT7(ring, CP_MEM_TO_MEM, 5);
1149       OUT_RING(ring, 0x00000000);
1150       OUT_RELOC(ring, dst_bo, dst_off, 0, 0);
1151       OUT_RELOC(ring, src_bo, src_off, 0, 0);
1152 
1153       dst_off += 4;
1154       src_off += 4;
1155    }
1156 }
1157 
1158 void
fd5_emit_init_screen(struct pipe_screen * pscreen)1159 fd5_emit_init_screen(struct pipe_screen *pscreen)
1160 {
1161    struct fd_screen *screen = fd_screen(pscreen);
1162    screen->emit_ib = fd5_emit_ib;
1163    screen->mem_to_mem = fd5_mem_to_mem;
1164 }
1165 
void
fd5_emit_init(struct pipe_context *pctx)
{
   /* No per-context emit state needed on a5xx; the per-screen hooks are
    * installed in fd5_emit_init_screen().  Kept as a stub to satisfy the
    * common per-generation init interface.
    */
}
1170