xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/i915/i915_state_emit.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /**************************************************************************
2  *
3  * Copyright 2003 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 #include "i915_batch.h"
29 #include "i915_context.h"
30 #include "i915_debug.h"
31 #include "i915_fpc.h"
32 #include "i915_reg.h"
33 #include "i915_resource.h"
34 
35 #include "pipe/p_context.h"
36 #include "pipe/p_defines.h"
37 #include "util/format/u_formats.h"
38 
39 #include "util/format/u_format.h"
40 #include "util/u_math.h"
41 #include "util/u_memory.h"
42 
43 static void
validate_flush(struct i915_context * i915,unsigned * batch_space)44 validate_flush(struct i915_context *i915, unsigned *batch_space)
45 {
46    *batch_space = i915->flush_dirty ? 1 : 0;
47 }
48 
49 static void
emit_flush(struct i915_context * i915)50 emit_flush(struct i915_context *i915)
51 {
52    /* Cache handling is very cheap atm. State handling can request to flushes:
53     * - I915_FLUSH_CACHE which is a flush everything request and
54     * - I915_PIPELINE_FLUSH which is specifically for the draw_offset flush.
55     * Because the cache handling is so dumb, no explicit "invalidate map cache".
56     * Also, the first is a strict superset of the latter, so the following logic
57     * works. */
58    if (i915->flush_dirty & I915_FLUSH_CACHE)
59       OUT_BATCH(MI_FLUSH | FLUSH_MAP_CACHE);
60    else if (i915->flush_dirty & I915_PIPELINE_FLUSH)
61       OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE);
62 }
63 
64 uint32_t invariant_state[] = {
65    _3DSTATE_AA_CMD | AA_LINE_ECAAR_WIDTH_ENABLE | AA_LINE_ECAAR_WIDTH_1_0 |
66       AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0,
67 
68    _3DSTATE_DFLT_DIFFUSE_CMD, 0,
69 
70    _3DSTATE_DFLT_SPEC_CMD, 0,
71 
72    _3DSTATE_DFLT_Z_CMD, 0,
73 
74    _3DSTATE_COORD_SET_BINDINGS | CSB_TCB(0, 0) | CSB_TCB(1, 1) | CSB_TCB(2, 2) |
75       CSB_TCB(3, 3) | CSB_TCB(4, 4) | CSB_TCB(5, 5) | CSB_TCB(6, 6) |
76       CSB_TCB(7, 7),
77 
78    _3DSTATE_RASTER_RULES_CMD | ENABLE_POINT_RASTER_RULE |
79       OGL_POINT_RASTER_RULE | ENABLE_LINE_STRIP_PROVOKE_VRTX |
80       ENABLE_TRI_FAN_PROVOKE_VRTX | LINE_STRIP_PROVOKE_VRTX(1) |
81       TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D,
82 
83    _3DSTATE_DEPTH_SUBRECT_DISABLE,
84 
85    /* disable indirect state for now
86     */
87    _3DSTATE_LOAD_INDIRECT | 0, 0};
88 
89 static void
emit_invariant(struct i915_context * i915)90 emit_invariant(struct i915_context *i915)
91 {
92    i915_winsys_batchbuffer_write(
93       i915->batch, invariant_state,
94       ARRAY_SIZE(invariant_state) * sizeof(uint32_t));
95 }
96 
97 static void
validate_immediate(struct i915_context * i915,unsigned * batch_space)98 validate_immediate(struct i915_context *i915, unsigned *batch_space)
99 {
100    unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 |
101                      1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 |
102                      1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 |
103                      1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) &
104                     i915->immediate_dirty;
105 
106    if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0) && i915->vbo)
107       i915->validation_buffers[i915->num_validation_buffers++] = i915->vbo;
108 
109    *batch_space = 1 + util_bitcount(dirty);
110 }
111 
112 static void
emit_immediate_s5(struct i915_context * i915,uint32_t imm)113 emit_immediate_s5(struct i915_context *i915, uint32_t imm)
114 {
115    struct i915_surface *surf = i915_surface(i915->framebuffer.cbufs[0]);
116 
117    if (surf) {
118       uint32_t writemask = imm & S5_WRITEDISABLE_MASK;
119       imm &= ~S5_WRITEDISABLE_MASK;
120 
121       /* The register bits are not in order. */
122       static const uint32_t writedisables[4] = {
123          S5_WRITEDISABLE_RED,
124          S5_WRITEDISABLE_GREEN,
125          S5_WRITEDISABLE_BLUE,
126          S5_WRITEDISABLE_ALPHA,
127       };
128 
129       for (int i = 0; i < 4; i++) {
130          if (writemask & writedisables[surf->color_swizzle[i]])
131             imm |= writedisables[i];
132       }
133    }
134 
135    OUT_BATCH(imm);
136 }
137 
138 static void
emit_immediate(struct i915_context * i915)139 emit_immediate(struct i915_context *i915)
140 {
141    /* remove unwanted bits and S7 */
142    unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 |
143                      1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 |
144                      1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 |
145                      1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) &
146                     i915->immediate_dirty;
147    int i, num = util_bitcount(dirty);
148    assert(num && num <= I915_MAX_IMMEDIATE);
149 
150    OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | dirty << 4 | (num - 1));
151 
152    if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) {
153       if (i915->vbo)
154          OUT_RELOC(i915->vbo, I915_USAGE_VERTEX,
155                    i915->current.immediate[I915_IMMEDIATE_S0]);
156       else
157          OUT_BATCH(0);
158    }
159 
160    for (i = 1; i < I915_MAX_IMMEDIATE; i++) {
161       if (dirty & (1 << i)) {
162          if (i == I915_IMMEDIATE_S5)
163             emit_immediate_s5(i915, i915->current.immediate[i]);
164          else
165             OUT_BATCH(i915->current.immediate[i]);
166       }
167    }
168 }
169 
170 static void
validate_dynamic(struct i915_context * i915,unsigned * batch_space)171 validate_dynamic(struct i915_context *i915, unsigned *batch_space)
172 {
173    *batch_space =
174       util_bitcount(i915->dynamic_dirty & ((1 << I915_MAX_DYNAMIC) - 1));
175 }
176 
177 static void
emit_dynamic(struct i915_context * i915)178 emit_dynamic(struct i915_context *i915)
179 {
180    int i;
181    for (i = 0; i < I915_MAX_DYNAMIC; i++) {
182       if (i915->dynamic_dirty & (1 << i))
183          OUT_BATCH(i915->current.dynamic[i]);
184    }
185 }
186 
187 static void
validate_static(struct i915_context * i915,unsigned * batch_space)188 validate_static(struct i915_context *i915, unsigned *batch_space)
189 {
190    *batch_space = 0;
191 
192    if (i915->current.cbuf_bo && (i915->static_dirty & I915_DST_BUF_COLOR)) {
193       i915->validation_buffers[i915->num_validation_buffers++] =
194          i915->current.cbuf_bo;
195       *batch_space += 3;
196    }
197 
198    if (i915->current.depth_bo && (i915->static_dirty & I915_DST_BUF_DEPTH)) {
199       i915->validation_buffers[i915->num_validation_buffers++] =
200          i915->current.depth_bo;
201       *batch_space += 3;
202    }
203 
204    if (i915->static_dirty & I915_DST_VARS)
205       *batch_space += 2;
206 
207    if (i915->static_dirty & I915_DST_RECT)
208       *batch_space += 5;
209 }
210 
211 static void
emit_static(struct i915_context * i915)212 emit_static(struct i915_context *i915)
213 {
214    if (i915->current.cbuf_bo && (i915->static_dirty & I915_DST_BUF_COLOR)) {
215       OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
216       OUT_BATCH(i915->current.cbuf_flags);
217       OUT_RELOC(i915->current.cbuf_bo, I915_USAGE_RENDER, i915->current.cbuf_offset);
218    }
219 
220    /* What happens if no zbuf??
221     */
222    if (i915->current.depth_bo && (i915->static_dirty & I915_DST_BUF_DEPTH)) {
223       OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
224       OUT_BATCH(i915->current.depth_flags);
225       OUT_RELOC(i915->current.depth_bo, I915_USAGE_RENDER, 0);
226    }
227 
228    if (i915->static_dirty & I915_DST_VARS) {
229       OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
230       OUT_BATCH(i915->current.dst_buf_vars);
231    }
232 }
233 
234 static void
validate_map(struct i915_context * i915,unsigned * batch_space)235 validate_map(struct i915_context *i915, unsigned *batch_space)
236 {
237    const uint32_t enabled = i915->current.sampler_enable_flags;
238    uint32_t unit;
239    struct i915_texture *tex;
240 
241    *batch_space = i915->current.sampler_enable_nr
242                      ? 2 + 3 * i915->current.sampler_enable_nr
243                      : 0;
244 
245    for (unit = 0; unit < I915_TEX_UNITS; unit++) {
246       if (enabled & (1 << unit)) {
247          tex = i915_texture(i915->fragment_sampler_views[unit]->texture);
248          i915->validation_buffers[i915->num_validation_buffers++] = tex->buffer;
249       }
250    }
251 }
252 
253 static void
emit_map(struct i915_context * i915)254 emit_map(struct i915_context *i915)
255 {
256    const uint32_t nr = i915->current.sampler_enable_nr;
257    if (nr) {
258       const uint32_t enabled = i915->current.sampler_enable_flags;
259       uint32_t unit;
260       uint32_t count = 0;
261       OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr));
262       OUT_BATCH(enabled);
263       for (unit = 0; unit < I915_TEX_UNITS; unit++) {
264          if (enabled & (1 << unit)) {
265             struct i915_texture *texture =
266                i915_texture(i915->fragment_sampler_views[unit]->texture);
267             struct i915_winsys_buffer *buf = texture->buffer;
268             unsigned offset = i915->current.texbuffer[unit][2];
269 
270             assert(buf);
271 
272             count++;
273 
274             OUT_RELOC(buf, I915_USAGE_SAMPLER, offset);
275             OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */
276             OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */
277          }
278       }
279       assert(count == nr);
280    }
281 }
282 
283 static void
validate_sampler(struct i915_context * i915,unsigned * batch_space)284 validate_sampler(struct i915_context *i915, unsigned *batch_space)
285 {
286    *batch_space = i915->current.sampler_enable_nr
287                      ? 2 + 3 * i915->current.sampler_enable_nr
288                      : 0;
289 }
290 
291 static void
emit_sampler(struct i915_context * i915)292 emit_sampler(struct i915_context *i915)
293 {
294    if (i915->current.sampler_enable_nr) {
295       int i;
296 
297       OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * i915->current.sampler_enable_nr));
298 
299       OUT_BATCH(i915->current.sampler_enable_flags);
300 
301       for (i = 0; i < I915_TEX_UNITS; i++) {
302          if (i915->current.sampler_enable_flags & (1 << i)) {
303             OUT_BATCH(i915->current.sampler[i][0]);
304             OUT_BATCH(i915->current.sampler[i][1]);
305             OUT_BATCH(i915->current.sampler[i][2]);
306          }
307       }
308    }
309 }
310 
311 static void
validate_constants(struct i915_context * i915,unsigned * batch_space)312 validate_constants(struct i915_context *i915, unsigned *batch_space)
313 {
314    int nr = i915->fs->num_constants ? 2 + 4 * i915->fs->num_constants : 0;
315 
316    *batch_space = nr;
317 }
318 
319 static void
emit_constants(struct i915_context * i915)320 emit_constants(struct i915_context *i915)
321 {
322    /* Collate the user-defined constants with the fragment shader's
323     * immediates according to the constant_flags[] array.
324     */
325    const uint32_t nr = i915->fs->num_constants;
326 
327    assert(nr <= I915_MAX_CONSTANT);
328    if (nr) {
329       uint32_t i;
330 
331       OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4));
332       OUT_BATCH((1 << nr) - 1);
333 
334       for (i = 0; i < nr; i++) {
335          const uint32_t *c;
336          if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) {
337             /* grab user-defined constant */
338             c = (uint32_t *)i915_buffer(i915->constants[PIPE_SHADER_FRAGMENT])
339                    ->data;
340             c += 4 * i;
341          } else {
342             /* emit program constant */
343             c = (uint32_t *)i915->fs->constants[i];
344          }
345 #if 0 /* debug */
346          {
347             float *f = (float *) c;
348             printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3],
349                    (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER
350                     ? "user" : "immediate"));
351          }
352 #endif
353          OUT_BATCH(*c++);
354          OUT_BATCH(*c++);
355          OUT_BATCH(*c++);
356          OUT_BATCH(*c++);
357       }
358    }
359 }
360 
361 static void
validate_program(struct i915_context * i915,unsigned * batch_space)362 validate_program(struct i915_context *i915, unsigned *batch_space)
363 {
364    /* we need more batch space if we want to emulate rgba framebuffers */
365    *batch_space = i915->fs->program_len + (i915->current.fixup_swizzle ? 3 : 0);
366 }
367 
368 static void
emit_program(struct i915_context * i915)369 emit_program(struct i915_context *i915)
370 {
371    /* we should always have, at least, a pass-through program */
372    assert(i915->fs->program_len > 0);
373 
374    /* If we're doing a fixup swizzle, that's 3 more dwords to add. */
375    uint32_t additional_size = 0;
376    if (i915->current.fixup_swizzle)
377       additional_size = 3;
378 
379    /* output the program: 1 dword of header, then 3 dwords per decl/instruction */
380    assert(i915->fs->program_len % 3 == 1);
381 
382    /* first word has the size, adjust it for fixup swizzle */
383    OUT_BATCH(i915->fs->program[0] + additional_size);
384 
385    for (int i = 1; i < i915->fs->program_len; i++)
386       OUT_BATCH(i915->fs->program[i]);
387 
388    /* we emit an additional mov with swizzle to fake RGBA framebuffers */
389    if (i915->current.fixup_swizzle) {
390       /* mov out_color, out_color.zyxw */
391       OUT_BATCH(A0_MOV | (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) |
392                 A0_DEST_CHANNEL_ALL | (REG_TYPE_OC << A0_SRC0_TYPE_SHIFT) |
393                 (T_DIFFUSE << A0_SRC0_NR_SHIFT));
394       OUT_BATCH(i915->current.fixup_swizzle);
395       OUT_BATCH(0);
396    }
397 }
398 
399 static void
emit_draw_rect(struct i915_context * i915)400 emit_draw_rect(struct i915_context *i915)
401 {
402    if (i915->static_dirty & I915_DST_RECT) {
403       OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
404       OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS);
405       OUT_BATCH(i915->current.draw_offset);
406       OUT_BATCH(i915->current.draw_size);
407       OUT_BATCH(i915->current.draw_offset);
408    }
409 }
410 
411 static bool
i915_validate_state(struct i915_context * i915,unsigned * batch_space)412 i915_validate_state(struct i915_context *i915, unsigned *batch_space)
413 {
414    unsigned tmp;
415 
416    i915->num_validation_buffers = 0;
417    if (i915->hardware_dirty & I915_HW_INVARIANT)
418       *batch_space = ARRAY_SIZE(invariant_state);
419    else
420       *batch_space = 0;
421 
422 #if 0
423 static int counter_total = 0;
424 #define VALIDATE_ATOM(atom, hw_dirty)                                          \
425    if (i915->hardware_dirty & hw_dirty) {                                      \
426       static int counter_##atom = 0;                                           \
427       validate_##atom(i915, &tmp);                                             \
428       *batch_space += tmp;                                                     \
429       counter_##atom += tmp;                                                   \
430       counter_total += tmp;                                                    \
431       printf("%s: \t%d/%d \t%2.2f\n", #atom, counter_##atom, counter_total,    \
432              counter_##atom * 100.f / counter_total);                          \
433    }
434 #else
435 #define VALIDATE_ATOM(atom, hw_dirty)                                          \
436    if (i915->hardware_dirty & hw_dirty) {                                      \
437       validate_##atom(i915, &tmp);                                             \
438       *batch_space += tmp;                                                     \
439    }
440 #endif
441    VALIDATE_ATOM(flush, I915_HW_FLUSH);
442    VALIDATE_ATOM(immediate, I915_HW_IMMEDIATE);
443    VALIDATE_ATOM(dynamic, I915_HW_DYNAMIC);
444    VALIDATE_ATOM(static, I915_HW_STATIC);
445    VALIDATE_ATOM(map, I915_HW_MAP);
446    VALIDATE_ATOM(sampler, I915_HW_SAMPLER);
447    VALIDATE_ATOM(constants, I915_HW_CONSTANTS);
448    VALIDATE_ATOM(program, I915_HW_PROGRAM);
449 #undef VALIDATE_ATOM
450 
451    if (i915->num_validation_buffers == 0)
452       return true;
453 
454    if (!i915_winsys_validate_buffers(i915->batch, i915->validation_buffers,
455                                      i915->num_validation_buffers))
456       return false;
457 
458    return true;
459 }
460 
461 /* Push the state into the sarea and/or texture memory.
462  */
463 void
i915_emit_hardware_state(struct i915_context * i915)464 i915_emit_hardware_state(struct i915_context *i915)
465 {
466    unsigned batch_space;
467    uintptr_t save_ptr;
468 
469    assert(i915->dirty == 0);
470 
471    if (I915_DBG_ON(DBG_ATOMS))
472       i915_dump_hardware_dirty(i915, __func__);
473 
474    if (!i915_validate_state(i915, &batch_space)) {
475       FLUSH_BATCH(NULL, I915_FLUSH_ASYNC);
476       assert(i915_validate_state(i915, &batch_space));
477    }
478 
479    if (!BEGIN_BATCH(batch_space)) {
480       FLUSH_BATCH(NULL, I915_FLUSH_ASYNC);
481       assert(i915_validate_state(i915, &batch_space));
482       assert(BEGIN_BATCH(batch_space));
483    }
484 
485    save_ptr = (uintptr_t)i915->batch->ptr;
486 
487 #define EMIT_ATOM(atom, hw_dirty)                                              \
488    if (i915->hardware_dirty & hw_dirty)                                        \
489       emit_##atom(i915);
490    EMIT_ATOM(flush, I915_HW_FLUSH);
491    EMIT_ATOM(invariant, I915_HW_INVARIANT);
492    EMIT_ATOM(immediate, I915_HW_IMMEDIATE);
493    EMIT_ATOM(dynamic, I915_HW_DYNAMIC);
494    EMIT_ATOM(static, I915_HW_STATIC);
495    EMIT_ATOM(map, I915_HW_MAP);
496    EMIT_ATOM(sampler, I915_HW_SAMPLER);
497    EMIT_ATOM(constants, I915_HW_CONSTANTS);
498    EMIT_ATOM(program, I915_HW_PROGRAM);
499    EMIT_ATOM(draw_rect, I915_HW_STATIC);
500 #undef EMIT_ATOM
501 
502    I915_DBG(DBG_EMIT, "%s: used %lu dwords, %d dwords reserved\n", __func__,
503             ((uintptr_t)i915->batch->ptr - save_ptr) / 4, batch_space);
504    assert(((uintptr_t)i915->batch->ptr - save_ptr) / 4 == batch_space);
505 
506    i915->hardware_dirty = 0;
507    i915->immediate_dirty = 0;
508    i915->dynamic_dirty = 0;
509    i915->static_dirty = 0;
510    i915->flush_dirty = 0;
511 }
512