/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_resolve.c
 *
 * This file handles resolve tracking for main and auxiliary surfaces.
 *
 * It also handles our cache tracking. We have sets for the render cache,
 * depth cache, and so on. If a BO is in a cache's set, then it may have
 * data in that cache. The helpers take care of emitting flushes for
 * render-to-texture, format reinterpretation issues, and other situations.
 */

#include "util/hash_table.h"
#include "util/set.h"
#include "iris_context.h"
#include "compiler/nir/nir.h"

/**
 * Disable auxiliary buffers if a renderbuffer is also bound as a texture.
 * This causes a self-dependency, where both rendering and sampling may
 * concurrently read or write the CCS buffer, causing incorrect pixels.
 */
static bool
disable_rb_aux_buffer(struct iris_context *ice,
                      bool *draw_aux_buffer_disabled,
                      struct iris_resource *tex_res,
                      unsigned min_level, unsigned num_levels,
                      const char *usage)
{
   struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
   bool found = false;

   /* We only need to worry about color compression and fast clears. */
   if (tex_res->aux.usage != ISL_AUX_USAGE_CCS_D &&
       tex_res->aux.usage != ISL_AUX_USAGE_CCS_E &&
       tex_res->aux.usage != ISL_AUX_USAGE_FCV_CCS_E)
      return false;

   for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
      struct iris_surface *surf = (void *) cso_fb->cbufs[i];
      if (!surf)
         continue;

      struct iris_resource *rb_res = (void *) surf->base.texture;

      if (rb_res->bo == tex_res->bo &&
          surf->base.u.tex.level >= min_level &&
          surf->base.u.tex.level < min_level + num_levels) {
         found = draw_aux_buffer_disabled[i] = true;
      }
   }

   if (found) {
      perf_debug(&ice->dbg,
                 "Disabling CCS because a renderbuffer is also bound %s.\n",
                 usage);
   }

   return found;
}

static void
resolve_sampler_views(struct iris_context *ice,
                      struct iris_batch *batch,
                      struct iris_shader_state *shs,
                      const struct shader_info *info,
                      bool *draw_aux_buffer_disabled,
                      bool consider_framebuffer)
{
   if (info == NULL)
      return;

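   /* Walk only the sampler views that are both bound and actually read by
    * the shader, and prepare each one's aux state for texturing.
    */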
   int i;
   BITSET_FOREACH_SET(i, shs->bound_sampler_views, IRIS_MAX_TEXTURES) {
      if (!BITSET_TEST(info->textures_used, i))
         continue;

      struct iris_sampler_view *isv = shs->textures[i];

      if (isv->res->base.b.target != PIPE_BUFFER) {
         if (consider_framebuffer) {
            disable_rb_aux_buffer(ice, draw_aux_buffer_disabled, isv->res,
                                  isv->view.base_level, isv->view.levels,
                                  "for sampling");
         }

         iris_resource_prepare_texture(ice, isv->res, isv->view.format,
                                       isv->view.base_level, isv->view.levels,
                                       isv->view.base_array_layer,
                                       isv->view.array_len);
      }

      iris_emit_buffer_barrier_for(batch, isv->res->bo,
                                   IRIS_DOMAIN_SAMPLER_READ);
   }
}

static void
resolve_image_views(struct iris_context *ice,
                    struct iris_batch *batch,
                    struct iris_shader_state *shs,
                    const struct shader_info *info)
{
   if (info == NULL)
      return;

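   /* Fold the shader's image-usage bitset into a single 64-bit mask so it
    * can be intersected with the bound image view mask.
    */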
   const uint64_t images_used =
      (info->images_used[0] | ((uint64_t)info->images_used[1]) << 32);
   uint64_t views = shs->bound_image_views & images_used;

   while (views) {
      const int i = u_bit_scan64(&views);
      struct pipe_image_view *pview = &shs->image[i].base;
      struct iris_resource *res = (void *) pview->resource;

      if (res->base.b.target != PIPE_BUFFER) {
         unsigned num_layers =
            pview->u.tex.last_layer - pview->u.tex.first_layer + 1;

         enum isl_aux_usage aux_usage =
            iris_image_view_aux_usage(ice, pview, info);

         enum isl_format view_format = iris_image_view_get_format(ice, pview);

         bool clear_supported = isl_aux_usage_has_fast_clears(aux_usage);

         if (!iris_render_formats_color_compatible(view_format,
                                                   res->surf.format,
                                                   res->aux.clear_color,
                                                   res->aux.clear_color_unknown))
            clear_supported = false;

         iris_resource_prepare_access(ice, res,
                                      pview->u.tex.level, 1,
                                      pview->u.tex.first_layer, num_layers,
                                      aux_usage, clear_supported);

         shs->image_aux_usage[i] = aux_usage;
      } else {
         shs->image_aux_usage[i] = ISL_AUX_USAGE_NONE;
      }

      iris_emit_buffer_barrier_for(batch, res->bo, IRIS_DOMAIN_DATA_WRITE);
   }
}

/**
 * \brief Resolve buffers before drawing.
 *
 * Resolve the depth buffer's HiZ buffer, resolve the depth buffer of each
 * enabled depth texture, and flush the render cache for any dirty textures.
 */
void
iris_predraw_resolve_inputs(struct iris_context *ice,
                            struct iris_batch *batch,
                            bool *draw_aux_buffer_disabled,
                            gl_shader_stage stage,
                            bool consider_framebuffer)
{
   struct iris_shader_state *shs = &ice->state.shaders[stage];
   const struct shader_info *info = iris_get_shader_info(ice, stage);

   uint64_t stage_dirty = (IRIS_STAGE_DIRTY_BINDINGS_VS << stage) |
      (consider_framebuffer ? IRIS_STAGE_DIRTY_BINDINGS_FS : 0);

   if (ice->state.stage_dirty & stage_dirty) {
      resolve_sampler_views(ice, batch, shs, info, draw_aux_buffer_disabled,
                            consider_framebuffer);
      resolve_image_views(ice, batch, shs, info);
   }
}

void
iris_predraw_resolve_framebuffer(struct iris_context *ice,
                                 struct iris_batch *batch,
                                 bool *draw_aux_buffer_disabled)
{
   struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
   struct iris_screen *screen = (void *) ice->ctx.screen;
   const struct intel_device_info *devinfo = screen->devinfo;
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
   const nir_shader *nir = ish->nir;

   if (ice->state.dirty & IRIS_DIRTY_DEPTH_BUFFER) {
      struct pipe_surface *zs_surf = cso_fb->zsbuf;

      if (zs_surf) {
         struct iris_resource *z_res, *s_res;
         iris_get_depth_stencil_resources(zs_surf->texture, &z_res, &s_res);
         unsigned num_layers =
            zs_surf->u.tex.last_layer - zs_surf->u.tex.first_layer + 1;

         if (z_res) {
            iris_resource_prepare_render(ice, z_res, z_res->surf.format,
                                         zs_surf->u.tex.level,
                                         zs_surf->u.tex.first_layer,
                                         num_layers, ice->state.hiz_usage);
            iris_emit_buffer_barrier_for(batch, z_res->bo,
                                         IRIS_DOMAIN_DEPTH_WRITE);
         }

         if (s_res) {
            iris_emit_buffer_barrier_for(batch, s_res->bo,
                                         IRIS_DOMAIN_DEPTH_WRITE);
         }
      }
   }

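   /* A fragment shader that reads its outputs (framebuffer fetch) on Gfx8
    * samples the bound render targets through the texture path, so prepare
    * them for texturing first.
    */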
   if (devinfo->ver == 8 && nir->info.outputs_read != 0) {
      for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
         if (cso_fb->cbufs[i]) {
            struct iris_surface *surf = (void *) cso_fb->cbufs[i];
            struct iris_resource *res = (void *) cso_fb->cbufs[i]->texture;

            iris_resource_prepare_texture(ice, res, surf->view.format,
                                          surf->view.base_level, 1,
                                          surf->view.base_array_layer,
                                          surf->view.array_len);
         }
      }
   }

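   /* When the FS bindings change, recompute each color attachment's render
    * aux usage and prepare it for rendering with that usage.
    */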
   if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_BINDINGS_FS) {
      for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
         struct iris_surface *surf = (void *) cso_fb->cbufs[i];
         if (!surf)
            continue;

         struct iris_resource *res = (void *) surf->base.texture;

         /* Undocumented workaround:
          *
          * Disable auxiliary buffer if MSRT is bound as texture.
          */
         if (intel_device_info_is_dg2(devinfo) && res->surf.samples > 1 &&
             nir->info.outputs_read != 0)
            draw_aux_buffer_disabled[i] = true;

         /* Xe2 can maintain compression if RT is bound as texture. */
         if (devinfo->ver >= 20)
            draw_aux_buffer_disabled[i] = false;

         enum isl_aux_usage aux_usage =
            iris_resource_render_aux_usage(ice, res, surf->view.format,
                                           surf->view.base_level,
                                           draw_aux_buffer_disabled[i]);

         if (ice->state.draw_aux_usage[i] != aux_usage) {
            ice->state.draw_aux_usage[i] = aux_usage;
            /* XXX: Need to track which bindings to make dirty */
            ice->state.dirty |= IRIS_DIRTY_RENDER_BUFFER;
            ice->state.stage_dirty |= IRIS_ALL_STAGE_DIRTY_BINDINGS;
         }

         iris_resource_prepare_render(ice, res, surf->view.format,
                                      surf->view.base_level,
                                      surf->view.base_array_layer,
                                      surf->view.array_len,
                                      aux_usage);

         iris_emit_buffer_barrier_for(batch, res->bo,
                                      IRIS_DOMAIN_RENDER_WRITE);
      }
   }
}

void
iris_postdraw_update_image_resolve_tracking(struct iris_context *ice,
                                            gl_shader_stage stage)
{
   struct iris_screen *screen = (void *) ice->ctx.screen;
   ASSERTED const struct intel_device_info *devinfo = screen->devinfo;

   assert(devinfo->ver >= 12);

   const struct iris_shader_state *shs = &ice->state.shaders[stage];
   const struct shader_info *info = iris_get_shader_info(ice, stage);

   const uint64_t images_used = !info ? 0 :
      (info->images_used[0] | ((uint64_t)info->images_used[1]) << 32);
   uint64_t views = shs->bound_image_views & images_used;

   while (views) {
      const int i = u_bit_scan64(&views);
      const struct pipe_image_view *pview = &shs->image[i].base;
      struct iris_resource *res = (void *) pview->resource;

      if (pview->shader_access & PIPE_IMAGE_ACCESS_WRITE &&
          res->base.b.target != PIPE_BUFFER) {
         unsigned num_layers =
            pview->u.tex.last_layer - pview->u.tex.first_layer + 1;

         iris_resource_finish_write(ice, res, pview->u.tex.level,
                                    pview->u.tex.first_layer, num_layers,
                                    shs->image_aux_usage[i]);
      }
   }
}

/**
 * \brief Call this after drawing to mark which buffers need resolving
 *
 * If the depth buffer was written to and if it has an accompanying HiZ
 * buffer, then mark that it needs a depth resolve.
 *
 * If the color buffer is a multisample window system buffer, then
 * mark that it needs a downsample.
 *
 * Also mark any render targets which will be textured as needing a render
 * cache flush.
 */
void
iris_postdraw_update_resolve_tracking(struct iris_context *ice)
{
   struct iris_screen *screen = (void *) ice->ctx.screen;
   const struct intel_device_info *devinfo = screen->devinfo;
   struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;

   // XXX: front buffer drawing?

   bool may_have_resolved_depth =
      ice->state.dirty & (IRIS_DIRTY_DEPTH_BUFFER |
                          IRIS_DIRTY_WM_DEPTH_STENCIL);

   struct pipe_surface *zs_surf = cso_fb->zsbuf;
   if (zs_surf) {
      struct iris_resource *z_res, *s_res;
      iris_get_depth_stencil_resources(zs_surf->texture, &z_res, &s_res);
      unsigned num_layers =
         zs_surf->u.tex.last_layer - zs_surf->u.tex.first_layer + 1;

      if (z_res) {
         if (may_have_resolved_depth && ice->state.depth_writes_enabled) {
            iris_resource_finish_render(ice, z_res, zs_surf->u.tex.level,
                                        zs_surf->u.tex.first_layer,
                                        num_layers, ice->state.hiz_usage);
         }
      }

      if (s_res) {
         if (may_have_resolved_depth && ice->state.stencil_writes_enabled) {
            iris_resource_finish_write(ice, s_res, zs_surf->u.tex.level,
                                       zs_surf->u.tex.first_layer, num_layers,
                                       s_res->aux.usage);
         }
      }
   }

   bool may_have_resolved_color =
      ice->state.stage_dirty & IRIS_STAGE_DIRTY_BINDINGS_FS;

   for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
      struct iris_surface *surf = (void *) cso_fb->cbufs[i];
      if (!surf)
         continue;

      struct iris_resource *res = (void *) surf->base.texture;
      enum isl_aux_usage aux_usage = ice->state.draw_aux_usage[i];

      if (may_have_resolved_color) {
         union pipe_surface_desc *desc = &surf->base.u;
         unsigned num_layers =
            desc->tex.last_layer - desc->tex.first_layer + 1;
         iris_resource_finish_render(ice, res, desc->tex.level,
                                     desc->tex.first_layer, num_layers,
                                     aux_usage);
      }
   }

   if (devinfo->ver >= 12) {
      for (gl_shader_stage stage = 0; stage < MESA_SHADER_COMPUTE; stage++) {
         iris_postdraw_update_image_resolve_tracking(ice, stage);
      }
   }
}

static void
flush_previous_aux_mode(struct iris_batch *batch,
                        const struct iris_bo *bo,
                        enum isl_aux_usage aux_usage)
{
   /* Check to see if this BO has been put into caches by a previous operation
    * but with a different aux usage. If it has, flush those caches to ensure
    * that it's only in there with one aux usage at a time.
    *
    * Even though it's not obvious, this could easily happen in practice.
    * Suppose a client is blending on a surface with sRGB encode enabled on
    * gfx9. This implies that you get AUX_USAGE_CCS_D at best. If the client
    * then disables sRGB decode and continues blending we could flip on
    * AUX_USAGE_CCS_E without doing any sort of resolve in-between (this is
    * perfectly valid since CCS_E is a subset of CCS_D). However, this means
    * that we have fragments in-flight which are rendering with UNORM+CCS_E
    * and other fragments in-flight with SRGB+CCS_D on the same surface at the
    * same time and the pixel scoreboard and color blender are trying to sort
    * it all out. This ends badly (i.e. GPU hangs).
    *
    * There are comments in various docs which indicate that the render cache
    * isn't 100% resilient to format changes. However, to date, we have never
    * observed GPU hangs or even corruption to be associated with switching the
    * format, only the aux usage. So we let that slide for now.
    *
    * We haven't seen issues on gfx12 hardware when switching between
    * FCV_CCS_E and plain CCS_E. A switch could indicate a transition in
    * accessing data through a different cache domain. The flushes and
    * invalidates that come from the cache tracker and memory barrier
    * functions seem to be enough to handle this. Treat the two as equivalent
    * to avoid extra cache flushing.
    */
   void *v_aux_usage = (void *) (uintptr_t)
      (aux_usage == ISL_AUX_USAGE_FCV_CCS_E ?
       ISL_AUX_USAGE_CCS_E : aux_usage);

   struct hash_entry *entry =
      _mesa_hash_table_search_pre_hashed(batch->bo_aux_modes, bo->hash, bo);
   if (!entry) {
      _mesa_hash_table_insert_pre_hashed(batch->bo_aux_modes, bo->hash, bo,
                                         v_aux_usage);
   } else if (entry->data != v_aux_usage) {
      iris_emit_pipe_control_flush(batch,
                                   "cache tracker: aux usage mismatch",
                                   PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                   PIPE_CONTROL_TILE_CACHE_FLUSH |
                                   PIPE_CONTROL_CS_STALL);
      entry->data = v_aux_usage;
   }
}

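/* Emit pull-constant read barriers for any constant buffers that have been
 * (re)bound since the last time they were flushed.
 */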
static void
flush_ubos(struct iris_batch *batch,
           struct iris_shader_state *shs)
{
   uint32_t cbufs = shs->dirty_cbufs & shs->bound_cbufs;

   while (cbufs) {
      const int i = u_bit_scan(&cbufs);
      struct pipe_shader_buffer *cbuf = &shs->constbuf[i];
      struct iris_resource *res = (void *)cbuf->buffer;
      iris_emit_buffer_barrier_for(batch, res->bo, IRIS_DOMAIN_PULL_CONSTANT_READ);
   }

   shs->dirty_cbufs = 0;
}

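/* Emit data-write barriers for every bound shader storage buffer, since any
 * of them may be written by the upcoming draw.
 */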
static void
flush_ssbos(struct iris_batch *batch,
            struct iris_shader_state *shs)
{
   uint32_t ssbos = shs->bound_ssbos;

   while (ssbos) {
      const int i = u_bit_scan(&ssbos);
      struct pipe_shader_buffer *ssbo = &shs->ssbo[i];
      struct iris_resource *res = (void *)ssbo->buffer;
      iris_emit_buffer_barrier_for(batch, res->bo, IRIS_DOMAIN_DATA_WRITE);
   }
}

void
iris_predraw_flush_buffers(struct iris_context *ice,
                           struct iris_batch *batch,
                           gl_shader_stage stage)
{
   struct iris_shader_state *shs = &ice->state.shaders[stage];

   if (ice->state.stage_dirty & (IRIS_STAGE_DIRTY_CONSTANTS_VS << stage))
      flush_ubos(batch, shs);

   if (ice->state.stage_dirty & (IRIS_STAGE_DIRTY_BINDINGS_VS << stage))
      flush_ssbos(batch, shs);

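   /* Streamout targets are written by the upcoming draw as well; emit
    * barriers for them when the SO buffer bindings have changed.
    */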
   if (ice->state.streamout_active &&
       (ice->state.dirty & IRIS_DIRTY_SO_BUFFERS)) {
      for (int i = 0; i < 4; i++) {
         struct iris_stream_output_target *tgt = (void *)ice->state.so_target[i];
         if (tgt) {
            struct iris_bo *bo = iris_resource_bo(tgt->base.buffer);
            iris_emit_buffer_barrier_for(batch, bo, IRIS_DOMAIN_OTHER_WRITE);
         }
      }
   }
}

static void
iris_resolve_color(struct iris_context *ice,
                   struct iris_batch *batch,
                   struct iris_resource *res,
                   unsigned level, unsigned layer,
                   enum isl_aux_op resolve_op)
{
   //DBG("%s to mt %p level %u layer %u\n", __func__, mt, level, layer);

   struct blorp_surf surf;
   iris_blorp_surf_for_resource(batch, &surf, &res->base.b,
                                res->aux.usage, level, true);

   iris_batch_maybe_flush(batch, 1500);

   /* Ivybridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
    *
    *    "Any transition from any value in {Clear, Render, Resolve} to a
    *    different value in {Clear, Render, Resolve} requires end of pipe
    *    synchronization."
    *
    * In other words, fast clear ops are not properly synchronized with
    * other drawing. We need to use a PIPE_CONTROL to ensure that the
    * contents of the previous draw hit the render target before we resolve
    * and again afterwards to ensure that the resolve is complete before we
    * do any more regular drawing.
    */
   iris_emit_end_of_pipe_sync(batch, "color resolve: pre-flush",
                              PIPE_CONTROL_RENDER_TARGET_FLUSH);

   if (intel_needs_workaround(batch->screen->devinfo, 1508744258)) {
      /* The suggested workaround is:
       *
       *    Disable RHWO by setting 0x7010[14] by default except during
       *    resolve pass.
       *
       * We implement global disabling of the RHWO optimization during
       * iris_init_render_context. We toggle it around the blorp resolve call.
       */
      assert(resolve_op == ISL_AUX_OP_FULL_RESOLVE ||
             resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE);
      batch->screen->vtbl.disable_rhwo_optimization(batch, false);
   }

   iris_batch_sync_region_start(batch);
   struct blorp_batch blorp_batch;
   blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);
   blorp_ccs_resolve(&blorp_batch, &surf, level, layer, 1, res->surf.format,
                     resolve_op);
   blorp_batch_finish(&blorp_batch);

   /* See comment above */
   iris_emit_end_of_pipe_sync(batch, "color resolve: post-flush",
                              PIPE_CONTROL_RENDER_TARGET_FLUSH);

   if (intel_needs_workaround(batch->screen->devinfo, 1508744258)) {
      batch->screen->vtbl.disable_rhwo_optimization(batch, true);
   }

   iris_batch_sync_region_end(batch);
}

static void
iris_mcs_exec(struct iris_context *ice,
              struct iris_batch *batch,
              struct iris_resource *res,
              uint32_t start_layer,
              uint32_t num_layers,
              enum isl_aux_op op)
{
   //DBG("%s to mt %p layers %u-%u\n", __func__, mt,
   //start_layer, start_layer + num_layers - 1);

   assert(isl_aux_usage_has_mcs(res->aux.usage));

   iris_batch_maybe_flush(batch, 1500);

   struct blorp_surf surf;
   iris_blorp_surf_for_resource(batch, &surf, &res->base.b,
                                res->aux.usage, 0, true);

   /* MCS partial resolve will read from the MCS surface. */
   assert(res->aux.bo == res->bo);
   iris_emit_buffer_barrier_for(batch, res->bo, IRIS_DOMAIN_SAMPLER_READ);
   iris_emit_buffer_barrier_for(batch, res->bo, IRIS_DOMAIN_RENDER_WRITE);

   struct blorp_batch blorp_batch;
   iris_batch_sync_region_start(batch);
   blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);

   if (op == ISL_AUX_OP_PARTIAL_RESOLVE) {
      blorp_mcs_partial_resolve(&blorp_batch, &surf, res->surf.format,
                                start_layer, num_layers);
   } else if (op == ISL_AUX_OP_FULL_RESOLVE) {
      /* Simply copy compressed surface to uncompressed surface in order to do
       * the full resolve.
       */
      struct blorp_surf src_surf, dst_surf;
      iris_blorp_surf_for_resource(batch, &src_surf, &res->base.b,
                                   res->aux.usage, 0, false);
      iris_blorp_surf_for_resource(batch, &dst_surf, &res->base.b,
                                   ISL_AUX_USAGE_NONE, 0, true);

      blorp_copy(&blorp_batch, &src_surf, 0, 0, &dst_surf, 0, 0,
                 0, 0, 0, 0, surf.surf->logical_level0_px.width,
                 surf.surf->logical_level0_px.height);
   } else {
      assert(op == ISL_AUX_OP_AMBIGUATE);
      blorp_mcs_ambiguate(&blorp_batch, &surf, start_layer, num_layers);
   }

   blorp_batch_finish(&blorp_batch);
   iris_batch_sync_region_end(batch);
}

bool
iris_sample_with_depth_aux(const struct intel_device_info *devinfo,
                           const struct iris_resource *res)
{
   switch (res->aux.usage) {
   case ISL_AUX_USAGE_HIZ_CCS_WT:
      /* Always support sampling with HIZ_CCS_WT. Although the sampler
       * doesn't comprehend HiZ, write-through means that the correct data
       * will be in the CCS, and the sampler can simply rely on that.
       */
      return true;
   case ISL_AUX_USAGE_HIZ_CCS:
      /* Without write-through, the CCS data may be out of sync with HiZ
       * and the sampler won't see the correct data. Skip both.
       */
      return false;
   case ISL_AUX_USAGE_HIZ:
      /* From the Broadwell PRM (Volume 2d: Command Reference: Structures
       * RENDER_SURFACE_STATE.AuxiliarySurfaceMode):
       *
       *    "If this field is set to AUX_HIZ, Number of Multisamples must be
       *    MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D."
       *
       * There is no such blurb for 1D textures, but there is sufficient
       * evidence that this is broken on SKL+.
       */
      if (!devinfo->has_sample_with_hiz ||
          res->surf.samples != 1 ||
          res->surf.dim != ISL_SURF_DIM_2D)
         return false;

      /* Make sure that HiZ exists for all necessary miplevels. */
      for (unsigned level = 0; level < res->surf.levels; ++level) {
         if (!iris_resource_level_has_hiz(devinfo, res, level))
            return false;
      }

      /* We can sample directly from HiZ in this case. */
      return true;
   default:
      return false;
   }
}

/**
 * Perform a HiZ or depth resolve operation.
 *
 * For an overview of HiZ ops, see the following sections of the Sandy Bridge
 * PRM, Volume 1, Part 2:
 * - 7.5.3.1 Depth Buffer Clear
 * - 7.5.3.2 Depth Buffer Resolve
 * - 7.5.3.3 Hierarchical Depth Buffer Resolve
 */
void
iris_hiz_exec(struct iris_context *ice,
              struct iris_batch *batch,
              struct iris_resource *res,
              unsigned int level, unsigned int start_layer,
              unsigned int num_layers, enum isl_aux_op op)
{
   ASSERTED const struct intel_device_info *devinfo = batch->screen->devinfo;

   assert(iris_resource_level_has_hiz(devinfo, res, level));
   assert(op != ISL_AUX_OP_NONE);
   UNUSED const char *name = NULL;

   iris_batch_maybe_flush(batch, 1500);

   switch (op) {
   case ISL_AUX_OP_FULL_RESOLVE:
      name = "depth resolve";
      break;
   case ISL_AUX_OP_AMBIGUATE:
      name = "hiz ambiguate";
      break;
   case ISL_AUX_OP_FAST_CLEAR:
      name = "depth clear";
      break;
   case ISL_AUX_OP_PARTIAL_RESOLVE:
   case ISL_AUX_OP_NONE:
      unreachable("Invalid HiZ op");
   }

   //DBG("%s %s to mt %p level %d layers %d-%d\n",
   //__func__, name, mt, level, start_layer, start_layer + num_layers - 1);

   /* A data cache flush is not suggested by HW docs, but we found it to fix
    * a number of failures.
    */
   unsigned wa_flush = devinfo->verx10 >= 125 &&
                       res->aux.usage == ISL_AUX_USAGE_HIZ_CCS ?
                       PIPE_CONTROL_DATA_CACHE_FLUSH : 0;

   /* The following stalls and flushes are only documented to be required
    * for HiZ clear operations. However, they also seem to be required for
    * resolve operations.
    *
    * From the Ivybridge PRM, volume 2, "Depth Buffer Clear":
    *
    *    "If other rendering operations have preceded this clear, a
    *    PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
    *    enabled must be issued before the rectangle primitive used for
    *    the depth buffer clear operation."
    *
    * Same applies for Gfx8 and Gfx9.
    */
   iris_emit_pipe_control_flush(batch,
                                "hiz op: pre-flush",
                                PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                wa_flush |
                                PIPE_CONTROL_DEPTH_STALL |
                                PIPE_CONTROL_CS_STALL);

   iris_batch_sync_region_start(batch);

   struct blorp_surf surf;
   iris_blorp_surf_for_resource(batch, &surf, &res->base.b,
                                res->aux.usage, level, true);

   struct blorp_batch blorp_batch;
   blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);
   blorp_hiz_op(&blorp_batch, &surf, level, start_layer, num_layers, op);
   blorp_batch_finish(&blorp_batch);

   /* For gfx8-11, the following stalls and flushes are only documented to be
    * required for HiZ clear operations. However, they also seem to be
    * required for resolve operations.
    *
    * From the Broadwell PRM, volume 7, "Depth Buffer Clear":
    *
    *    "Depth buffer clear pass using any of the methods (WM_STATE,
    *    3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a
    *    PIPE_CONTROL command with DEPTH_STALL bit and Depth FLUSH bits
    *    "set" before starting to render. DepthStall and DepthFlush are
    *    not needed between consecutive depth clear passes nor is it
    *    required if the depth clear pass was done with
    *    'full_surf_clear' bit set in the 3DSTATE_WM_HZ_OP."
    *
    * TODO: As the spec says, this could be conditional.
    *
    * From Bspec 46959, a programming note applicable to Gfx12+:
    *
    *    "Since HZ_OP has to be sent twice (first time set the clear/resolve
    *    state and 2nd time to clear the state), and HW internally flushes the
    *    depth cache on HZ_OP, there is no need to explicitly send a Depth
    *    Cache flush after Clear or Resolve."
    */
   if (devinfo->verx10 < 120) {
      iris_emit_pipe_control_flush(batch,
                                   "hiz op: post flush",
                                   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                   PIPE_CONTROL_DEPTH_STALL);
   }

   iris_batch_sync_region_end(batch);
}

/**
 * Does the resource's slice have hiz enabled?
 */
bool
iris_resource_level_has_hiz(const struct intel_device_info *devinfo,
                            const struct iris_resource *res, uint32_t level)
{
   iris_resource_check_level_layer(res, level, 0);

   if (!isl_aux_usage_has_hiz(res->aux.usage))
      return false;

   /* Disable HiZ for LOD > 0 unless the width/height are 8x4 aligned.
    * For LOD == 0, we can grow the dimensions to make it work.
    *
    * This doesn't appear to be necessary on Gfx11+. See details here:
    * https://gitlab.freedesktop.org/mesa/mesa/-/issues/3788
    */
   if (devinfo->ver < 11 && level > 0) {
      if (u_minify(res->base.b.width0, level) & 7)
         return false;

      if (u_minify(res->base.b.height0, level) & 3)
         return false;
   }

   return true;
}

/** \brief Assert that the level and layer are valid for the resource. */
void
iris_resource_check_level_layer(UNUSED const struct iris_resource *res,
                                UNUSED uint32_t level, UNUSED uint32_t layer)
{
   assert(level < res->surf.levels);
   assert(layer < util_num_layers(&res->base.b, level));
}

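/* Resolve INTEL_REMAINING_LEVELS into an explicit level count, asserting
 * that the requested range lies within the resource.
 */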
static inline uint32_t
miptree_level_range_length(const struct iris_resource *res,
                           uint32_t start_level, uint32_t num_levels)
{
   assert(start_level < res->surf.levels);

   if (num_levels == INTEL_REMAINING_LEVELS)
      num_levels = res->surf.levels - start_level;

   /* Check for overflow */
   assert(start_level + num_levels >= start_level);
   assert(start_level + num_levels <= res->surf.levels);

   return num_levels;
}

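/* Resolve INTEL_REMAINING_LAYERS into an explicit layer count for the given
 * level, asserting that the requested range lies within the resource.
 */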
static inline uint32_t
miptree_layer_range_length(const struct iris_resource *res, uint32_t level,
                           uint32_t start_layer, uint32_t num_layers)
{
   assert(level <= res->base.b.last_level);

   const uint32_t total_num_layers = iris_get_num_logical_layers(res, level);
   assert(start_layer < total_num_layers);
   if (num_layers == INTEL_REMAINING_LAYERS)
      num_layers = total_num_layers - start_layer;
   /* Check for overflow */
   assert(start_layer + num_layers >= start_layer);
   assert(start_layer + num_layers <= total_num_layers);

   return num_layers;
}

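/* Return true if any slice in the given range is in an aux state whose data
 * cannot be read correctly from the primary surface alone (e.g. it is
 * fast-cleared or compressed), i.e. a resolve would be required first.
 */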
bool
iris_has_invalid_primary(const struct iris_resource *res,
                         unsigned start_level, unsigned num_levels,
                         unsigned start_layer, unsigned num_layers)
{
   if (res->aux.usage == ISL_AUX_USAGE_NONE)
      return false;

   /* Clamp the level range to fit the resource */
   num_levels = miptree_level_range_length(res, start_level, num_levels);

   for (uint32_t l = 0; l < num_levels; l++) {
      const uint32_t level = start_level + l;
      const uint32_t level_layers =
         miptree_layer_range_length(res, level, start_layer, num_layers);
      for (unsigned a = 0; a < level_layers; a++) {
         enum isl_aux_state aux_state =
            iris_resource_get_aux_state(res, level, start_layer + a);
         if (!isl_aux_state_has_valid_primary(aux_state))
            return true;
      }
   }

   return false;
}

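/* Perform whatever resolve or ambiguate operations are needed so that the
 * given range of slices can be accessed with the requested aux usage,
 * optionally keeping fast-clear blocks when the access supports them.
 */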
void
iris_resource_prepare_access(struct iris_context *ice,
                             struct iris_resource *res,
                             uint32_t start_level, uint32_t num_levels,
                             uint32_t start_layer, uint32_t num_layers,
                             enum isl_aux_usage aux_usage,
                             bool fast_clear_supported)
{
   if (res->aux.usage == ISL_AUX_USAGE_NONE)
      return;

   /* We can't do resolves on the compute engine, so awkwardly, we have to
    * do them on the render batch...
    */
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];

   const uint32_t clamped_levels =
      miptree_level_range_length(res, start_level, num_levels);
   for (uint32_t l = 0; l < clamped_levels; l++) {
      const uint32_t level = start_level + l;
      const uint32_t level_layers =
         miptree_layer_range_length(res, level, start_layer, num_layers);
      for (uint32_t a = 0; a < level_layers; a++) {
         const uint32_t layer = start_layer + a;
         const enum isl_aux_state aux_state =
            iris_resource_get_aux_state(res, level, layer);
         const enum isl_aux_op aux_op =
            isl_aux_prepare_access(aux_state, aux_usage, fast_clear_supported);

         /* Prepare the aux buffer for a conditional or unconditional access.
          * A conditional access is handled by assuming that the access will
          * not evaluate to a no-op. If the access does in fact occur, the aux
          * will be in the required state. If it does not, no data is lost
          * because the aux_op performed is lossless.
          */
         if (aux_op == ISL_AUX_OP_NONE) {
            /* Nothing to do here. */
         } else if (isl_aux_usage_has_mcs(res->aux.usage)) {
            iris_mcs_exec(ice, batch, res, layer, 1, aux_op);
         } else if (isl_aux_usage_has_hiz(res->aux.usage)) {
            iris_hiz_exec(ice, batch, res, level, layer, 1, aux_op);
         } else if (res->aux.usage == ISL_AUX_USAGE_STC_CCS) {
            unreachable("iris doesn't resolve STC_CCS resources");
         } else {
            assert(isl_aux_usage_has_ccs(res->aux.usage));
            iris_resolve_color(ice, batch, res, level, layer, aux_op);
         }

         const enum isl_aux_state new_state =
            isl_aux_state_transition_aux_op(aux_state, res->aux.usage, aux_op);
         iris_resource_set_aux_state(ice, res, level, layer, 1, new_state);
      }
   }

   flush_previous_aux_mode(batch, res->bo, aux_usage);
}

void
iris_resource_finish_write(struct iris_context *ice,
                           struct iris_resource *res, uint32_t level,
                           uint32_t start_layer, uint32_t num_layers,
                           enum isl_aux_usage aux_usage)
{
   if (res->aux.usage == ISL_AUX_USAGE_NONE)
      return;

   const uint32_t level_layers =
      miptree_layer_range_length(res, level, start_layer, num_layers);

   for (uint32_t a = 0; a < level_layers; a++) {
      const uint32_t layer = start_layer + a;
      const enum isl_aux_state aux_state =
         iris_resource_get_aux_state(res, level, layer);

      /* Transition the aux state for a conditional or unconditional write. A
       * conditional write is handled by assuming that the write applies to
       * only part of the render target. This prevents the new state from
       * losing the types of compression that might exist in the current state
       * (e.g. CLEAR). If the write evaluates to a no-op, the state will still
       * be able to communicate when resolves are necessary (but it may
       * falsely communicate this as well).
       */
      const enum isl_aux_state new_aux_state =
         isl_aux_state_transition_write(aux_state, aux_usage, false);

      iris_resource_set_aux_state(ice, res, level, layer, 1, new_aux_state);
   }
}

enum isl_aux_state
iris_resource_get_aux_state(const struct iris_resource *res,
                            uint32_t level, uint32_t layer)
{
   iris_resource_check_level_layer(res, level, layer);

   if (res->surf.usage & ISL_SURF_USAGE_DEPTH_BIT) {
      assert(isl_aux_usage_has_hiz(res->aux.usage));
   } else {
      assert(res->surf.samples == 1 ||
             res->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
   }

   return res->aux.state[level][layer];
}

void
iris_resource_set_aux_state(struct iris_context *ice,
                            struct iris_resource *res, uint32_t level,
                            uint32_t start_layer, uint32_t num_layers,
                            enum isl_aux_state aux_state)
{
   struct iris_screen *screen = (void *) ice->ctx.screen;
   ASSERTED const struct intel_device_info *devinfo = screen->devinfo;

   num_layers = miptree_layer_range_length(res, level, start_layer, num_layers);

   if (res->surf.usage & ISL_SURF_USAGE_DEPTH_BIT) {
      assert(iris_resource_level_has_hiz(devinfo, res, level) ||
             !isl_aux_state_has_valid_aux(aux_state));
   } else {
      assert(res->surf.samples == 1 ||
             res->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
   }

   for (unsigned a = 0; a < num_layers; a++) {
      if (res->aux.state[level][start_layer + a] != aux_state) {
         res->aux.state[level][start_layer + a] = aux_state;
         /* XXX: Need to track which bindings to make dirty */
         ice->state.dirty |= IRIS_DIRTY_RENDER_BUFFER |
                             IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES |
                             IRIS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES;
         ice->state.stage_dirty |= IRIS_ALL_STAGE_DIRTY_BINDINGS;
      }
   }

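   /* Modifiers that cannot carry a clear color need the dma-buf marked
    * dirty when a slice becomes fast-cleared, so the clear can be resolved
    * before the buffer is handed to an external consumer.
    */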
   if (res->mod_info && !res->mod_info->supports_clear_color) {
      assert(isl_drm_modifier_has_aux(res->mod_info->modifier));
      if (aux_state == ISL_AUX_STATE_CLEAR ||
          aux_state == ISL_AUX_STATE_COMPRESSED_CLEAR ||
          aux_state == ISL_AUX_STATE_PARTIAL_CLEAR) {
         iris_mark_dirty_dmabuf(ice, &res->base.b);
      }
   }
}

enum isl_aux_usage
iris_resource_texture_aux_usage(struct iris_context *ice,
                                const struct iris_resource *res,
                                enum isl_format view_format,
                                unsigned start_level,
                                unsigned num_levels)
{
   struct iris_screen *screen = (void *) ice->ctx.screen;
   const struct intel_device_info *devinfo = screen->devinfo;

   switch (res->aux.usage) {
   case ISL_AUX_USAGE_HIZ:
   case ISL_AUX_USAGE_HIZ_CCS:
   case ISL_AUX_USAGE_HIZ_CCS_WT:
      assert(res->surf.format == view_format);
      return iris_sample_with_depth_aux(devinfo, res) ?
             res->aux.usage : ISL_AUX_USAGE_NONE;

   case ISL_AUX_USAGE_MCS:
   case ISL_AUX_USAGE_MCS_CCS:
   case ISL_AUX_USAGE_STC_CCS:
   case ISL_AUX_USAGE_MC:
      return res->aux.usage;

   case ISL_AUX_USAGE_CCS_E:
   case ISL_AUX_USAGE_FCV_CCS_E:
      /* If we don't have any unresolved color, report an aux usage of
       * ISL_AUX_USAGE_NONE. This way, texturing won't even look at the
       * aux surface and we can save some bandwidth.
       */
      if (!iris_has_invalid_primary(res, start_level, num_levels,
                                    0, INTEL_REMAINING_LAYERS))
         return ISL_AUX_USAGE_NONE;

      /* On Gfx9 color buffers may be compressed by the hardware (lossless
       * compression). There are, however, format restrictions, and care must
       * be taken that the sampler engine is capable of re-interpreting a
       * buffer with a format different from the one it was originally
       * written with.
       *
       * For example, SRGB formats are not compressible and the sampler engine
       * isn't capable of treating RGBA_UNORM as SRGB_ALPHA. In such a case
       * the underlying color buffer needs to be resolved so that the sampling
       * surface can be sampled as non-compressed (i.e., without the auxiliary
       * MCS buffer being set).
       */
      if (isl_formats_are_ccs_e_compatible(devinfo, res->surf.format,
                                           view_format))
         return res->aux.usage;
      break;

   default:
      break;
   }

   return ISL_AUX_USAGE_NONE;
}

enum isl_aux_usage
iris_image_view_aux_usage(struct iris_context *ice,
                          const struct pipe_image_view *pview,
                          const struct shader_info *info)
{
   if (!info)
      return ISL_AUX_USAGE_NONE;

   const struct iris_screen *screen = (void *) ice->ctx.screen;
   const struct intel_device_info *devinfo = screen->devinfo;
   struct iris_resource *res = (void *) pview->resource;

   const unsigned level = res->base.b.target != PIPE_BUFFER ?
      pview->u.tex.level : 0;

   bool uses_atomic_load_store =
      ice->shaders.uncompiled[info->stage]->uses_atomic_load_store;

   /* Prior to GFX12, render compression is not supported for images. */
   if (devinfo->ver < 12)
      return ISL_AUX_USAGE_NONE;

   /* On GFX12, compressed surfaces support non-atomic operations. GFX12HP
    * and later add support for all operations.
    */
   if (devinfo->verx10 < 125 && uses_atomic_load_store)
      return ISL_AUX_USAGE_NONE;

   /* If the image is read-only, and doesn't have any unresolved color,
    * report ISL_AUX_USAGE_NONE. Bypassing useless aux can save bandwidth.
    */
   if (!(pview->access & PIPE_IMAGE_ACCESS_WRITE) &&
       !iris_has_invalid_primary(res, level, 1, 0, INTEL_REMAINING_LAYERS))
      return ISL_AUX_USAGE_NONE;

   /* The FCV feature is documented to occur on regular render writes. Images,
    * however, are written through the DC data port.
    */
   if (res->aux.usage == ISL_AUX_USAGE_FCV_CCS_E)
      return ISL_AUX_USAGE_CCS_E;

   return res->aux.usage;
}

static bool
formats_are_fast_clear_compatible(enum isl_format a, enum isl_format b)
{
   /* On gfx8 and earlier, the hardware was only capable of handling 0/1 clear
    * values so sRGB curve application was a no-op for all fast-clearable
    * formats.
    *
    * On gfx9+, the hardware supports arbitrary clear values. For sRGB clear
    * values, the hardware interprets the floats, not as what would be
    * returned from the sampler (or written by the shader), but as being
    * between format conversion and sRGB curve application. This means that
    * we can switch between sRGB and UNORM without having to whack the clear
    * color.
    */
   return isl_format_srgb_to_linear(a) == isl_format_srgb_to_linear(b);
}

void
iris_resource_prepare_texture(struct iris_context *ice,
                              struct iris_resource *res,
                              enum isl_format view_format,
                              uint32_t start_level, uint32_t num_levels,
                              uint32_t start_layer, uint32_t num_layers)
{
   const struct iris_screen *screen = (void *) ice->ctx.screen;
   const struct intel_device_info *devinfo = screen->devinfo;

   enum isl_aux_usage aux_usage =
      iris_resource_texture_aux_usage(ice, res, view_format,
                                      start_level, num_levels);

   bool clear_supported = isl_aux_usage_has_fast_clears(aux_usage);

   /* On gfx8-9, the clear color is specified as ints or floats and the
    * conversion is done by the sampler. If we have a texture view, we would
    * have to perform the clear color conversion manually. Just disable clear
    * color.
    */
   if (devinfo->ver <= 9 &&
       !formats_are_fast_clear_compatible(res->surf.format, view_format)) {
      clear_supported = false;
   }

   /* With indirect clear colors, the sampler reads clear values stored in
    * pixel form. The location the sampler reads from is dependent on the
    * bits-per-channel of the format. Disable support for clear colors if the
    * new format points the sampler to an incompatible location. See
    * isl_get_sampler_clear_field_offset() for more information.
    */
   if (res->aux.clear_color_bo &&
       isl_format_get_layout(res->surf.format)->channels.r.bits != 32 &&
       isl_format_get_layout(view_format)->channels.r.bits == 32) {
      clear_supported = false;
   }

   /* On gfx12.0, the sampler has an issue with some 8 and 16bpp MSAA fast
    * clears. See HSD 1707282275, wa_14013111325. A simplified workaround is
    * implemented, but we could implement something more specific.
    */
   if (isl_aux_usage_has_mcs(aux_usage) &&
       intel_needs_workaround(devinfo, 14013111325) &&
       isl_format_get_layout(res->surf.format)->bpb <= 16) {
      clear_supported = false;
   }

   iris_resource_prepare_access(ice, res, start_level, num_levels,
                                start_layer, num_layers,
                                aux_usage, clear_supported);
}

/* Whether or not rendering a color value with either format results in the
 * same pixel. This can return false negatives.
 */
bool
iris_render_formats_color_compatible(enum isl_format a, enum isl_format b,
                                     union isl_color_value color,
                                     bool clear_color_unknown)
{
   if (a == b)
      return true;

   /* A difference in color space doesn't matter for 0/1 values. */
   if (!clear_color_unknown &&
       isl_format_srgb_to_linear(a) == isl_format_srgb_to_linear(b) &&
       isl_color_value_is_zero_one(color, a)) {
      return true;
   }

   /* Both formats may interpret the clear color as zero. */
   if (!clear_color_unknown &&
       isl_color_value_is_zero(color, a) &&
       isl_color_value_is_zero(color, b)) {
      return true;
   }

   return false;
}

void
iris_resource_update_indirect_color(struct iris_batch *batch,
                                    struct iris_resource *res)
{
   assert(res->aux.clear_color_bo);

   uint32_t pixel[4] = {};
   isl_color_value_pack(&res->aux.clear_color, res->surf.format, pixel);

   iris_emit_pipe_control_write(batch, "update fast clear color (RG____)",
                                PIPE_CONTROL_WRITE_IMMEDIATE,
                                res->aux.clear_color_bo,
                                res->aux.clear_color_offset,
                                (uint64_t) res->aux.clear_color.u32[0] |
                                (uint64_t) res->aux.clear_color.u32[1] << 32);

   iris_emit_pipe_control_write(batch, "update fast clear color (__BA__)",
                                PIPE_CONTROL_WRITE_IMMEDIATE,
                                res->aux.clear_color_bo,
                                res->aux.clear_color_offset + 8,
                                (uint64_t) res->aux.clear_color.u32[2] |
                                (uint64_t) res->aux.clear_color.u32[3] << 32);

   iris_emit_pipe_control_write(batch, "update fast clear color (____PX)",
                                PIPE_CONTROL_WRITE_IMMEDIATE,
                                res->aux.clear_color_bo,
                                res->aux.clear_color_offset + 16,
                                (uint64_t) pixel[0] |
                                (uint64_t) pixel[1] << 32);

   /* From the ICL PRMs, Volume 9: Render Engine, State Caching:
    *
    *    "Any values referenced by pointers within the RENDER_SURFACE_STATE
    *    [...] (e.g. Clear Color Pointer, [...]) are considered to be part of
    *    that state and any changes to these referenced values requires an
    *    invalidation of the L1 state cache to ensure the new values are being
    *    used as part of the state. [...]"
    *
    * Invalidate the state cache as suggested.
    */
   iris_emit_pipe_control_flush(batch, "new clear color affects state cache",
                                PIPE_CONTROL_FLUSH_ENABLE |
                                PIPE_CONTROL_STATE_CACHE_INVALIDATE);
}

enum isl_aux_usage
iris_resource_render_aux_usage(struct iris_context *ice,
                               struct iris_resource *res,
                               enum isl_format render_format, uint32_t level,
                               bool draw_aux_disabled)
{
   struct iris_screen *screen = (void *) ice->ctx.screen;
   const struct intel_device_info *devinfo = screen->devinfo;

   if (draw_aux_disabled)
      return ISL_AUX_USAGE_NONE;

   switch (res->aux.usage) {
   case ISL_AUX_USAGE_HIZ:
   case ISL_AUX_USAGE_HIZ_CCS:
   case ISL_AUX_USAGE_HIZ_CCS_WT:
      assert(render_format == res->surf.format);
      return iris_resource_level_has_hiz(devinfo, res, level) ?
             res->aux.usage : ISL_AUX_USAGE_NONE;

   case ISL_AUX_USAGE_STC_CCS:
      assert(render_format == res->surf.format);
      return res->aux.usage;

   case ISL_AUX_USAGE_MCS:
   case ISL_AUX_USAGE_MCS_CCS:
   case ISL_AUX_USAGE_CCS_D:
      return res->aux.usage;

   case ISL_AUX_USAGE_CCS_E:
   case ISL_AUX_USAGE_FCV_CCS_E:
      if (isl_formats_are_ccs_e_compatible(devinfo, res->surf.format,
                                           render_format)) {
         return res->aux.usage;
      }
      FALLTHROUGH;

   default:
      return ISL_AUX_USAGE_NONE;
   }
}

void
iris_resource_prepare_render(struct iris_context *ice,
                             struct iris_resource *res,
                             enum isl_format render_format, uint32_t level,
                             uint32_t start_layer, uint32_t layer_count,
                             enum isl_aux_usage aux_usage)
{
   /* Replace the resource's clear color with zero if:
    *
    * - The resource's clear color is incompatible with render_format. This
    *   avoids corrupting current fast clear blocks and ensures any fast clear
    *   blocks generated as a result of the render will be recoverable.
    *
    * - The clear color struct is uninitialized and potentially inconsistent
    *   with itself. For non-32-bpc formats, the struct consists of different
    *   fields for rendering and sampling. If rendering can generate
    *   fast-cleared blocks, we want these to agree so that we can avoid
    *   partially resolving prior to sampling. Images with modifiers can be
    *   ignored. Either we will have already initialized their structs to
    *   zero, or they will have already been consistent at the time of import
    *   (as defined by drm_fourcc.h)
    *
    * The only aux usage which requires this process is FCV_CCS_E. Other aux
    * usages share a subset of these restrictions and benefit from only some
    * of the steps involved with changing the clear color. For now, just keep
    * things simple and assume we have the worst case usage of FCV_CCS_E.
    */
   if (!iris_render_formats_color_compatible(render_format,
                                             res->surf.format,
                                             res->aux.clear_color,
                                             res->aux.clear_color_unknown) ||
       (res->aux.clear_color_unknown && !res->mod_info &&
        isl_format_get_layout(render_format)->channels.r.bits != 32)) {

      /* Remove references to the clear color with resolves. */
      iris_resource_prepare_access(ice, res, 0, INTEL_REMAINING_LEVELS, 0,
                                   INTEL_REMAINING_LAYERS, res->aux.usage,
                                   false);

      /* The clear color is no longer in use; replace it now. */
      const union isl_color_value zero = { .u32 = { 0, } };
      iris_resource_set_clear_color(ice, res, zero);

      if (res->aux.clear_color_bo) {
         /* Update dwords used for rendering and sampling. */
         struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
         iris_resource_update_indirect_color(batch, res);
      } else {
         /* Flag surface states with inline clear colors as dirty. */
         ice->state.stage_dirty |= IRIS_ALL_STAGE_DIRTY_BINDINGS;
      }
   }

   /* Now, do the preparation requested by the caller. Doing this after the
    * partial resolves above helps maintain the accuracy of the aux-usage
    * tracking that happens within the preparation function.
    */
   iris_resource_prepare_access(ice, res, level, 1, start_layer,
                                layer_count, aux_usage,
                                isl_aux_usage_has_fast_clears(aux_usage));
}

void
iris_resource_finish_render(struct iris_context *ice,
                            struct iris_resource *res, uint32_t level,
                            uint32_t start_layer, uint32_t layer_count,
                            enum isl_aux_usage aux_usage)
{
   iris_resource_finish_write(ice, res, level, start_layer, layer_count,
                              aux_usage);
}