xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/v3d/v3d_blit.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2015-2017 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir/pipe_nir.h"
25 #include "util/format/u_format.h"
26 #include "util/u_surface.h"
27 #include "util/u_blitter.h"
28 #include "compiler/nir/nir_builder.h"
29 #include "v3d_context.h"
30 #include "broadcom/common/v3d_tiling.h"
31 #include "broadcom/common/v3d_tfu.h"
32 
33 /**
34  * The param @op_blit is used to tell if we are saving state for blitter_blit
35  * (if true) or blitter_clear (if false). If other blitter functions are used
36  * that require different state we may need something more elaborated than
37  * this.
38  */
39 
40 void
v3d_blitter_save(struct v3d_context * v3d,enum v3d_blitter_op op)41 v3d_blitter_save(struct v3d_context *v3d, enum v3d_blitter_op op)
42 {
43         util_blitter_save_fragment_constant_buffer_slot(v3d->blitter,
44                                                         v3d->constbuf[PIPE_SHADER_FRAGMENT].cb);
45         util_blitter_save_vertex_buffers(v3d->blitter, v3d->vertexbuf.vb, v3d->vertexbuf.count);
46         util_blitter_save_vertex_elements(v3d->blitter, v3d->vtx);
47         util_blitter_save_vertex_shader(v3d->blitter, v3d->prog.bind_vs);
48         util_blitter_save_geometry_shader(v3d->blitter, v3d->prog.bind_gs);
49         util_blitter_save_so_targets(v3d->blitter, v3d->streamout.num_targets,
50                                      v3d->streamout.targets);
51         util_blitter_save_rasterizer(v3d->blitter, v3d->rasterizer);
52         util_blitter_save_viewport(v3d->blitter, &v3d->viewport);
53         util_blitter_save_fragment_shader(v3d->blitter, v3d->prog.bind_fs);
54         util_blitter_save_blend(v3d->blitter, v3d->blend);
55         util_blitter_save_depth_stencil_alpha(v3d->blitter, v3d->zsa);
56         util_blitter_save_stencil_ref(v3d->blitter, &v3d->stencil_ref);
57         util_blitter_save_sample_mask(v3d->blitter, v3d->sample_mask, 0);
58         util_blitter_save_so_targets(v3d->blitter, v3d->streamout.num_targets,
59                                      v3d->streamout.targets);
60 
61         if (op & V3D_SAVE_FRAMEBUFFER)
62                 util_blitter_save_framebuffer(v3d->blitter, &v3d->framebuffer);
63 
64         if (op & V3D_SAVE_TEXTURES) {
65                 util_blitter_save_scissor(v3d->blitter, &v3d->scissor);
66                 util_blitter_save_fragment_sampler_states(v3d->blitter,
67                                                           v3d->tex[PIPE_SHADER_FRAGMENT].num_samplers,
68                                                           (void **)v3d->tex[PIPE_SHADER_FRAGMENT].samplers);
69                 util_blitter_save_fragment_sampler_views(v3d->blitter,
70                                                          v3d->tex[PIPE_SHADER_FRAGMENT].num_textures,
71                                                          v3d->tex[PIPE_SHADER_FRAGMENT].textures);
72         }
73 
74         if (!(op & V3D_DISABLE_RENDER_COND)) {
75                 util_blitter_save_render_condition(v3d->blitter, v3d->cond_query,
76                                                    v3d->cond_cond, v3d->cond_mode);
77         }
78 }
79 
80 static void
v3d_render_blit(struct pipe_context * ctx,struct pipe_blit_info * info)81 v3d_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
82 {
83         struct v3d_context *v3d = v3d_context(ctx);
84         struct v3d_resource *src = v3d_resource(info->src.resource);
85         struct pipe_resource *tiled = NULL;
86 
87         if (!info->mask)
88                 return;
89 
90         if (!src->tiled &&
91             info->src.resource->target != PIPE_TEXTURE_1D &&
92             info->src.resource->target != PIPE_TEXTURE_1D_ARRAY) {
93                 struct pipe_box box = {
94                         .x = 0,
95                         .y = 0,
96                         .width = u_minify(info->src.resource->width0,
97                                            info->src.level),
98                         .height = u_minify(info->src.resource->height0,
99                                            info->src.level),
100                         .depth = 1,
101                 };
102                 struct pipe_resource tmpl = {
103                         .target = info->src.resource->target,
104                         .format = info->src.resource->format,
105                         .width0 = box.width,
106                         .height0 = box.height,
107                         .depth0 = 1,
108                         .array_size = 1,
109                 };
110                 tiled = ctx->screen->resource_create(ctx->screen, &tmpl);
111                 if (!tiled) {
112                         fprintf(stderr, "Failed to create tiled blit temp\n");
113                         return;
114                 }
115                 ctx->resource_copy_region(ctx,
116                                           tiled, 0,
117                                           0, 0, 0,
118                                           info->src.resource, info->src.level,
119                                           &box);
120                 info->src.level = 0;
121                 info->src.resource = tiled;
122         }
123 
124         if (!util_blitter_is_blit_supported(v3d->blitter, info)) {
125                 fprintf(stderr, "blit unsupported %s -> %s\n",
126                     util_format_short_name(info->src.format),
127                     util_format_short_name(info->dst.format));
128                 return;
129         }
130 
131         v3d_blitter_save(v3d, info->render_condition_enable ?
132                          V3D_BLIT_COND : V3D_BLIT);
133         util_blitter_blit(v3d->blitter, info, NULL);
134 
135         pipe_resource_reference(&tiled, NULL);
136         info->mask = 0;
137 }
138 
139 /* Implement stencil blits by reinterpreting the stencil data as an RGBA8888
140  * or R8 texture.
141  */
142 static void
v3d_stencil_blit(struct pipe_context * ctx,struct pipe_blit_info * info)143 v3d_stencil_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
144 {
145         struct v3d_context *v3d = v3d_context(ctx);
146         struct v3d_resource *src = v3d_resource(info->src.resource);
147         struct v3d_resource *dst = v3d_resource(info->dst.resource);
148         enum pipe_format src_format, dst_format;
149 
150         if ((info->mask & PIPE_MASK_S) == 0)
151                 return;
152 
153         if (src->separate_stencil) {
154                 src = src->separate_stencil;
155                 src_format = PIPE_FORMAT_R8_UINT;
156         } else {
157                 src_format = PIPE_FORMAT_RGBA8888_UINT;
158         }
159 
160         if (dst->separate_stencil) {
161                 dst = dst->separate_stencil;
162                 dst_format = PIPE_FORMAT_R8_UINT;
163         } else {
164                 dst_format = PIPE_FORMAT_RGBA8888_UINT;
165         }
166 
167         /* Initialize the surface. */
168         struct pipe_surface dst_tmpl = {
169                 .u.tex = {
170                         .level = info->dst.level,
171                         .first_layer = info->dst.box.z,
172                         .last_layer = info->dst.box.z,
173                 },
174                 .format = dst_format,
175         };
176         struct pipe_surface *dst_surf =
177                 ctx->create_surface(ctx, &dst->base, &dst_tmpl);
178 
179         /* Initialize the sampler view. */
180         struct pipe_sampler_view src_tmpl = {
181                 .target = (src->base.target == PIPE_TEXTURE_CUBE_ARRAY) ?
182                           PIPE_TEXTURE_2D_ARRAY :
183                           src->base.target,
184                 .format = src_format,
185                 .u.tex = {
186                         .first_level = info->src.level,
187                         .last_level = info->src.level,
188                         .first_layer = 0,
189                         .last_layer = (PIPE_TEXTURE_3D ?
190                                        u_minify(src->base.depth0,
191                                                 info->src.level) - 1 :
192                                        src->base.array_size - 1),
193                 },
194                 .swizzle_r = PIPE_SWIZZLE_X,
195                 .swizzle_g = PIPE_SWIZZLE_Y,
196                 .swizzle_b = PIPE_SWIZZLE_Z,
197                 .swizzle_a = PIPE_SWIZZLE_W,
198         };
199         struct pipe_sampler_view *src_view =
200                 ctx->create_sampler_view(ctx, &src->base, &src_tmpl);
201 
202         v3d_blitter_save(v3d, info->render_condition_enable ?
203                          V3D_BLIT_COND : V3D_BLIT);
204         util_blitter_blit_generic(v3d->blitter, dst_surf, &info->dst.box,
205                                   src_view, &info->src.box,
206                                   src->base.width0, src->base.height0,
207                                   PIPE_MASK_R,
208                                   PIPE_TEX_FILTER_NEAREST,
209                                   info->scissor_enable ? &info->scissor : NULL,
210                                   info->alpha_blend, false, 0, NULL);
211 
212         pipe_surface_reference(&dst_surf, NULL);
213         pipe_sampler_view_reference(&src_view, NULL);
214 
215         info->mask &= ~PIPE_MASK_S;
216 }
217 
218 bool
v3d_generate_mipmap(struct pipe_context * pctx,struct pipe_resource * prsc,enum pipe_format format,unsigned int base_level,unsigned int last_level,unsigned int first_layer,unsigned int last_layer)219 v3d_generate_mipmap(struct pipe_context *pctx,
220                     struct pipe_resource *prsc,
221                     enum pipe_format format,
222                     unsigned int base_level,
223                     unsigned int last_level,
224                     unsigned int first_layer,
225                     unsigned int last_layer)
226 {
227         if (format != prsc->format)
228                 return false;
229 
230         /* We could maybe support looping over layers for array textures, but
231          * we definitely don't support 3D.
232          */
233         if (first_layer != last_layer)
234                 return false;
235 
236         struct v3d_context *v3d = v3d_context(pctx);
237         struct v3d_screen *screen = v3d->screen;
238         struct v3d_device_info *devinfo = &screen->devinfo;
239 
240         return v3d_X(devinfo, tfu)(pctx,
241                                    prsc, prsc,
242                                    base_level,
243                                    base_level, last_level,
244                                    first_layer, first_layer,
245                                    true);
246 }
247 
248 static void
v3d_tfu_blit(struct pipe_context * pctx,struct pipe_blit_info * info)249 v3d_tfu_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
250 {
251         int dst_width = u_minify(info->dst.resource->width0, info->dst.level);
252         int dst_height = u_minify(info->dst.resource->height0, info->dst.level);
253 
254         if ((info->mask & PIPE_MASK_RGBA) == 0)
255                 return;
256 
257         if (info->scissor_enable ||
258             info->dst.box.x != 0 ||
259             info->dst.box.y != 0 ||
260             info->dst.box.width != dst_width ||
261             info->dst.box.height != dst_height ||
262             info->dst.box.depth != 1 ||
263             info->src.box.x != 0 ||
264             info->src.box.y != 0 ||
265             info->src.box.width != info->dst.box.width ||
266             info->src.box.height != info->dst.box.height ||
267             info->src.box.depth != 1) {
268                 return;
269         }
270 
271         if (info->dst.format != info->src.format)
272                 return;
273 
274         struct v3d_context *v3d = v3d_context(pctx);
275         struct v3d_screen *screen = v3d->screen;
276         struct v3d_device_info *devinfo = &screen->devinfo;
277 
278         if (v3d_X(devinfo, tfu)(pctx, info->dst.resource, info->src.resource,
279                                 info->src.level,
280                                 info->dst.level, info->dst.level,
281                                 info->src.box.z, info->dst.box.z,
282                                 false)) {
283                 info->mask &= ~PIPE_MASK_RGBA;
284         }
285 }
286 
287 static struct pipe_surface *
v3d_get_blit_surface(struct pipe_context * pctx,struct pipe_resource * prsc,enum pipe_format format,unsigned level,int16_t layer)288 v3d_get_blit_surface(struct pipe_context *pctx,
289                      struct pipe_resource *prsc,
290                      enum pipe_format format,
291                      unsigned level,
292                      int16_t layer)
293 {
294         struct pipe_surface tmpl;
295 
296         tmpl.format = format;
297         tmpl.u.tex.level = level;
298         tmpl.u.tex.first_layer = layer;
299         tmpl.u.tex.last_layer = layer;
300 
301         return pctx->create_surface(pctx, prsc, &tmpl);
302 }
303 
/**
 * Returns true when any of the bits selected by (tile_size - 1) are set in
 * @size, i.e. when @size is not a multiple of @tile_size for a power-of-two
 * @tile_size.
 */
static bool
is_tile_unaligned(unsigned size, unsigned tile_size)
{
        const unsigned low_bits_mask = tile_size - 1;

        return (size & low_bits_mask) != 0;
}
309 
310 static bool
check_tlb_blit_ok(struct v3d_device_info * devinfo,struct pipe_blit_info * info)311 check_tlb_blit_ok(struct v3d_device_info *devinfo, struct pipe_blit_info *info)
312 {
313         if (!info->mask)
314                 return false;
315 
316         bool is_color_blit = info->mask & PIPE_MASK_RGBA;
317         bool is_depth_blit = info->mask & PIPE_MASK_Z;
318         bool is_stencil_blit = info->mask & PIPE_MASK_S;
319 
320         /* We should receive either a depth/stencil blit, or color blit, but
321          * not both.
322          */
323         assert ((is_color_blit && !is_depth_blit && !is_stencil_blit) ||
324                 (!is_color_blit && (is_depth_blit || is_stencil_blit)));
325 
326         if (info->scissor_enable)
327                 return false;
328 
329         if (info->src.box.x != info->dst.box.x ||
330             info->src.box.y != info->dst.box.y ||
331             info->src.box.width != info->dst.box.width ||
332             info->src.box.height != info->dst.box.height)
333                 return false;
334 
335         if (is_color_blit &&
336             util_format_is_depth_or_stencil(info->dst.format))
337                 return false;
338 
339         if ((is_depth_blit || is_stencil_blit) &&
340             !util_format_is_depth_or_stencil(info->dst.format))
341                 return false;
342 
343         if (!v3d_rt_format_supported(devinfo, info->src.format))
344                 return false;
345 
346         if (v3d_get_rt_format(devinfo, info->src.format) !=
347             v3d_get_rt_format(devinfo, info->dst.format))
348                 return false;
349 
350         bool is_msaa_resolve = (info->src.resource->nr_samples > 1 &&
351                                 info->dst.resource->nr_samples < 2);
352 
353         if (is_msaa_resolve &&
354             !v3d_format_supports_tlb_msaa_resolve(devinfo, info->src.format))
355                 return false;
356 
357         return true;
358 }
359 
360 /* This checks if we can implement the blit straight from a job that we have
361  * not yet flushed, including MSAA resolves.
362  */
363 static void
v3d_tlb_blit_fast(struct pipe_context * pctx,struct pipe_blit_info * info)364 v3d_tlb_blit_fast(struct pipe_context *pctx, struct pipe_blit_info *info)
365 {
366         struct v3d_context *v3d = v3d_context(pctx);
367         struct v3d_screen *screen = v3d->screen;
368         struct v3d_device_info *devinfo = &screen->devinfo;
369 
370         if (!check_tlb_blit_ok(devinfo, info))
371                 return;
372 
373         /* FIXME: support depth/stencil */
374         if (!(info->mask & PIPE_MASK_RGBA))
375                 return;
376 
377         /* Can't blit from 1x to 4x since the render target configuration
378          * for the job would not be compatible.
379          */
380         if (info->src.resource->nr_samples < info->dst.resource->nr_samples)
381                 return;
382 
383         /* Can't blit form RGBX to RGBA since we can't force A=1 on tile
384          * stores.
385          */
386         if (util_format_has_alpha1(info->src.format) &&
387             !util_format_has_alpha1(info->dst.format))
388             return;
389 
390         /* Find the job that writes the blit source */
391         struct hash_entry *entry = _mesa_hash_table_search(v3d->write_jobs,
392                                                            info->src.resource);
393         if (!entry)
394                 return;
395 
396         struct v3d_job *job = entry->data;
397         assert(job);
398 
399         /* The TLB store will involve the same area and tiles as the job
400          * writing to the resource, so only do this if we are blitting the
401          * full resource and the job is writing the full resource.
402          */
403         int dst_width = u_minify(info->dst.resource->width0, info->dst.level);
404         int dst_height = u_minify(info->dst.resource->height0, info->dst.level);
405         if (info->dst.box.x != 0 || info->dst.box.width != dst_width ||
406             info->dst.box.y != 0 || info->dst.box.height != dst_height ||
407             job->draw_min_x != 0 || job->draw_min_y != 0 ||
408             job->draw_max_x != dst_width || job->draw_max_y != dst_height) {
409                 return;
410         }
411 
412         /* Blits are specified for single-layered FBOs, if the job that
413          * produces the blit source is multilayered we would attempt to
414          * blit all layers and write out of bounds on the destination.
415          */
416         if (job->num_layers > 1)
417                 return;
418 
419         /* Find which color attachment in the job is the blit source  */
420         int idx = -1;
421         for (int i = 0; i < job->nr_cbufs; i++) {
422                 if (!job->cbufs[i] ||
423                     job->cbufs[i]->texture != info->src.resource) {
424                         continue;
425                 }
426                 idx = i;
427                 break;
428         }
429 
430         if (idx < 0)
431                 return;
432 
433         struct pipe_surface *dbuf =
434                 v3d_get_blit_surface(pctx, info->dst.resource,
435                                      info->dst.format, info->dst.level,
436                                      info->dst.box.z);
437 
438         /* The job's RT setup must be compatible with the blit buffer. */
439         struct v3d_surface *ssurf = v3d_surface(job->cbufs[idx]);
440         struct v3d_surface *rsurf = v3d_surface(dbuf);
441         if (ssurf->internal_bpp < rsurf->internal_bpp)
442                 return;
443         if (ssurf->internal_type != rsurf->internal_type)
444                 return;
445 
446         /* If we had any other jobs writing to the blit dst we should submit
447          * them now before we blit.
448          *
449          * FIXME: We could just drop these jobs completely if they are
450          * rendering a subset of the resource being blit here.
451          */
452         v3d_flush_jobs_writing_resource(v3d, info->dst.resource,
453                                         V3D_FLUSH_DEFAULT, false);
454 
455         /* Program the job to blit from the TLB into the destination buffer */
456         info->mask &= ~PIPE_MASK_RGBA;
457         job->blit_tlb |= PIPE_CLEAR_COLOR0 << idx;
458         job->dbuf = dbuf;
459         v3d_job_add_write_resource(job, info->dst.resource);
460 
461         /* Submit the job immediately, since otherwise we could accumulate
462          * draw calls happening after the blit.
463          */
464         v3d_job_submit(v3d, job);
465 }
466 
467 static void
v3d_tlb_blit(struct pipe_context * pctx,struct pipe_blit_info * info)468 v3d_tlb_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
469 {
470         struct v3d_context *v3d = v3d_context(pctx);
471         struct v3d_screen *screen = v3d->screen;
472         struct v3d_device_info *devinfo = &screen->devinfo;
473 
474         if (!check_tlb_blit_ok(devinfo, info))
475                 return;
476 
477         v3d_flush_jobs_writing_resource(v3d, info->src.resource, V3D_FLUSH_DEFAULT, false);
478 
479         struct pipe_surface *dst_surf =
480            v3d_get_blit_surface(pctx, info->dst.resource, info->dst.format, info->dst.level, info->dst.box.z);
481         struct pipe_surface *src_surf =
482            v3d_get_blit_surface(pctx, info->src.resource, info->src.format, info->src.level, info->src.box.z);
483 
484         bool is_color_blit = info->mask & PIPE_MASK_RGBA;
485         bool is_depth_blit = info->mask & PIPE_MASK_Z;
486         bool is_stencil_blit = info->mask & PIPE_MASK_S;
487 
488         struct pipe_surface *surfaces[V3D_MAX_DRAW_BUFFERS] = { 0 };
489         if (is_color_blit)
490                 surfaces[0] = dst_surf;
491 
492         bool msaa = (info->src.resource->nr_samples > 1 ||
493                      info->dst.resource->nr_samples > 1);
494 
495         bool double_buffer = V3D_DBG(DOUBLE_BUFFER) && !msaa;
496 
497         uint32_t tile_width, tile_height, max_bpp;
498         v3d_get_tile_buffer_size(devinfo, msaa, double_buffer,
499                                  is_color_blit ? 1 : 0, surfaces, src_surf,
500                                  &tile_width, &tile_height, &max_bpp);
501 
502         int dst_surface_width = u_minify(info->dst.resource->width0,
503                                          info->dst.level);
504         int dst_surface_height = u_minify(info->dst.resource->height0,
505                                          info->dst.level);
506         if (is_tile_unaligned(info->dst.box.x, tile_width) ||
507             is_tile_unaligned(info->dst.box.y, tile_height) ||
508             (is_tile_unaligned(info->dst.box.width, tile_width) &&
509              info->dst.box.x + info->dst.box.width != dst_surface_width) ||
510             (is_tile_unaligned(info->dst.box.height, tile_height) &&
511              info->dst.box.y + info->dst.box.height != dst_surface_height)) {
512                 pipe_surface_reference(&dst_surf, NULL);
513                 pipe_surface_reference(&src_surf, NULL);
514                 return;
515         }
516 
517         struct v3d_job *job = v3d_get_job(v3d,
518                                           is_color_blit ? 1u : 0u,
519                                           surfaces,
520                                           is_color_blit ? NULL : dst_surf,
521                                           src_surf);
522         job->msaa = msaa;
523         job->double_buffer = double_buffer;
524         job->tile_width = tile_width;
525         job->tile_height = tile_height;
526         job->internal_bpp = max_bpp;
527         job->draw_min_x = info->dst.box.x;
528         job->draw_min_y = info->dst.box.y;
529         job->draw_max_x = info->dst.box.x + info->dst.box.width;
530         job->draw_max_y = info->dst.box.y + info->dst.box.height;
531         job->scissor.disabled = false;
532 
533         /* The simulator complains if we do a TLB load from a source with a
534          * stride that is smaller than the destination's, so we program the
535          * 'frame region' to match the smallest dimensions of the two surfaces.
536          * This should be fine because we only get here if the src and dst boxes
537          * match, so we know the blit involves the same tiles on both surfaces.
538          */
539         job->draw_width = MIN2(dst_surf->width, src_surf->width);
540         job->draw_height = MIN2(dst_surf->height, src_surf->height);
541         job->draw_tiles_x = DIV_ROUND_UP(job->draw_width,
542                                          job->tile_width);
543         job->draw_tiles_y = DIV_ROUND_UP(job->draw_height,
544                                          job->tile_height);
545 
546         job->needs_flush = true;
547         job->num_layers = info->dst.box.depth;
548 
549         job->store = 0;
550         if (is_color_blit) {
551                 job->store |= PIPE_CLEAR_COLOR0;
552                 info->mask &= ~PIPE_MASK_RGBA;
553         }
554         if (is_depth_blit) {
555                 job->store |= PIPE_CLEAR_DEPTH;
556                 info->mask &= ~PIPE_MASK_Z;
557         }
558         if (is_stencil_blit){
559                 job->store |= PIPE_CLEAR_STENCIL;
560                 info->mask &= ~PIPE_MASK_S;
561         }
562 
563         v3d_X(devinfo, start_binning)(v3d, job);
564 
565         v3d_job_submit(v3d, job);
566 
567         pipe_surface_reference(&dst_surf, NULL);
568         pipe_surface_reference(&src_surf, NULL);
569 }
570 
571 /**
572  * Creates the VS of the custom blit shader to convert YUV plane from
573  * the NV12 format with BROADCOM_SAND_COL128 modifier to UIF tiled format.
574  * This vertex shader is mostly a pass-through VS.
575  */
576 static void *
v3d_get_sand8_vs(struct pipe_context * pctx)577 v3d_get_sand8_vs(struct pipe_context *pctx)
578 {
579         struct v3d_context *v3d = v3d_context(pctx);
580         struct pipe_screen *pscreen = pctx->screen;
581 
582         if (v3d->sand8_blit_vs)
583                 return v3d->sand8_blit_vs;
584 
585         const struct nir_shader_compiler_options *options =
586                 pscreen->get_compiler_options(pscreen,
587                                               PIPE_SHADER_IR_NIR,
588                                               PIPE_SHADER_VERTEX);
589 
590         nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX,
591                                                        options,
592                                                        "sand8_blit_vs");
593 
594         const struct glsl_type *vec4 = glsl_vec4_type();
595         nir_variable *pos_in = nir_variable_create(b.shader,
596                                                    nir_var_shader_in,
597                                                    vec4, "pos");
598 
599         nir_variable *pos_out = nir_variable_create(b.shader,
600                                                     nir_var_shader_out,
601                                                     vec4, "gl_Position");
602         pos_out->data.location = VARYING_SLOT_POS;
603         nir_store_var(&b, pos_out, nir_load_var(&b, pos_in), 0xf);
604 
605         v3d->sand8_blit_vs = pipe_shader_from_nir(pctx, b.shader);
606 
607         return v3d->sand8_blit_vs;
608 }
609 /**
610  * Creates the FS of the custom blit shader to convert YUV plane from
611  * the NV12 format with BROADCOM_SAND_COL128 modifier to UIF tiled format.
612  * The result texture is equivalent to a chroma (cpp=2) or luma (cpp=1)
613  * plane for a NV12 format without the SAND modifier.
614  */
615 static void *
v3d_get_sand8_fs(struct pipe_context * pctx,int cpp)616 v3d_get_sand8_fs(struct pipe_context *pctx, int cpp)
617 {
618         struct v3d_context *v3d = v3d_context(pctx);
619         struct pipe_screen *pscreen = pctx->screen;
620         struct pipe_shader_state **cached_shader;
621         const char *name;
622 
623         if (cpp == 1) {
624                 cached_shader = &v3d->sand8_blit_fs_luma;
625                 name = "sand8_blit_fs_luma";
626         } else {
627                 cached_shader = &v3d->sand8_blit_fs_chroma;
628                 name = "sand8_blit_fs_chroma";
629         }
630 
631         if (*cached_shader)
632                 return *cached_shader;
633 
634         const struct nir_shader_compiler_options *options =
635                 pscreen->get_compiler_options(pscreen,
636                                               PIPE_SHADER_IR_NIR,
637                                               PIPE_SHADER_FRAGMENT);
638 
639         nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
640                                                        options, "%s", name);
641         b.shader->info.num_ubos = 1;
642         b.shader->num_outputs = 1;
643         b.shader->num_inputs = 1;
644         b.shader->num_uniforms = 1;
645 
646         const struct glsl_type *vec4 = glsl_vec4_type();
647 
648         const struct glsl_type *glsl_uint = glsl_uint_type();
649 
650         nir_variable *color_out =
651                 nir_variable_create(b.shader, nir_var_shader_out,
652                                     vec4, "f_color");
653         color_out->data.location = FRAG_RESULT_COLOR;
654 
655         nir_variable *pos_in =
656                 nir_variable_create(b.shader, nir_var_shader_in, vec4, "pos");
657         pos_in->data.location = VARYING_SLOT_POS;
658         nir_def *pos = nir_load_var(&b, pos_in);
659 
660         nir_def *zero = nir_imm_int(&b, 0);
661         nir_def *one = nir_imm_int(&b, 1);
662         nir_def *two = nir_imm_int(&b, 2);
663         nir_def *six = nir_imm_int(&b, 6);
664         nir_def *seven = nir_imm_int(&b, 7);
665         nir_def *eight = nir_imm_int(&b, 8);
666 
667         nir_def *x = nir_f2i32(&b, nir_channel(&b, pos, 0));
668         nir_def *y = nir_f2i32(&b, nir_channel(&b, pos, 1));
669 
670         nir_variable *stride_in =
671                 nir_variable_create(b.shader, nir_var_uniform, glsl_uint,
672                                     "sand8_stride");
673         nir_def *stride =
674                 nir_load_uniform(&b, 1, 32, zero,
675                                  .base = stride_in->data.driver_location,
676                                  .range = 4,
677                                  .dest_type = nir_type_uint32);
678 
679         nir_def *x_offset;
680         nir_def *y_offset;
681 
682         /* UIF tiled format is composed by UIF blocks, Each block has
683          * four 64 byte microtiles. Inside each microtile pixels are stored
684          * in raster format. But microtiles have different dimensions
685          * based in the bits per pixel of the image.
686          *
687          *   8bpp microtile dimensions are 8x8
688          *  16bpp microtile dimensions are 8x4
689          *  32bpp microtile dimensions are 4x4
690          *
691          * As we are reading and writing with 32bpp to optimize
692          * the number of texture operations during the blit, we need
693          * to adjust the offsets were we read and write as data will
694          * be later read using 8bpp (luma) and 16bpp (chroma).
695          *
696          * For chroma 8x4 16bpp raster order is compatible with 4x4
697          * 32bpp. In both layouts each line has 8*2 == 4*4 == 16 bytes.
698          * But luma 8x8 8bpp raster order is not compatible
699          * with 4x4 32bpp. 8bpp has 8 bytes per line, and 32bpp has
700          * 16 bytes per line. So if we read a 8bpp texture that was
701          * written as 32bpp texture. Bytes would be misplaced.
702          *
703          * inter/intra_utile_x_offsets takes care of mapping the offsets
704          * between microtiles to deal with this issue for luma planes.
705          */
706         if (cpp == 1) {
707                 nir_def *intra_utile_x_offset =
708                         nir_ishl(&b, nir_iand_imm(&b, x, 1), two);
709                 nir_def *inter_utile_x_offset =
710                         nir_ishl(&b, nir_iand_imm(&b, x, 60), one);
711                 nir_def *stripe_offset=
712                         nir_ishl(&b,nir_imul(&b,nir_ishr_imm(&b, x, 6),
713                                              stride),
714                                  seven);
715 
716                 x_offset = nir_iadd(&b, stripe_offset,
717                                         nir_iadd(&b, intra_utile_x_offset,
718                                                      inter_utile_x_offset));
719                 y_offset = nir_iadd(&b,
720                                     nir_ishl(&b, nir_iand_imm(&b, x, 2), six),
721                                     nir_ishl(&b, y, eight));
722         } else  {
723                 nir_def *stripe_offset=
724                         nir_ishl(&b,nir_imul(&b,nir_ishr_imm(&b, x, 5),
725                                                 stride),
726                                  seven);
727                 x_offset = nir_iadd(&b, stripe_offset,
728                                nir_ishl(&b, nir_iand_imm(&b, x, 31), two));
729                 y_offset = nir_ishl(&b, y, seven);
730         }
731         nir_def *ubo_offset = nir_iadd(&b, x_offset, y_offset);
732         nir_def *load =
733         nir_load_ubo(&b, 1, 32, zero, ubo_offset,
734                     .align_mul = 4,
735                     .align_offset = 0,
736                     .range_base = 0,
737                     .range = ~0);
738 
739         nir_def *output = nir_unpack_unorm_4x8(&b, load);
740 
741         nir_store_var(&b, color_out,
742                       output,
743                       0xF);
744 
745 
746         *cached_shader = pipe_shader_from_nir(pctx, b.shader);
747 
748         return *cached_shader;
749 }
750 
751 /**
752  * Turns NV12 with SAND8 format modifier from raster-order with interleaved
753  * luma and chroma 128-byte-wide-columns to tiled format for luma and chroma.
754  *
755  * This implementation is based on vc4_yuv_blit.
756  */
757 static void
v3d_sand8_blit(struct pipe_context * pctx,struct pipe_blit_info * info)758 v3d_sand8_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
759 {
760         struct v3d_context *v3d = v3d_context(pctx);
761         struct v3d_resource *src = v3d_resource(info->src.resource);
762         ASSERTED struct v3d_resource *dst = v3d_resource(info->dst.resource);
763 
764         if (!src->sand_col128_stride)
765                 return;
766         if (src->tiled)
767                 return;
768         if (src->base.format != PIPE_FORMAT_R8_UNORM &&
769             src->base.format != PIPE_FORMAT_R8G8_UNORM)
770                 return;
771         if (!(info->mask & PIPE_MASK_RGBA))
772                 return;
773 
774         assert(dst->base.format == src->base.format);
775         assert(dst->tiled);
776 
777         assert(info->src.box.x == 0 && info->dst.box.x == 0);
778         assert(info->src.box.y == 0 && info->dst.box.y == 0);
779         assert(info->src.box.width == info->dst.box.width);
780         assert(info->src.box.height == info->dst.box.height);
781 
782         v3d_blitter_save(v3d, info->render_condition_enable ?
783                          V3D_BLIT_COND : V3D_BLIT);
784 
785         struct pipe_surface dst_tmpl;
786         util_blitter_default_dst_texture(&dst_tmpl, info->dst.resource,
787                                          info->dst.level, info->dst.box.z);
788         /* Although the src textures are cpp=1 or cpp=2, the dst texture
789          * uses a cpp=4 dst texture. So, all read/write texture ops will
790          * be done using 32-bit read and writes.
791          */
792         dst_tmpl.format = PIPE_FORMAT_R8G8B8A8_UNORM;
793         struct pipe_surface *dst_surf =
794                 pctx->create_surface(pctx, info->dst.resource, &dst_tmpl);
795         if (!dst_surf) {
796                 fprintf(stderr, "Failed to create YUV dst surface\n");
797                 util_blitter_unset_running_flag(v3d->blitter);
798                 return;
799         }
800 
801         uint32_t sand8_stride = src->sand_col128_stride;
802 
803         /* Adjust the dimensions of dst luma/chroma to match src
804          * size now we are using a cpp=4 format. Next dimension take into
805          * account the UIF microtile layouts.
806          */
807         dst_surf->width = align(dst_surf->width, 8) / 2;
808         if (src->cpp == 1)
809                 dst_surf->height /= 2;
810 
811         /* Set the constant buffer. */
812         struct pipe_constant_buffer cb_uniforms = {
813                 .user_buffer = &sand8_stride,
814                 .buffer_size = sizeof(sand8_stride),
815         };
816 
817         pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 0, false,
818                                   &cb_uniforms);
819         struct pipe_constant_buffer saved_fs_cb1 = { 0 };
820         pipe_resource_reference(&saved_fs_cb1.buffer,
821                                 v3d->constbuf[PIPE_SHADER_FRAGMENT].cb[1].buffer);
822         memcpy(&saved_fs_cb1, &v3d->constbuf[PIPE_SHADER_FRAGMENT].cb[1],
823                sizeof(struct pipe_constant_buffer));
824         struct pipe_constant_buffer cb_src = {
825                 .buffer = info->src.resource,
826                 .buffer_offset = src->slices[info->src.level].offset,
827                 .buffer_size = (src->bo->size -
828                                 src->slices[info->src.level].offset),
829         };
830         pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, false,
831                                   &cb_src);
832         /* Unbind the textures, to make sure we don't try to recurse into the
833          * shadow blit.
834          */
835         pctx->set_sampler_views(pctx, PIPE_SHADER_FRAGMENT, 0, 0, 0, false, NULL);
836         pctx->bind_sampler_states(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL);
837 
838         util_blitter_custom_shader(v3d->blitter, dst_surf,
839                                    v3d_get_sand8_vs(pctx),
840                                    v3d_get_sand8_fs(pctx, src->cpp));
841 
842         util_blitter_restore_textures(v3d->blitter);
843         util_blitter_restore_constant_buffer_state(v3d->blitter);
844 
845         /* Restore cb1 (util_blitter doesn't handle this one). */
846         pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, true,
847                                   &saved_fs_cb1);
848 
849         pipe_surface_reference(&dst_surf, NULL);
850 
851         info->mask &= ~PIPE_MASK_RGBA;
852 }
853 
854 
855 /**
856  * Creates the VS of the custom blit shader to convert YUV plane from
857  * the P030 format with BROADCOM_SAND_COL128 modifier to UIF tiled P010
858  * format.
859  * This vertex shader is mostly a pass-through VS.
860  */
861 static void *
v3d_get_sand30_vs(struct pipe_context * pctx)862 v3d_get_sand30_vs(struct pipe_context *pctx)
863 {
864         struct v3d_context *v3d = v3d_context(pctx);
865         struct pipe_screen *pscreen = pctx->screen;
866 
867         if (v3d->sand30_blit_vs)
868                 return v3d->sand30_blit_vs;
869 
870         const struct nir_shader_compiler_options *options =
871                 pscreen->get_compiler_options(pscreen,
872                                               PIPE_SHADER_IR_NIR,
873                                               PIPE_SHADER_VERTEX);
874 
875         nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX,
876                                                        options,
877                                                        "sand30_blit_vs");
878 
879         const struct glsl_type *vec4 = glsl_vec4_type();
880         nir_variable *pos_in = nir_variable_create(b.shader,
881                                                    nir_var_shader_in,
882                                                    vec4, "pos");
883 
884         nir_variable *pos_out = nir_variable_create(b.shader,
885                                                     nir_var_shader_out,
886                                                     vec4, "gl_Position");
887         pos_out->data.location = VARYING_SLOT_POS;
888         nir_store_var(&b, pos_out, nir_load_var(&b, pos_in), 0xf);
889 
890         v3d->sand30_blit_vs = pipe_shader_from_nir(pctx, b.shader);
891 
892         return v3d->sand30_blit_vs;
893 }
894 
895 /**
896  * Given an uvec2 value with rgb10a2 components, it extracts four 10-bit
897  * components, then converts them from unorm10 to unorm16 and returns them
898  * in an uvec4. The start parameter defines where the sequence of 4 values
899  * begins.
900  */
901 static nir_def *
extract_unorm_2xrgb10a2_component_to_4xunorm16(nir_builder * b,nir_def * value,nir_def * start)902 extract_unorm_2xrgb10a2_component_to_4xunorm16(nir_builder *b,
903                                                nir_def *value,
904                                                nir_def *start)
905 {
906         const unsigned mask = BITFIELD_MASK(10);
907 
908         nir_def *shiftw0 = nir_imul_imm(b, start, 10);
909         nir_def *word0 = nir_iand_imm(b, nir_channel(b, value, 0),
910                                           BITFIELD_MASK(30));
911         nir_def *finalword0 = nir_ushr(b, word0, shiftw0);
912         nir_def *word1 = nir_channel(b, value, 1);
913         nir_def *shiftw0tow1 = nir_isub_imm(b, 30, shiftw0);
914         nir_def *word1toword0 =  nir_ishl(b, word1, shiftw0tow1);
915         finalword0 = nir_ior(b, finalword0, word1toword0);
916         nir_def *finalword1 = nir_ushr(b, word1, shiftw0);
917 
918         nir_def *val0 = nir_ishl_imm(b, nir_iand_imm(b, finalword0,
919                                                          mask), 6);
920         nir_def *val1 = nir_ishr_imm(b, nir_iand_imm(b, finalword0,
921                                                          mask << 10), 4);
922         nir_def *val2 = nir_ishr_imm(b, nir_iand_imm(b, finalword0,
923                                                          mask << 20), 14);
924         nir_def *val3 = nir_ishl_imm(b, nir_iand_imm(b, finalword1,
925                                                          mask), 6);
926 
927         return nir_vec4(b, val0, val1, val2, val3);
928 }
929 
930 /**
931  * Creates the FS of the custom blit shader to convert YUV plane from
932  * the P030 format with BROADCOM_SAND_COL128 modifier to UIF tiled P10
933  * format a 16-bit representation per component.
934  *
935  * The result texture is equivalent to a chroma (cpp=4) or luma (cpp=2)
936  * plane for a P010 format without the SAND128 modifier.
937  */
938 static void *
v3d_get_sand30_fs(struct pipe_context * pctx)939 v3d_get_sand30_fs(struct pipe_context *pctx)
940 {
941         struct v3d_context *v3d = v3d_context(pctx);
942         struct pipe_screen *pscreen = pctx->screen;
943 
944         if (v3d->sand30_blit_fs)
945                 return  v3d->sand30_blit_fs;
946 
947         const struct nir_shader_compiler_options *options =
948                 pscreen->get_compiler_options(pscreen,
949                                               PIPE_SHADER_IR_NIR,
950                                               PIPE_SHADER_FRAGMENT);
951 
952         nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
953                                                        options,
954                                                        "sand30_blit_fs");
955         b.shader->info.num_ubos = 1;
956         b.shader->num_outputs = 1;
957         b.shader->num_inputs = 1;
958         b.shader->num_uniforms = 1;
959 
960         const struct glsl_type *vec4 = glsl_vec4_type();
961 
962         const struct glsl_type *glsl_uint = glsl_uint_type();
963         const struct glsl_type *glsl_uvec4 = glsl_vector_type(GLSL_TYPE_UINT,
964                                                               4);
965 
966         nir_variable *color_out = nir_variable_create(b.shader,
967                                                       nir_var_shader_out,
968                                                       glsl_uvec4, "f_color");
969         color_out->data.location = FRAG_RESULT_COLOR;
970 
971         nir_variable *pos_in =
972                 nir_variable_create(b.shader, nir_var_shader_in, vec4, "pos");
973         pos_in->data.location = VARYING_SLOT_POS;
974         nir_def *pos = nir_load_var(&b, pos_in);
975 
976         nir_def *zero = nir_imm_int(&b, 0);
977         nir_def *three = nir_imm_int(&b, 3);
978 
979         /* With a SAND128 stripe, in 128-bytes with rgb10a2 format we have 96
980          * 10-bit values. So, it represents 96 pixels for Y plane and 48 pixels
981          * for UV frame, but as we are reading 4 10-bit-values at a time we
982          * will have 24 groups (pixels) of 4 10-bit values.
983          */
984         uint32_t pixels_stripe = 24;
985 
986         nir_def *x = nir_f2i32(&b, nir_channel(&b, pos, 0));
987         nir_def *y = nir_f2i32(&b, nir_channel(&b, pos, 1));
988 
989         /* UIF tiled format is composed by UIF blocks. Each block has four 64
990          * byte microtiles. Inside each microtile pixels are stored in raster
991          * format. But microtiles have different dimensions based in the bits
992          * per pixel of the image.
993          *
994          *  16bpp microtile dimensions are 8x4
995          *  32bpp microtile dimensions are 4x4
996          *  64bpp microtile dimensions are 4x2
997          *
998          * As we are reading and writing with 64bpp to optimize the number of
999          * texture operations during the blit, we adjust the offsets so when
1000          * the microtile is sampled using the 16bpp (luma) and the 32bpp
1001          * (chroma) the expected pixels are in the correct position, that
1002          * would be different if we were using a 64bpp sampling.
1003          *
1004          * For luma 8x4 16bpp and chroma 4x4 32bpp luma raster order is
1005          * incompatible with 4x2 64bpp. 16bpp has 16 bytes per line, 32bpp has
1006          * also 16byte per line. But 64bpp has 32 bytes per line. So if we
1007          * read a 16bpp or 32bpp texture that was written as 64bpp texture,
1008          * pixels would be misplaced.
1009          *
1010          * inter/intra_utile_x_offsets takes care of mapping the offsets
1011          * between microtiles to deal with this issue for luma and chroma
1012          * planes.
1013          *
1014          * We reduce the luma and chroma planes to the same blit case
1015          * because 16bpp and 32bpp have compatible microtile raster layout.
1016          * So just doubling the width of the chroma plane before calling the
1017          * blit makes them equivalent.
1018          */
1019         nir_variable *stride_in =
1020                 nir_variable_create(b.shader, nir_var_uniform,
1021                                     glsl_uint, "sand30_stride");
1022         nir_def *stride =
1023                 nir_load_uniform(&b, 1, 32, zero,
1024                                  .base = stride_in->data.driver_location,
1025                                  .range = 4,
1026                                  .dest_type = nir_type_uint32);
1027 
1028         nir_def *real_x = nir_ior(&b, nir_iand_imm(&b, x, 1),
1029                                       nir_ishl_imm(&b,nir_ushr_imm(&b, x, 2),
1030                                       1));
1031         nir_def *x_pos_in_stripe = nir_umod_imm(&b, real_x, pixels_stripe);
1032         nir_def *component = nir_umod(&b, real_x, three);
1033         nir_def *intra_utile_x_offset = nir_ishl_imm(&b, component, 2);
1034 
1035         nir_def *inter_utile_x_offset =
1036                 nir_ishl_imm(&b, nir_udiv_imm(&b, x_pos_in_stripe, 3), 4);
1037 
1038         nir_def *stripe_offset=
1039                 nir_ishl_imm(&b,
1040                              nir_imul(&b,
1041                                       nir_udiv_imm(&b, real_x, pixels_stripe),
1042                                       stride),
1043                              7);
1044 
1045         nir_def *x_offset = nir_iadd(&b, stripe_offset,
1046                                          nir_iadd(&b, intra_utile_x_offset,
1047                                                   inter_utile_x_offset));
1048         nir_def *y_offset =
1049                 nir_iadd(&b, nir_ishl_imm(&b, nir_iand_imm(&b, x, 2), 6),
1050                          nir_ishl_imm(&b, y, 8));
1051         nir_def *ubo_offset = nir_iadd(&b, x_offset, y_offset);
1052 
1053         nir_def *load = nir_load_ubo(&b, 2, 32, zero, ubo_offset,
1054                                          .align_mul = 8,
1055                                          .align_offset = 0,
1056                                          .range_base = 0,
1057                                          .range = ~0);
1058         nir_def *output =
1059                 extract_unorm_2xrgb10a2_component_to_4xunorm16(&b, load,
1060                                                                component);
1061         nir_store_var(&b, color_out,
1062                       output,
1063                       0xf);
1064 
1065         v3d->sand30_blit_fs = pipe_shader_from_nir(pctx, b.shader);
1066 
1067         return v3d->sand30_blit_fs;
1068 }
1069 
1070 /**
1071  * Turns P030 with SAND30 format modifier from raster-order with interleaved
1072  * luma and chroma 128-byte-wide-columns to a P010 UIF tiled format for luma
1073  * and chroma.
1074  */
1075 static void
v3d_sand30_blit(struct pipe_context * pctx,struct pipe_blit_info * info)1076 v3d_sand30_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
1077 {
1078         struct v3d_context *v3d = v3d_context(pctx);
1079         struct v3d_resource *src = v3d_resource(info->src.resource);
1080         ASSERTED struct v3d_resource *dst = v3d_resource(info->dst.resource);
1081 
1082         if (!src->sand_col128_stride)
1083                 return;
1084         if (src->tiled)
1085                 return;
1086         if (src->base.format != PIPE_FORMAT_R16_UNORM &&
1087             src->base.format != PIPE_FORMAT_R16G16_UNORM)
1088                 return;
1089         if (!(info->mask & PIPE_MASK_RGBA))
1090                 return;
1091 
1092         assert(dst->base.format == src->base.format);
1093         assert(dst->tiled);
1094 
1095         assert(info->src.box.x == 0 && info->dst.box.x == 0);
1096         assert(info->src.box.y == 0 && info->dst.box.y == 0);
1097         assert(info->src.box.width == info->dst.box.width);
1098         assert(info->src.box.height == info->dst.box.height);
1099 
1100         v3d_blitter_save(v3d, info->render_condition_enable ?
1101                          V3D_BLIT_COND : V3D_BLIT);
1102 
1103         struct pipe_surface dst_tmpl;
1104         util_blitter_default_dst_texture(&dst_tmpl, info->dst.resource,
1105                                          info->dst.level, info->dst.box.z);
1106 
1107         dst_tmpl.format = PIPE_FORMAT_R16G16B16A16_UINT;
1108 
1109         struct pipe_surface *dst_surf =
1110                 pctx->create_surface(pctx, info->dst.resource, &dst_tmpl);
1111         if (!dst_surf) {
1112                 fprintf(stderr, "Failed to create YUV dst surface\n");
1113                 util_blitter_unset_running_flag(v3d->blitter);
1114                 return;
1115         }
1116 
1117         uint32_t sand30_stride = src->sand_col128_stride;
1118 
1119         /* Adjust the dimensions of dst luma/chroma to match src
1120          * size now we are using a cpp=8 format. Next dimension take into
1121          * account the UIF microtile layouts.
1122          */
1123         dst_surf->height /= 2;
1124         dst_surf->width = align(dst_surf->width, 8);
1125         if (src->cpp == 2)
1126                 dst_surf->width /= 2;
1127         /* Set the constant buffer. */
1128         struct pipe_constant_buffer cb_uniforms = {
1129                 .user_buffer = &sand30_stride,
1130                 .buffer_size = sizeof(sand30_stride),
1131         };
1132 
1133         pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 0, false,
1134                                   &cb_uniforms);
1135 
1136         struct pipe_constant_buffer saved_fs_cb1 = { 0 };
1137         pipe_resource_reference(&saved_fs_cb1.buffer,
1138                                 v3d->constbuf[PIPE_SHADER_FRAGMENT].cb[1].buffer);
1139         memcpy(&saved_fs_cb1, &v3d->constbuf[PIPE_SHADER_FRAGMENT].cb[1],
1140                sizeof(struct pipe_constant_buffer));
1141         struct pipe_constant_buffer cb_src = {
1142                 .buffer = info->src.resource,
1143                 .buffer_offset = src->slices[info->src.level].offset,
1144                 .buffer_size = (src->bo->size -
1145                                 src->slices[info->src.level].offset),
1146         };
1147         pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, false,
1148                                   &cb_src);
1149         /* Unbind the textures, to make sure we don't try to recurse into the
1150          * shadow blit.
1151          */
1152         pctx->set_sampler_views(pctx, PIPE_SHADER_FRAGMENT, 0, 0, 0, false,
1153                                 NULL);
1154         pctx->bind_sampler_states(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL);
1155 
1156         util_blitter_custom_shader(v3d->blitter, dst_surf,
1157                                    v3d_get_sand30_vs(pctx),
1158                                    v3d_get_sand30_fs(pctx));
1159 
1160         util_blitter_restore_textures(v3d->blitter);
1161         util_blitter_restore_constant_buffer_state(v3d->blitter);
1162 
1163         /* Restore cb1 (util_blitter doesn't handle this one). */
1164         pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, true,
1165                                   &saved_fs_cb1);
1166         pipe_surface_reference(&dst_surf, NULL);
1167 
1168         info->mask &= ~PIPE_MASK_RGBA;
1169         return;
1170 }
1171 
1172 /* Optimal hardware path for blitting pixels.
1173  * Scaling, format conversion, up- and downsampling (resolve) are allowed.
1174  */
1175 void
v3d_blit(struct pipe_context * pctx,const struct pipe_blit_info * blit_info)1176 v3d_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
1177 {
1178         struct v3d_context *v3d = v3d_context(pctx);
1179         struct pipe_blit_info info = *blit_info;
1180 
1181         if (info.render_condition_enable && !v3d_render_condition_check(v3d))
1182                 return;
1183 
1184         v3d_sand30_blit(pctx, &info);
1185 
1186         v3d_sand8_blit(pctx, &info);
1187 
1188         v3d_tlb_blit_fast(pctx, &info);
1189 
1190         v3d_tfu_blit(pctx, &info);
1191 
1192         v3d_tlb_blit(pctx, &info);
1193 
1194         v3d_stencil_blit(pctx, &info);
1195 
1196         v3d_render_blit(pctx, &info);
1197 
1198         /* Flush our blit jobs immediately.  They're unlikely to get reused by
1199          * normal drawing or other blits, and without flushing we can easily
1200          * run into unexpected OOMs when blits are used for a large series of
1201          * texture uploads before using the textures.
1202          */
1203         v3d_flush_jobs_writing_resource(v3d, info.dst.resource,
1204                                         V3D_FLUSH_DEFAULT, false);
1205 }
1206