/**************************************************************************
 *
 * Copyright 2019 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: James Zhu <james.zhu@amd.com>
 *
 **************************************************************************/

#include <assert.h>

#include "nir/nir_builder.h"
#include "vl_compositor_cs.h"

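/* Per-layer drawing parameters computed on the CPU. draw_layers() fills this
 * in from the layer's viewport, source rectangle and chroma siting, and
 * set_viewport() uploads it (together with the CSC matrix) into the constant
 * buffer consumed by the compute shaders below.
 */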
struct cs_viewport {
   float scale_x;
   float scale_y;
   struct u_rect area;
   float crop_x; /* src */
   float crop_y;
   int translate_x; /* dst */
   int translate_y;
   float sampler0_w;
   float sampler0_h;
   float clamp_x;
   float clamp_y;
   float chroma_clamp_x;
   float chroma_clamp_y;
   float chroma_offset_x;
   float chroma_offset_y;
};

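/* Builder state shared by the shader construction helpers: the NIR builder,
 * the sampler and image variables, and the eight constant-buffer vectors
 * (params[0-7]) loaded once in cs_create_shader().
 */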
struct cs_shader {
   nir_builder b;
   const char *name;
   bool array;
   unsigned num_samplers;
   nir_variable *samplers[3];
   nir_variable *image;
   nir_def *params[8];
   nir_def *fone;
   nir_def *fzero;
};

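/* Coordinate generation flags: COORDS_CHROMA selects the chroma crop/clamp
 * parameters and applies chroma subsampling, COORDS_CHROMA_OFFSET
 * additionally applies the chroma siting offset (see cs_tex_coords()).
 */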
enum coords_flags {
   COORDS_LUMA          = 0x0,
   COORDS_CHROMA        = 0x1,
   COORDS_CHROMA_OFFSET = 0x2,
};

static nir_def *cs_create_shader(struct vl_compositor *c, struct cs_shader *s)
{
   /*
      #version 450

      layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
      layout (binding = 0) uniform sampler2DRect samplers[3]; // or sampler2DArray
      layout (binding = 0) uniform image2D image;

      layout (std140, binding = 0) uniform ubo
      {
         vec4 csc_mat[3];      // params[0-2]
         float luma_min;       // params[3].x
         float luma_max;       // params[3].y
         vec2 scale;           // params[3].zw
         vec2 crop;            // params[4].xy
         ivec2 translate;      // params[4].zw
         vec2 sampler0_wh;     // params[5].xy
         vec2 subsample_ratio; // params[5].zw
         vec2 coord_clamp;     // params[6].xy
         vec2 chroma_clamp;    // params[6].zw
         vec2 chroma_offset;   // params[7].xy
      };

      void main()
      {
         ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
      }
   */
   enum glsl_sampler_dim sampler_dim = s->array ? GLSL_SAMPLER_DIM_2D : GLSL_SAMPLER_DIM_RECT;
   const struct glsl_type *sampler_type =
      glsl_sampler_type(sampler_dim, /*is_shadow*/ false, s->array, GLSL_TYPE_FLOAT);
   const struct glsl_type *image_type =
      glsl_image_type(GLSL_SAMPLER_DIM_2D, /*is_array*/ false, GLSL_TYPE_FLOAT);
   const nir_shader_compiler_options *options =
      c->pipe->screen->get_compiler_options(c->pipe->screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE);

   s->b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "vl:%s", s->name);
   nir_builder *b = &s->b;
   b->shader->info.workgroup_size[0] = 8;
   b->shader->info.workgroup_size[1] = 8;
   b->shader->info.workgroup_size[2] = 1;
   b->shader->info.num_ubos = 1;
   b->shader->num_uniforms = ARRAY_SIZE(s->params);

   nir_def *zero = nir_imm_int(b, 0);
   for (unsigned i = 0; i < b->shader->num_uniforms; ++i)
      s->params[i] = nir_load_ubo(b, 4, 32, zero, nir_imm_int(b, i * 16), .align_mul = 4, .range = ~0);

   for (unsigned i = 0; i < s->num_samplers; ++i) {
      s->samplers[i] = nir_variable_create(b->shader, nir_var_uniform, sampler_type, "sampler");
      s->samplers[i]->data.binding = i;
      BITSET_SET(b->shader->info.textures_used, i);
      BITSET_SET(b->shader->info.samplers_used, i);
   }

   s->image = nir_variable_create(b->shader, nir_var_image, image_type, "image");
   s->image->data.binding = 0;
   BITSET_SET(b->shader->info.images_used, 0);

   s->fone = nir_imm_float(b, 1.0f);
   s->fzero = nir_imm_float(b, 0.0f);

   nir_def *block_ids = nir_load_workgroup_id(b);
   nir_def *local_ids = nir_load_local_invocation_id(b);
   return nir_iadd(b, nir_imul(b, block_ids, nir_imm_ivec3(b, 8, 8, 1)), local_ids);
}

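/* Finalize the NIR built by the create_*_shader() helpers and wrap it in a
 * pipe_compute_state object ready for bind_compute_state().
 */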
static void *cs_create_shader_state(struct vl_compositor *c, struct cs_shader *s)
{
   c->pipe->screen->finalize_nir(c->pipe->screen, s->b.shader);

   struct pipe_compute_state state = {0};
   state.ir_type = PIPE_SHADER_IR_NIR;
   state.prog = s->b.shader;

   /* create compute shader */
   return c->pipe->create_compute_state(c->pipe, &state);
}

static inline nir_def *cs_translate(struct cs_shader *s, nir_def *src)
{
   /*
      return src.xy + params[4].zw;
   */
   nir_builder *b = &s->b;
   return nir_iadd(b, src, nir_channels(b, s->params[4], 0x3 << 2));
}

static inline nir_def *cs_texture_offset(struct cs_shader *s, nir_def *src)
{
   /*
      return src.xy + 0.5;
   */
   nir_builder *b = &s->b;
   return nir_fadd_imm(b, src, 0.5f);
}

static inline nir_def *cs_chroma_subsampling(struct cs_shader *s, nir_def *src)
{
   /*
      return src.xy * params[5].zw;
   */
   nir_builder *b = &s->b;
   return nir_fmul(b, src, nir_channels(b, s->params[5], 0x3 << 2));
}

static inline nir_def *cs_scale(struct cs_shader *s, nir_def *src)
{
   /*
      return src.xy / params[3].zw;
   */
   nir_builder *b = &s->b;
   return nir_fdiv(b, src, nir_channels(b, s->params[3], 0x3 << 2));
}

static inline nir_def *cs_luma_key(struct cs_shader *s, nir_def *src)
{
   /*
      bool luma_min = params[3].x >= src;
      bool luma_max = params[3].y < src;
      return float(luma_min || luma_max);
   */
   nir_builder *b = &s->b;
   nir_def *luma_min = nir_fge(b, nir_channel(b, s->params[3], 0), src);
   nir_def *luma_max = nir_flt(b, nir_channel(b, s->params[3], 1), src);
   return nir_b2f32(b, nir_ior(b, luma_min, luma_max));
}

static inline nir_def *cs_chroma_offset(struct cs_shader *s, nir_def *src, unsigned flags)
{
   /*
      vec2 offset = params[7].xy;
      if (flags & COORDS_CHROMA)
         return src.xy + offset;
      return offset * -0.5 + src.xy;
   */
   nir_builder *b = &s->b;
   nir_def *offset = nir_channels(b, s->params[7], 0x3);
   if (flags & COORDS_CHROMA)
      return nir_fadd(b, src, offset);
   return nir_ffma_imm1(b, offset, -0.5f, src);
}

static inline nir_def *cs_clamp(struct cs_shader *s, nir_def *src, unsigned flags)
{
   /*
      vec2 coord_max;
      if (flags & COORDS_CHROMA)
         coord_max = params[6].zw;
      else
         coord_max = params[6].xy;
      return min(src.xy, coord_max);
   */
   nir_builder *b = &s->b;
   nir_component_mask_t mask = flags & COORDS_CHROMA ? 0x3 << 2 : 0x3;
   return nir_fmin(b, src, nir_channels(b, s->params[6], mask));
}

static inline nir_def *cs_normalize(struct cs_shader *s, nir_def *src, unsigned flags)
{
   /*
      vec2 div = params[5].xy;
      if (flags & COORDS_CHROMA)
         div = cs_chroma_subsampling(div);
      return src.xy / div;
   */
   nir_builder *b = &s->b;
   nir_def *div = nir_channels(b, s->params[5], 0x3);
   if (flags & COORDS_CHROMA)
      div = cs_chroma_subsampling(s, div);
   return nir_fdiv(b, src, div);
}

static inline nir_def *cs_crop(struct cs_shader *s, nir_def *src, unsigned flags)
{
   /*
      vec2 crop = params[4].xy;
      if (flags & COORDS_CHROMA)
         crop = cs_chroma_subsampling(crop);
      return src.xy + crop;
   */
   nir_builder *b = &s->b;
   nir_def *crop = nir_channels(b, s->params[4], 0x3);
   if (flags & COORDS_CHROMA)
      crop = cs_chroma_subsampling(s, crop);
   return nir_fadd(b, src, crop);
}

static inline nir_def *cs_color_space_conversion(struct cs_shader *s, nir_def *src, unsigned comp)
{
   /*
      return dot(src, params[comp]);
   */
   nir_builder *b = &s->b;
   return nir_fdot4(b, src, s->params[comp]);
}

static inline nir_def *cs_fetch_texel(struct cs_shader *s, nir_def *coords, unsigned sampler)
{
   /*
      return texture(samplers[sampler], s->array ? coords.xyz : coords.xy);
   */
   nir_builder *b = &s->b;
   nir_deref_instr *tex_deref = nir_build_deref_var(b, s->samplers[sampler]);
   nir_component_mask_t mask = s->array ? 0x7 : 0x3;
   return nir_tex_deref(b, tex_deref, tex_deref, nir_channels(b, coords, mask));
}

static inline void cs_image_store(struct cs_shader *s, nir_def *pos, nir_def *color)
{
   /*
      imageStore(image, pos.xy, color);
   */
   nir_builder *b = &s->b;
   nir_def *zero = nir_imm_int(b, 0);
   nir_def *undef32 = nir_undef(b, 1, 32);
   pos = nir_pad_vector_imm_int(b, pos, 0, 4);
   nir_image_deref_store(b, &nir_build_deref_var(b, s->image)->def, pos, undef32, color, zero);
}

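/* Convert an integer destination pixel position into source texture
 * coordinates: add the half-texel offset, optionally apply the chroma siting
 * offset and chroma subsampling, then scale, crop and clamp to the source
 * rectangle.
 */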
static nir_def *cs_tex_coords(struct cs_shader *s, nir_def *coords, unsigned flags)
{
   nir_builder *b = &s->b;

   coords = nir_u2f32(b, coords);
   coords = cs_texture_offset(s, coords);

   if (flags & COORDS_CHROMA_OFFSET)
      coords = cs_chroma_offset(s, coords, flags);

   if (flags & COORDS_CHROMA)
      coords = cs_chroma_subsampling(s, coords);

   coords = cs_scale(s, coords);
   coords = cs_crop(s, coords, flags);
   coords = cs_clamp(s, coords, flags);

   return coords;
}

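/* Shader for progressive video buffers: fetch Y, Cb and Cr from the three
 * source planes, convert to RGB with the CSC matrix and derive the alpha
 * channel with cs_luma_key().
 */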
static void *create_video_buffer_shader(struct vl_compositor *c)
{
   struct cs_shader s = {
      .name = "video_buffer",
      .num_samplers = 3,
   };
   nir_builder *b = &s.b;

   nir_def *ipos = cs_create_shader(c, &s);
   nir_def *pos[2] = {
      cs_tex_coords(&s, ipos, COORDS_LUMA),
      cs_tex_coords(&s, ipos, COORDS_CHROMA | COORDS_CHROMA_OFFSET),
   };

   nir_def *col[3];
   for (unsigned i = 0; i < 3; ++i)
      col[i] = cs_fetch_texel(&s, pos[MIN2(i, 1)], i);

   nir_def *alpha = cs_luma_key(&s, col[2]);

   nir_def *color = nir_vec4(b, col[0], col[1], col[2], s.fone);
   for (unsigned i = 0; i < 3; ++i)
      col[i] = cs_color_space_conversion(&s, color, i);

   color = nir_vec4(b, col[0], col[1], col[2], alpha);
   cs_image_store(&s, cs_translate(&s, ipos), color);

   return cs_create_shader_state(c, &s);
}

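/* Shaders for progressive YCbCr-to-YCbCr copies (the p-to-NV12 path): the
 * "y" variant writes the luma plane, the other variant writes the two chroma
 * components into the interleaved UV plane.
 */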
static void *create_yuv_progressive_shader(struct vl_compositor *c, bool y)
{
   struct cs_shader s = {
      .name = y ? "yuv_progressive_y" : "yuv_progressive_uv",
      .num_samplers = 3,
   };
   nir_builder *b = &s.b;

   nir_def *ipos = cs_create_shader(c, &s);
   nir_def *pos = cs_tex_coords(&s, ipos, y ? COORDS_LUMA : COORDS_CHROMA);

   nir_def *color;
   if (y) {
      color = nir_channel(b, cs_fetch_texel(&s, pos, 0), 0);
   } else {
      nir_def *col1 = cs_fetch_texel(&s, pos, 1);
      nir_def *col2 = cs_fetch_texel(&s, pos, 2);
      color = nir_vec2(b, col1, col2);
   }

   cs_image_store(&s, cs_translate(&s, ipos), color);

   return cs_create_shader_state(c, &s);
}

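/* Shaders for RGB-to-YCbCr conversion (the RGB-to-NV12 path): the "y"
 * variant converts a single pixel, the UV variant averages four RGB samples
 * around the chroma location before converting, to account for 4:2:0
 * subsampling.
 */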
static void *create_rgb_yuv_shader(struct vl_compositor *c, bool y)
{
   struct cs_shader s = {
      .name = y ? "rgb_yuv_y" : "rgb_yuv_uv",
      .num_samplers = 1,
   };
   nir_builder *b = &s.b;

   nir_def *ipos = cs_create_shader(c, &s);
   nir_def *color = NULL;

   if (y) {
      nir_def *pos = cs_tex_coords(&s, ipos, COORDS_LUMA);
      color = cs_fetch_texel(&s, pos, 0);
   } else {
      /*
         vec2 pos[4];
         pos[0] = vec2(ipos);
         pos[0] = cs_texture_offset(pos[0]);
         pos[0] = cs_chroma_offset(pos[0], COORDS_LUMA);

         // Sample offset
         pos[3] = pos[0] + vec2( 0.25, -0.25);
         pos[2] = pos[0] + vec2(-0.25,  0.25);
         pos[1] = pos[0] + vec2(-0.25, -0.25);
         pos[0] = pos[0] + vec2( 0.25,  0.25);

         vec4 col[4];
         for (uint i = 0; i < 4; ++i) {
            pos[i] = cs_scale(pos[i]);
            pos[i] = cs_crop(pos[i], COORDS_LUMA);
            pos[i] = cs_clamp(pos[i], COORDS_LUMA);
            col[i] = texture(samp[0], pos[i]);
         }
         color = (col[0] + col[1] + col[2] + col[3]) * 0.25;
      */
      nir_def *pos[4];
      pos[0] = nir_u2f32(b, ipos);
      pos[0] = cs_texture_offset(&s, pos[0]);
      pos[0] = cs_chroma_offset(&s, pos[0], COORDS_LUMA);

      /* Sample offset */
      nir_def *o_plus = nir_imm_float(b, 0.25f);
      nir_def *o_minus = nir_imm_float(b, -0.25f);
      pos[3] = nir_fadd(b, pos[0], nir_vec2(b, o_plus, o_minus));
      pos[2] = nir_fadd(b, pos[0], nir_vec2(b, o_minus, o_plus));
      pos[1] = nir_fadd(b, pos[0], nir_vec2(b, o_minus, o_minus));
      pos[0] = nir_fadd(b, pos[0], nir_vec2(b, o_plus, o_plus));

      for (unsigned i = 0; i < 4; ++i) {
         pos[i] = cs_scale(&s, pos[i]);
         pos[i] = cs_crop(&s, pos[i], COORDS_LUMA);
         pos[i] = cs_clamp(&s, pos[i], COORDS_LUMA);

         nir_def *c = cs_fetch_texel(&s, pos[i], 0);
         color = color ? nir_fadd(b, color, c) : c;
      }
      color = nir_fmul_imm(b, color, 0.25f);
   }

   color = nir_vector_insert_imm(b, color, s.fone, 3);

   if (y) {
      color = cs_color_space_conversion(&s, color, 0);
   } else {
      nir_def *col1 = cs_color_space_conversion(&s, color, 1);
      nir_def *col2 = cs_color_space_conversion(&s, color, 2);
      color = nir_vec2(b, col1, col2);
   }

   cs_image_store(&s, cs_translate(&s, ipos), color);

   return cs_create_shader_state(c, &s);
}

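/* Weave deinterlacing shaders: both fields are sampled from the array
 * textures (layer 0 = top field, layer 1 = bottom field) and blended
 * according to how close the scaled coordinate lands to each field's lines;
 * the result is either CSC-converted to RGB (rgb) or written back as the
 * luma (y) or chroma plane.
 */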
static void *create_weave_shader(struct vl_compositor *c, bool rgb, bool y)
{
   struct cs_shader s = {
      .name = rgb ? "weave" : y ? "yuv_weave_y" : "yuv_weave_uv",
      .array = true,
      .num_samplers = 3,
   };
   nir_builder *b = &s.b;

   nir_def *ipos = cs_create_shader(c, &s);

   /*
      vec2 top_y = cs_texture_offset(vec2(ipos));
      vec2 top_uv = rgb ? cs_chroma_offset(top_y, COORDS_CHROMA) : top_y;
      top_uv = cs_chroma_subsampling(top_uv);
      vec2 down_y = top_y;
      vec2 down_uv = top_uv;

      top_y = cs_crop(cs_scale(top_y), COORDS_LUMA);
      top_uv = cs_crop(cs_scale(top_uv), COORDS_CHROMA);
      down_y = cs_crop(cs_scale(down_y), COORDS_LUMA);
      down_uv = cs_crop(cs_scale(down_uv), COORDS_CHROMA);

      // Weave offset
      top_y = top_y + vec2(0.0, 0.25);
      top_uv = top_uv + vec2(0.0, 0.25);
      down_y = down_y + vec2(0.0, -0.25);
      down_uv = down_uv + vec2(0.0, -0.25);

      // Texture layer
      vec3 tex_layer = vec3(top_y.y, top_uv.y, top_uv.y);
      tex_layer = tex_layer + round(tex_layer) * -1.0;
      tex_layer = abs(tex_layer) * 2.0;

      top_y = cs_clamp(top_y, COORDS_LUMA);
      top_y = cs_normalize(top_y, COORDS_LUMA);
      top_uv = cs_clamp(top_uv, COORDS_CHROMA);
      top_uv = cs_normalize(top_uv, COORDS_CHROMA);
      down_y = cs_clamp(down_y, COORDS_LUMA);
      down_y = cs_normalize(down_y, COORDS_LUMA);
      down_uv = cs_clamp(down_uv, COORDS_CHROMA);
      down_uv = cs_normalize(down_uv, COORDS_CHROMA);

      vec4 top_col, down_col;
      top_col.x = texture(samp[0], vec3(top_y, 0.0)).x;
      top_col.y = texture(samp[1], vec3(top_uv, 0.0)).x;
      top_col.z = texture(samp[2], vec3(top_uv, 0.0)).x;
      top_col.w = 1.0;
      down_col.x = texture(samp[0], vec3(down_y, 1.0)).x;
      down_col.y = texture(samp[1], vec3(down_uv, 1.0)).x;
      down_col.z = texture(samp[2], vec3(down_uv, 1.0)).x;
      down_col.w = 1.0;

      vec4 color = mix(down_col, top_col, tex_layer);
   */
   nir_def *pos[4];
   /* Top Y */
   pos[0] = nir_u2f32(b, ipos);
   pos[0] = cs_texture_offset(&s, pos[0]);
   /* Top UV */
   pos[1] = rgb ? cs_chroma_offset(&s, pos[0], COORDS_CHROMA) : pos[0];
   pos[1] = cs_chroma_subsampling(&s, pos[1]);
   /* Down Y */
   pos[2] = pos[0];
   /* Down UV */
   pos[3] = pos[1];

   /* Weave offset */
   nir_def *o_plus = nir_imm_vec2(b, 0.0f, 0.25f);
   nir_def *o_minus = nir_imm_vec2(b, 0.0f, -0.25f);
   for (unsigned i = 0; i < 4; ++i) {
      pos[i] = cs_scale(&s, pos[i]);
      pos[i] = cs_crop(&s, pos[i], i % 2 ? COORDS_CHROMA : COORDS_LUMA);
      pos[i] = nir_fadd(b, pos[i], i < 2 ? o_plus : o_minus);
   }

   /* Texture layer */
   nir_def *tex_layer = nir_vec3(b,
                                 nir_channel(b, pos[0], 1),
                                 nir_channel(b, pos[1], 1),
                                 nir_channel(b, pos[1], 1));
   tex_layer = nir_fadd(b, tex_layer,
                        nir_fneg(b, nir_fround_even(b, tex_layer)));
   tex_layer = nir_fabs(b, tex_layer);
   tex_layer = nir_fmul_imm(b, tex_layer, 2.0f);

   nir_def *col[6];
   for (unsigned i = 0; i < 4; ++i) {
      bool top = i < 2;
      unsigned j = top ? 0 : 3;
      unsigned flags = i % 2 ? COORDS_CHROMA : COORDS_LUMA;
      pos[i] = cs_clamp(&s, pos[i], flags);
      pos[i] = cs_normalize(&s, pos[i], flags);
      pos[i] = nir_vector_insert_imm(b, pos[i],
                                     top ? s.fzero : s.fone, 2);
      if (flags == COORDS_LUMA) {
         col[j] = cs_fetch_texel(&s, pos[i], 0);
      } else {
         col[j + 1] = cs_fetch_texel(&s, pos[i], 1);
         col[j + 2] = cs_fetch_texel(&s, pos[i], 2);
      }
   }

   nir_def *color_top = nir_vec4(b, col[0], col[1], col[2], s.fone);
   nir_def *color_down = nir_vec4(b, col[3], col[4], col[5], s.fone);
   nir_def *color = nir_flrp(b, color_down, color_top, tex_layer);

   if (rgb) {
      nir_def *alpha = cs_luma_key(&s, nir_channel(b, color, 2));
      for (unsigned i = 0; i < 3; ++i)
         col[i] = cs_color_space_conversion(&s, color, i);
      color = nir_vec4(b, col[0], col[1], col[2], alpha);
   } else if (y) {
      color = nir_channel(b, color, 0);
   } else {
      nir_def *col1 = nir_channel(b, color, 1);
      nir_def *col2 = nir_channel(b, color, 2);
      color = nir_vec2(b, col1, col2);
   }

   cs_image_store(&s, cs_translate(&s, ipos), color);

   return cs_create_shader_state(c, &s);
}

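/* Bind the destination image and the given compute state, then dispatch one
 * 8x8 workgroup per tile of the draw area. last_block covers draw areas that
 * are not multiples of 8: e.g. a 1921 pixel wide area launches
 * DIV_ROUND_UP(1921, 8) = 241 groups in X with last_block[0] = 1921 % 8 = 1.
 */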
static void
cs_launch(struct vl_compositor *c,
          void                 *cs,
          const struct u_rect  *draw_area)
{
   struct pipe_context *ctx = c->pipe;
   unsigned width, height;

   width = draw_area->x1 - draw_area->x0;
   height = draw_area->y1 - draw_area->y0;

   /* Bind the image */
   struct pipe_image_view image = {0};
   image.resource = c->fb_state.cbufs[0]->texture;
   image.shader_access = image.access = PIPE_IMAGE_ACCESS_READ_WRITE;
   image.format = c->fb_state.cbufs[0]->texture->format;

   ctx->set_shader_images(c->pipe, PIPE_SHADER_COMPUTE, 0, 1, 0, &image);

   /* Bind compute shader */
   ctx->bind_compute_state(ctx, cs);

   /* Dispatch compute */
   struct pipe_grid_info info = {0};
   info.block[0] = 8;
   info.last_block[0] = width % info.block[0];
   info.block[1] = 8;
   info.last_block[1] = height % info.block[1];
   info.block[2] = 1;
   info.grid[0] = DIV_ROUND_UP(width, info.block[0]);
   info.grid[1] = DIV_ROUND_UP(height, info.block[1]);
   info.grid[2] = 1;

   ctx->launch_grid(ctx, &info);

   /* Make the result visible to all clients. */
   ctx->memory_barrier(ctx, PIPE_BARRIER_ALL);
}

static inline struct u_rect
calc_drawn_area(struct vl_compositor_state *s,
                struct vl_compositor_layer *layer)
{
   struct vertex2f tl, br;
   struct u_rect result;

   assert(s && layer);

   tl = layer->dst.tl;
   br = layer->dst.br;

   /* Scale */
   result.x0 = tl.x * layer->viewport.scale[0] + layer->viewport.translate[0];
   result.y0 = tl.y * layer->viewport.scale[1] + layer->viewport.translate[1];
   result.x1 = br.x * layer->viewport.scale[0] + layer->viewport.translate[0];
   result.y1 = br.y * layer->viewport.scale[1] + layer->viewport.translate[1];

   /* Clip */
   result.x0 = MAX2(result.x0, s->scissor.minx);
   result.y0 = MAX2(result.y0, s->scissor.miny);
   result.x1 = MIN2(result.x1, s->scissor.maxx);
   result.y1 = MIN2(result.y1, s->scissor.maxy);
   return result;
}

static inline float
chroma_offset_x(unsigned location)
{
   if (location & VL_COMPOSITOR_LOCATION_HORIZONTAL_LEFT)
      return 0.5f;
   else
      return 0.0f;
}

static inline float
chroma_offset_y(unsigned location)
{
   if (location & VL_COMPOSITOR_LOCATION_VERTICAL_TOP)
      return 0.5f;
   else if (location & VL_COMPOSITOR_LOCATION_VERTICAL_BOTTOM)
      return -0.5f;
   else
      return 0.0f;
}

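/* Upload the CSC matrix and the per-layer cs_viewport parameters into the
 * constant buffer. The write order must match the std140 ubo layout
 * documented in cs_create_shader(); subsample_ratio is derived from the
 * relative sizes of the first two sampler planes.
 */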
static bool
set_viewport(struct vl_compositor_state *s,
             struct cs_viewport         *drawn,
             struct pipe_sampler_view **samplers)
{
   struct pipe_transfer *buf_transfer;

   assert(s && drawn);

   void *ptr = pipe_buffer_map(s->pipe, s->shader_params,
                               PIPE_MAP_WRITE | PIPE_MAP_DISCARD_WHOLE_RESOURCE,
                               &buf_transfer);

   if (!ptr)
      return false;

   memcpy(ptr, &s->csc_matrix, sizeof(vl_csc_matrix));

   float *ptr_float = (float *)ptr;
   ptr_float += sizeof(vl_csc_matrix) / sizeof(float);
   *ptr_float++ = s->luma_min;
   *ptr_float++ = s->luma_max;
   *ptr_float++ = drawn->scale_x;
   *ptr_float++ = drawn->scale_y;
   *ptr_float++ = drawn->crop_x;
   *ptr_float++ = drawn->crop_y;

   int *ptr_int = (int *)ptr_float;
   *ptr_int++ = drawn->translate_x;
   *ptr_int++ = drawn->translate_y;

   ptr_float = (float *)ptr_int;
   *ptr_float++ = drawn->sampler0_w;
   *ptr_float++ = drawn->sampler0_h;

   /* compute_shader_video_buffer uses pixel coordinates based on the
    * Y sampler dimensions. If U/V are using separate planes and are
    * subsampled, we need to scale the coordinates */
   if (samplers[1]) {
      float h_ratio = samplers[1]->texture->width0 /
                     (float) samplers[0]->texture->width0;
      *ptr_float++ = h_ratio;
      float v_ratio = samplers[1]->texture->height0 /
                     (float) samplers[0]->texture->height0;
      *ptr_float++ = v_ratio;
   }
   else {
      *ptr_float++ = 1.0f;
      *ptr_float++ = 1.0f;
   }

   *ptr_float++ = drawn->clamp_x;
   *ptr_float++ = drawn->clamp_y;
   *ptr_float++ = drawn->chroma_clamp_x;
   *ptr_float++ = drawn->chroma_clamp_y;
   *ptr_float++ = drawn->chroma_offset_x;
   *ptr_float++ = drawn->chroma_offset_y;

   pipe_buffer_unmap(s->pipe, buf_transfer);

   return true;
}

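/* For every enabled layer: fill in a cs_viewport, upload it, bind the
 * layer's samplers and compute shader, launch over the clipped draw area,
 * then unbind the compute state and extend the dirty rectangle.
 */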
static void
draw_layers(struct vl_compositor       *c,
            struct vl_compositor_state *s,
            struct u_rect              *dirty)
{
   unsigned i;

   assert(c);

   for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS; ++i) {
      if (s->used_layers & (1 << i)) {
         struct vl_compositor_layer *layer = &s->layers[i];
         struct pipe_sampler_view **samplers = &layer->sampler_views[0];
         unsigned num_sampler_views = !samplers[1] ? 1 : !samplers[2] ? 2 : 3;
         struct pipe_sampler_view *sampler1 = samplers[1] ? samplers[1] : samplers[0];
         struct cs_viewport drawn;

         drawn.area = calc_drawn_area(s, layer);
         drawn.scale_x = layer->viewport.scale[0] /
            ((float)layer->sampler_views[0]->texture->width0 *
             (layer->src.br.x - layer->src.tl.x));
         drawn.scale_y = layer->viewport.scale[1] /
            ((float)layer->sampler_views[0]->texture->height0 *
             (layer->src.br.y - layer->src.tl.y));
         drawn.crop_x = layer->src.tl.x * layer->sampler_views[0]->texture->width0;
         drawn.translate_x = layer->viewport.translate[0];
         drawn.crop_y = layer->src.tl.y * layer->sampler_views[0]->texture->height0;
         drawn.translate_y = layer->viewport.translate[1];
         drawn.sampler0_w = (float)layer->sampler_views[0]->texture->width0;
         drawn.sampler0_h = (float)layer->sampler_views[0]->texture->height0;
         drawn.clamp_x = (float)samplers[0]->texture->width0 * layer->src.br.x - 0.5;
         drawn.clamp_y = (float)samplers[0]->texture->height0 * layer->src.br.y - 0.5;
         drawn.chroma_clamp_x = (float)sampler1->texture->width0 * layer->src.br.x - 0.5;
         drawn.chroma_clamp_y = (float)sampler1->texture->height0 * layer->src.br.y - 0.5;
         drawn.chroma_offset_x = chroma_offset_x(s->chroma_location);
         drawn.chroma_offset_y = chroma_offset_y(s->chroma_location);
         set_viewport(s, &drawn, samplers);

         c->pipe->bind_sampler_states(c->pipe, PIPE_SHADER_COMPUTE, 0,
                        num_sampler_views, layer->samplers);
         c->pipe->set_sampler_views(c->pipe, PIPE_SHADER_COMPUTE, 0,
                        num_sampler_views, 0, false, samplers);

         cs_launch(c, layer->cs, &(drawn.area));

         /* Unbind. */
         c->pipe->set_shader_images(c->pipe, PIPE_SHADER_COMPUTE, 0, 0, 1, NULL);
         c->pipe->set_constant_buffer(c->pipe, PIPE_SHADER_COMPUTE, 0, false, NULL);
         c->pipe->set_sampler_views(c->pipe, PIPE_SHADER_COMPUTE, 0, 0,
                        num_sampler_views, false, NULL);
         c->pipe->bind_compute_state(c->pipe, NULL);
         c->pipe->bind_sampler_states(c->pipe, PIPE_SHADER_COMPUTE, 0,
                        num_sampler_views, NULL);

         if (dirty) {
            struct u_rect drawn = calc_drawn_area(s, layer);
            dirty->x0 = MIN2(drawn.x0, dirty->x0);
            dirty->y0 = MIN2(drawn.y0, dirty->y0);
            dirty->x1 = MAX2(drawn.x1, dirty->x1);
            dirty->y1 = MAX2(drawn.y1, dirty->y1);
         }
      }
   }
}

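/* Composite all enabled layers of "s" into dst_surface. This is normally
 * reached through the generic vl_compositor entry points rather than called
 * directly. A minimal usage sketch, assuming the layer-setup helpers declared
 * in vl_compositor.h (the names and arguments below are illustrative, not
 * taken from this file):
 *
 *    struct u_rect dirty = { 0, dst->width, 0, dst->height };
 *    vl_compositor_clear_layers(&state);
 *    vl_compositor_set_buffer_layer(&state, &compositor, 0, video_buffer,
 *                                   NULL, NULL, VL_COMPOSITOR_WEAVE);
 *    vl_compositor_cs_render(&state, &compositor, dst, &dirty, true);
 */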
void
vl_compositor_cs_render(struct vl_compositor_state *s,
                        struct vl_compositor       *c,
                        struct pipe_surface        *dst_surface,
                        struct u_rect              *dirty_area,
                        bool                        clear_dirty)
{
   assert(c && s);
   assert(dst_surface);

   c->fb_state.width = dst_surface->width;
   c->fb_state.height = dst_surface->height;
   c->fb_state.cbufs[0] = dst_surface;

   if (!s->scissor_valid) {
      s->scissor.minx = 0;
      s->scissor.miny = 0;
      s->scissor.maxx = dst_surface->width;
      s->scissor.maxy = dst_surface->height;
   }

   if (clear_dirty && dirty_area &&
       (dirty_area->x0 < dirty_area->x1 || dirty_area->y0 < dirty_area->y1)) {

      c->pipe->clear_render_target(c->pipe, dst_surface, &s->clear_color,
                       0, 0, dst_surface->width, dst_surface->height, false);
      dirty_area->x0 = dirty_area->y0 = VL_COMPOSITOR_MAX_DIRTY;
      dirty_area->x1 = dirty_area->y1 = VL_COMPOSITOR_MIN_DIRTY;
   }

   pipe_set_constant_buffer(c->pipe, PIPE_SHADER_COMPUTE, 0, s->shader_params);

   draw_layers(c, s, dirty_area);
}

bool vl_compositor_cs_init_shaders(struct vl_compositor *c)
{
   assert(c);

   c->cs_video_buffer = create_video_buffer_shader(c);
   if (!c->cs_video_buffer) {
      debug_printf("Unable to create video_buffer compute shader.\n");
      return false;
   }

   c->cs_weave_rgb = create_weave_shader(c, true, false);
   if (!c->cs_weave_rgb) {
      debug_printf("Unable to create weave_rgb compute shader.\n");
      return false;
   }

   c->cs_yuv.weave.y = create_weave_shader(c, false, true);
   c->cs_yuv.weave.uv = create_weave_shader(c, false, false);
   c->cs_yuv.progressive.y = create_yuv_progressive_shader(c, true);
   c->cs_yuv.progressive.uv = create_yuv_progressive_shader(c, false);
   if (!c->cs_yuv.weave.y || !c->cs_yuv.weave.uv) {
      debug_printf("Unable to create YCbCr i-to-YCbCr p deint compute shader.\n");
      return false;
   }
   if (!c->cs_yuv.progressive.y || !c->cs_yuv.progressive.uv) {
      debug_printf("Unable to create YCbCr p-to-NV12 compute shader.\n");
      return false;
   }

   c->cs_rgb_yuv.y = create_rgb_yuv_shader(c, true);
   c->cs_rgb_yuv.uv = create_rgb_yuv_shader(c, false);
   if (!c->cs_rgb_yuv.y || !c->cs_rgb_yuv.uv) {
      debug_printf("Unable to create RGB-to-NV12 compute shader.\n");
      return false;
   }

   return true;
}

void vl_compositor_cs_cleanup_shaders(struct vl_compositor *c)
{
   assert(c);

   if (c->cs_video_buffer)
      c->pipe->delete_compute_state(c->pipe, c->cs_video_buffer);
   if (c->cs_weave_rgb)
      c->pipe->delete_compute_state(c->pipe, c->cs_weave_rgb);
   if (c->cs_yuv.weave.y)
      c->pipe->delete_compute_state(c->pipe, c->cs_yuv.weave.y);
   if (c->cs_yuv.weave.uv)
      c->pipe->delete_compute_state(c->pipe, c->cs_yuv.weave.uv);
   if (c->cs_yuv.progressive.y)
      c->pipe->delete_compute_state(c->pipe, c->cs_yuv.progressive.y);
   if (c->cs_yuv.progressive.uv)
      c->pipe->delete_compute_state(c->pipe, c->cs_yuv.progressive.uv);
   if (c->cs_rgb_yuv.y)
      c->pipe->delete_compute_state(c->pipe, c->cs_rgb_yuv.y);
   if (c->cs_rgb_yuv.uv)
      c->pipe->delete_compute_state(c->pipe, c->cs_rgb_yuv.uv);
}
877