1 /**************************************************************************
2 *
3 * Copyright 2019 Advanced Micro Devices, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
 * Authors: James Zhu <james.zhu@amd.com>
27 *
28 **************************************************************************/
29
30 #include <assert.h>
31
32 #include "nir/nir_builder.h"
33 #include "vl_compositor_cs.h"
34
/* Per-layer draw parameters; uploaded into the shader UBO by set_viewport()
 * following the std140 layout declared in cs_create_shader(). */
struct cs_viewport {
   float scale_x;          /* dst-to-src scale factors */
   float scale_y;
   struct u_rect area;     /* destination rectangle to draw into */
   float crop_x;           /* src crop offset in texels */
   float crop_y;
   int translate_x;        /* dst translation in pixels */
   int translate_y;
   float sampler0_w;       /* dimensions of plane-0 (luma) sampler */
   float sampler0_h;
   float clamp_x;          /* max luma texture coordinates */
   float clamp_y;
   float chroma_clamp_x;   /* max chroma texture coordinates */
   float chroma_clamp_y;
   float chroma_offset_x;  /* chroma siting offset in texels */
   float chroma_offset_y;
};
52
/* State shared by the compute-shader building helpers below. */
struct cs_shader {
   nir_builder b;             /* builder used to emit the shader body */
   const char *name;          /* shader name (debugging) */
   bool array;                /* sample from a 2D array instead of a rect sampler */
   unsigned num_samplers;     /* number of entries used in samplers[] */
   nir_variable *samplers[3]; /* one sampler per plane */
   nir_variable *image;       /* destination image */
   nir_def *params[8];        /* UBO contents loaded as 8 vec4s */
   nir_def *fone;             /* immediate 1.0f */
   nir_def *fzero;            /* immediate 0.0f */
};
64
/* Flags selecting which kind of coordinates the helpers compute. */
enum coords_flags {
   COORDS_LUMA = 0x0,          /* luma-plane coordinates */
   COORDS_CHROMA = 0x1,        /* chroma-plane (subsampled) coordinates */
   COORDS_CHROMA_OFFSET = 0x2, /* also apply the chroma siting offset */
};
70
/* Emit the skeleton common to all compositor compute shaders: an 8x8x1
 * workgroup, the parameter UBO loaded into s->params, the per-plane sampler
 * variables, the destination image and the common float immediates.
 * Returns the global invocation coordinates of the current thread. */
static nir_def *cs_create_shader(struct vl_compositor *c, struct cs_shader *s)
{
   /*
   #version 450

   layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
   layout (binding = 0) uniform sampler2DRect samplers[3]; // or sampler2DArray
   layout (binding = 0) uniform image2D image;

   layout (std140, binding = 0) uniform ubo
   {
      vec4 csc_mat[3];        // params[0-2]
      float luma_min;         // params[3].x
      float luma_max;         // params[3].y
      vec2 scale;             // params[3].zw
      vec2 crop;              // params[4].xy
      ivec2 translate;        // params[4].zw
      vec2 sampler0_wh;       // params[5].xy
      vec2 subsample_ratio;   // params[5].zw
      vec2 coord_clamp;       // params[6].xy
      vec2 chroma_clamp;      // params[6].zw
      vec2 chroma_offset;     // params[7].xy
   };

   void main()
   {
      ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
   }
   */
   enum glsl_sampler_dim sampler_dim = s->array ? GLSL_SAMPLER_DIM_2D : GLSL_SAMPLER_DIM_RECT;
   const struct glsl_type *sampler_type =
      glsl_sampler_type(sampler_dim, /*is_shadow*/ false, s->array, GLSL_TYPE_FLOAT);
   const struct glsl_type *image_type =
      glsl_image_type(GLSL_SAMPLER_DIM_2D, /*is_array*/ false, GLSL_TYPE_FLOAT);
   const nir_shader_compiler_options *options =
      c->pipe->screen->get_compiler_options(c->pipe->screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE);

   s->b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "vl:%s", s->name);
   nir_builder *b = &s->b;
   b->shader->info.workgroup_size[0] = 8;
   b->shader->info.workgroup_size[1] = 8;
   b->shader->info.workgroup_size[2] = 1;
   b->shader->info.num_ubos = 1;
   b->shader->num_uniforms = ARRAY_SIZE(s->params);

   /* Load the whole UBO as 8 vec4s, 16 bytes apart. */
   nir_def *zero = nir_imm_int(b, 0);
   for (unsigned i = 0; i < b->shader->num_uniforms; ++i)
      s->params[i] = nir_load_ubo(b, 4, 32, zero, nir_imm_int(b, i * 16), .align_mul = 4, .range = ~0);

   for (unsigned i = 0; i < s->num_samplers; ++i) {
      s->samplers[i] = nir_variable_create(b->shader, nir_var_uniform, sampler_type, "sampler");
      s->samplers[i]->data.binding = i;
      BITSET_SET(b->shader->info.textures_used, i);
      BITSET_SET(b->shader->info.samplers_used, i);
   }

   s->image = nir_variable_create(b->shader, nir_var_image, image_type, "image");
   s->image->data.binding = 0;
   BITSET_SET(b->shader->info.images_used, 0);

   s->fone = nir_imm_float(b, 1.0f);
   s->fzero = nir_imm_float(b, 0.0f);

   /* gl_GlobalInvocationID = workgroup_id * workgroup_size + local_id */
   nir_def *block_ids = nir_load_workgroup_id(b);
   nir_def *local_ids = nir_load_local_invocation_id(b);
   return nir_iadd(b, nir_imul(b, block_ids, nir_imm_ivec3(b, 8, 8, 1)), local_ids);
}
138
cs_create_shader_state(struct vl_compositor * c,struct cs_shader * s)139 static void *cs_create_shader_state(struct vl_compositor *c, struct cs_shader *s)
140 {
141 c->pipe->screen->finalize_nir(c->pipe->screen, s->b.shader);
142
143 struct pipe_compute_state state = {0};
144 state.ir_type = PIPE_SHADER_IR_NIR;
145 state.prog = s->b.shader;
146
147 /* create compute shader */
148 return c->pipe->create_compute_state(c->pipe, &state);
149 }
150
/* Translate integer coordinates into destination space:
 * returns src.xy + params[4].zw (the dst translation). */
static inline nir_def *cs_translate(struct cs_shader *s, nir_def *src)
{
   nir_builder *b = &s->b;
   nir_def *translate = nir_channels(b, s->params[4], 0x3 << 2);

   return nir_iadd(b, src, translate);
}
159
/* Offset coordinates to the texel center: returns src.xy + 0.5. */
static inline nir_def *cs_texture_offset(struct cs_shader *s, nir_def *src)
{
   return nir_fadd_imm(&s->b, src, 0.5f);
}
168
/* Apply the chroma subsample ratio: returns src.xy * params[5].zw. */
static inline nir_def *cs_chroma_subsampling(struct cs_shader *s, nir_def *src)
{
   nir_builder *b = &s->b;
   nir_def *ratio = nir_channels(b, s->params[5], 0x3 << 2);

   return nir_fmul(b, src, ratio);
}
177
/* Scale destination coordinates back into source space:
 * returns src.xy / params[3].zw. */
static inline nir_def *cs_scale(struct cs_shader *s, nir_def *src)
{
   nir_builder *b = &s->b;
   nir_def *scale = nir_channels(b, s->params[3], 0x3 << 2);

   return nir_fdiv(b, src, scale);
}
186
/* Luma keying: returns 1.0 when src falls outside (luma_min, luma_max],
 * i.e. float(params[3].x >= src || params[3].y < src). */
static inline nir_def *cs_luma_key(struct cs_shader *s, nir_def *src)
{
   nir_builder *b = &s->b;
   nir_def *below_min = nir_fge(b, nir_channel(b, s->params[3], 0), src);
   nir_def *above_max = nir_flt(b, nir_channel(b, s->params[3], 1), src);
   nir_def *keyed = nir_ior(b, below_min, above_max);

   return nir_b2f32(b, keyed);
}
199
/* Apply the chroma siting offset (params[7].xy). Chroma coordinates get
 * the offset added as-is; luma coordinates are shifted by -0.5 * offset. */
static inline nir_def *cs_chroma_offset(struct cs_shader *s, nir_def *src, unsigned flags)
{
   nir_builder *b = &s->b;
   nir_def *offset = nir_channels(b, s->params[7], 0x3);
   bool chroma = (flags & COORDS_CHROMA) != 0;

   return chroma ? nir_fadd(b, src, offset)
                 : nir_ffma_imm1(b, offset, -0.5f, src);
}
214
/* Clamp src.xy against the maximum valid texture coordinates:
 * params[6].zw for chroma, params[6].xy for luma. */
static inline nir_def *cs_clamp(struct cs_shader *s, nir_def *src, unsigned flags)
{
   nir_builder *b = &s->b;
   nir_component_mask_t mask;

   if (flags & COORDS_CHROMA)
      mask = 0x3 << 2;
   else
      mask = 0x3;

   return nir_fmin(b, src, nir_channels(b, s->params[6], mask));
}
229
/* Normalize src.xy by the plane-0 sampler dimensions (params[5].xy); for
 * chroma coordinates the divisor is first scaled by the subsample ratio. */
static inline nir_def *cs_normalize(struct cs_shader *s, nir_def *src, unsigned flags)
{
   nir_builder *b = &s->b;
   nir_def *size = nir_channels(b, s->params[5], 0x3);

   if (flags & COORDS_CHROMA)
      size = cs_chroma_subsampling(s, size);

   return nir_fdiv(b, src, size);
}
244
/* Add the source crop offset (params[4].xy) to src.xy; the offset is
 * subsampled first when computing chroma coordinates. */
static inline nir_def *cs_crop(struct cs_shader *s, nir_def *src, unsigned flags)
{
   nir_builder *b = &s->b;
   nir_def *amount = nir_channels(b, s->params[4], 0x3);

   if (flags & COORDS_CHROMA)
      amount = cs_chroma_subsampling(s, amount);

   return nir_fadd(b, src, amount);
}
259
/* One row of the color-space-conversion matrix multiply:
 * returns dot(src, params[comp]). */
static inline nir_def *cs_color_space_conversion(struct cs_shader *s, nir_def *src, unsigned comp)
{
   return nir_fdot4(&s->b, src, s->params[comp]);
}
268
/* Sample samplers[sampler] at coords — xyz for array samplers (the z
 * component selects the layer), xy otherwise. */
static inline nir_def *cs_fetch_texel(struct cs_shader *s, nir_def *coords, unsigned sampler)
{
   nir_builder *b = &s->b;
   nir_component_mask_t mask = s->array ? 0x7 : 0x3;
   nir_deref_instr *deref = nir_build_deref_var(b, s->samplers[sampler]);

   return nir_tex_deref(b, deref, deref, nir_channels(b, coords, mask));
}
279
/* imageStore(image, pos.xy, color). The position is zero-padded to four
 * components as required by the image-store intrinsic. */
static inline void cs_image_store(struct cs_shader *s, nir_def *pos, nir_def *color)
{
   nir_builder *b = &s->b;
   nir_def *zero = nir_imm_int(b, 0);
   nir_def *undef32 = nir_undef(b, 1, 32); /* unused sample index */
   pos = nir_pad_vector_imm_int(b, pos, 0, 4);
   nir_image_deref_store(b, &nir_build_deref_var(b, s->image)->def, pos, undef32, color, zero);
}
291
/* Turn integer pixel coordinates into final source texture coordinates.
 * The transform order matters: texel-center offset, optional chroma siting
 * offset, chroma subsampling, scaling into source space, source crop, and
 * finally clamping against the valid coordinate range. */
static nir_def *cs_tex_coords(struct cs_shader *s, nir_def *coords, unsigned flags)
{
   nir_builder *b = &s->b;

   coords = nir_u2f32(b, coords);
   coords = cs_texture_offset(s, coords);

   if (flags & COORDS_CHROMA_OFFSET)
      coords = cs_chroma_offset(s, coords, flags);

   if (flags & COORDS_CHROMA)
      coords = cs_chroma_subsampling(s, coords);

   coords = cs_scale(s, coords);
   coords = cs_crop(s, coords, flags);
   coords = cs_clamp(s, coords, flags);

   return coords;
}
311
/* Build the shader that composites a 3-plane video buffer onto the target:
 * sample all three planes, apply luma keying for the alpha channel and run
 * the color-space-conversion matrix. */
static void *create_video_buffer_shader(struct vl_compositor *c)
{
   struct cs_shader s = {
      .name = "video_buffer",
      .num_samplers = 3,
   };
   nir_builder *b = &s.b;

   nir_def *ipos = cs_create_shader(c, &s);
   /* pos[0]: luma coordinates, pos[1]: chroma coordinates with siting offset */
   nir_def *pos[2] = {
      cs_tex_coords(&s, ipos, COORDS_LUMA),
      cs_tex_coords(&s, ipos, COORDS_CHROMA | COORDS_CHROMA_OFFSET),
   };

   /* Plane 0 uses luma coordinates, planes 1 and 2 the chroma ones. */
   nir_def *col[3];
   for (unsigned i = 0; i < 3; ++i)
      col[i] = cs_fetch_texel(&s, pos[MIN2(i, 1)], i);

   nir_def *alpha = cs_luma_key(&s, col[2]);

   /* CSC: dot each of params[0..2] with (c0, c1, c2, 1). */
   nir_def *color = nir_vec4(b, col[0], col[1], col[2], s.fone);
   for (unsigned i = 0; i < 3; ++i)
      col[i] = cs_color_space_conversion(&s, color, i);

   color = nir_vec4(b, col[0], col[1], col[2], alpha);
   cs_image_store(&s, cs_translate(&s, ipos), color);

   return cs_create_shader_state(c, &s);
}
341
create_yuv_progressive_shader(struct vl_compositor * c,bool y)342 static void *create_yuv_progressive_shader(struct vl_compositor *c, bool y)
343 {
344 struct cs_shader s = {
345 .name = y ? "yuv_progressive_y" : "yuv_progressive_uv",
346 .num_samplers = 3,
347 };
348 nir_builder *b = &s.b;
349
350 nir_def *ipos = cs_create_shader(c, &s);
351 nir_def *pos = cs_tex_coords(&s, ipos, y ? COORDS_LUMA : COORDS_CHROMA);
352
353 nir_def *color;
354 if (y) {
355 color = nir_channel(b, cs_fetch_texel(&s, pos, 0), 0);
356 } else {
357 nir_def *col1 = cs_fetch_texel(&s, pos, 1);
358 nir_def *col2 = cs_fetch_texel(&s, pos, 2);
359 color = nir_vec2(b, col1, col2);
360 }
361
362 cs_image_store(&s, cs_translate(&s, ipos), color);
363
364 return cs_create_shader_state(c, &s);
365 }
366
/* Build the shader converting RGB input into the Y plane (y == true) or the
 * UV plane of the target. The UV path averages four samples placed a
 * quarter texel around the chroma position before conversion. */
static void *create_rgb_yuv_shader(struct vl_compositor *c, bool y)
{
   struct cs_shader s = {
      .name = y ? "rgb_yuv_y" : "rgb_yuv_uv",
      .num_samplers = 1,
   };
   nir_builder *b = &s.b;

   nir_def *ipos = cs_create_shader(c, &s);
   nir_def *color = NULL;

   if (y) {
      nir_def *pos = cs_tex_coords(&s, ipos, COORDS_LUMA);
      color = cs_fetch_texel(&s, pos, 0);
   } else {
      /*
      vec2 pos[4];
      pos[0] = vec2(ipos);
      pos[0] = cs_texture_offset(pos[0]);
      pos[0] = cs_chroma_offset(pos[0], COORDS_LUMA);

      // Sample offset
      pos[3] = pos[0] + vec2( 0.25, -0.25);
      pos[2] = pos[0] + vec2(-0.25,  0.25);
      pos[1] = pos[0] + vec2(-0.25, -0.25);
      pos[0] = pos[0] + vec2( 0.25,  0.25);

      vec4 col[4];
      for (uint i = 0; i < 4; ++i) {
         pos[i] = cs_scale(pos[i]);
         pos[i] = cs_crop(pos[i], COORDS_LUMA);
         pos[i] = cs_clamp(pos[i], COORDS_LUMA);
         col[i] = texture(samp[0], pos[i]);
      }
      color = (col[0] + col[1] + col[2] + col[3]) * 0.25;
      */
      nir_def *pos[4];
      pos[0] = nir_u2f32(b, ipos);
      pos[0] = cs_texture_offset(&s, pos[0]);
      pos[0] = cs_chroma_offset(&s, pos[0], COORDS_LUMA);

      /* Sample offset */
      nir_def *o_plus = nir_imm_float(b, 0.25f);
      nir_def *o_minus = nir_imm_float(b, -0.25f);
      pos[3] = nir_fadd(b, pos[0], nir_vec2(b, o_plus, o_minus));
      pos[2] = nir_fadd(b, pos[0], nir_vec2(b, o_minus, o_plus));
      pos[1] = nir_fadd(b, pos[0], nir_vec2(b, o_minus, o_minus));
      pos[0] = nir_fadd(b, pos[0], nir_vec2(b, o_plus, o_plus));

      /* Accumulate the four samples, then average. */
      for (unsigned i = 0; i < 4; ++i) {
         pos[i] = cs_scale(&s, pos[i]);
         pos[i] = cs_crop(&s, pos[i], COORDS_LUMA);
         pos[i] = cs_clamp(&s, pos[i], COORDS_LUMA);

         nir_def *c = cs_fetch_texel(&s, pos[i], 0);
         color = color ? nir_fadd(b, color, c) : c;
      }
      color = nir_fmul_imm(b, color, 0.25f);
   }

   /* Force alpha to 1 before the CSC dot products. */
   color = nir_vector_insert_imm(b, color, s.fone, 3);

   if (y) {
      color = cs_color_space_conversion(&s, color, 0);
   } else {
      nir_def *col1 = cs_color_space_conversion(&s, color, 1);
      nir_def *col2 = cs_color_space_conversion(&s, color, 2);
      color = nir_vec2(b, col1, col2);
   }

   cs_image_store(&s, cs_translate(&s, ipos), color);

   return cs_create_shader_state(c, &s);
}
441
create_weave_shader(struct vl_compositor * c,bool rgb,bool y)442 static nir_def *create_weave_shader(struct vl_compositor *c, bool rgb, bool y)
443 {
444 struct cs_shader s = {
445 .name = rgb ? "weave" : y ? "yuv_weave_y" : "yuv_weave_uv",
446 .array = true,
447 .num_samplers = 3,
448 };
449 nir_builder *b = &s.b;
450
451 nir_def *ipos = cs_create_shader(c, &s);
452
453 /*
454 vec2 top_y = cs_texture_offset(vec2(ipos));
455 vec2 top_uv = rgb ? cs_chroma_offset(top_y, COORDS_CHROMA) : top_y;
456 top_uv = cs_chroma_subsampling(top_uv);
457 vec2 down_y = top_y;
458 vec2 down_uv = top_uv;
459
460 top_y = cs_crop(cs_scale(top_y), COORDS_LUMA);
461 top_uv = cs_crop(cs_scale(top_uv), COORDS_CHROMA);
462 down_y = cs_crop(cs_scale(down_y), COORDS_LUMA);
463 down_uv = cs_crop(cs_scale(down_uv), COORDS_CHROMA);
464
465 // Weave offset
466 top_y = top_y + vec2(0.0, 0.25);
467 top_uv = top_uv + vec2(0.0, 0.25);
468 down_y = down_y + vec2(0.0, -0.25);
469 down_uv = down_uv + vec2(0.0, -0.25);
470
471 // Texture layer
472 vec3 tex_layer = vec3(top_y.y, top_uv.y, top_uv.y);
473 tex_layer = tex_layer + round(tex_layer) * -1.0;
474 tex_layer = abs(tex_layer) * 2.0;
475
476 top_y = cs_clamp(top_y, COORDS_LUMA);
477 top_y = cs_normalize(top_y, COORDS_LUMA);
478 top_uv = cs_clamp(top_uv, COORDS_CHROMA);
479 top_uv = cs_normalize(top_uv, COORDS_CHROMA);
480 down_y = cs_clamp(down_y, COORDS_LUMA);
481 down_y = cs_normalize(down_y, COORDS_LUMA);
482 down_uv = cs_clamp(down_uv, COORDS_CHROMA);
483 down_uv = cs_normalize(down_uv, COORDS_CHROMA);
484
485 vec4 top_col, down_col;
486 top_col.x = texture(samp[0], vec3(top_y, 0.0)).x;
487 top_col.y = texture(samp[1], vec3(top_uv, 0.0)).x;
488 top_col.z = texture(samp[2], vec3(top_uv, 0.0)).x;
489 top_col.w = 1.0;
490 down_col.x = texture(samp[0], vec3(down_y, 1.0)).x;
491 down_col.y = texture(samp[1], vec3(down_uv, 1.0)).x;
492 down_col.z = texture(samp[2], vec3(down_uv, 1.0)).x;
493 down_col.w = 1.0;
494
495 vec4 color = mix(down_col, top_col, tex_layer);
496 */
497 nir_def *pos[4];
498 /* Top Y */
499 pos[0] = nir_u2f32(b, ipos);
500 pos[0] = cs_texture_offset(&s, pos[0]);
501 /* Top UV */
502 pos[1] = rgb ? cs_chroma_offset(&s, pos[0], COORDS_CHROMA) : pos[0];
503 pos[1] = cs_chroma_subsampling(&s, pos[1]);
504 /* Down Y */
505 pos[2] = pos[0];
506 /* Down UV */
507 pos[3] = pos[1];
508
509 /* Weave offset */
510 nir_def *o_plus = nir_imm_vec2(b, 0.0f, 0.25f);
511 nir_def *o_minus = nir_imm_vec2(b, 0.0f, -0.25f);
512 for (unsigned i = 0; i < 4; ++i) {
513 pos[i] = cs_scale(&s, pos[i]);
514 pos[i] = cs_crop(&s, pos[i], i % 2 ? COORDS_CHROMA : COORDS_LUMA);
515 pos[i] = nir_fadd(b, pos[i], i < 2 ? o_plus : o_minus);
516 }
517
518 /* Texture layer */
519 nir_def *tex_layer = nir_vec3(b,
520 nir_channel(b, pos[0], 1),
521 nir_channel(b, pos[1], 1),
522 nir_channel(b, pos[1], 1));
523 tex_layer = nir_fadd(b, tex_layer,
524 nir_fneg(b, nir_fround_even(b, tex_layer)));
525 tex_layer = nir_fabs(b, tex_layer);
526 tex_layer = nir_fmul_imm(b, tex_layer, 2.0f);
527
528 nir_def *col[6];
529 for (unsigned i = 0; i < 4; ++i) {
530 bool top = i < 2;
531 unsigned j = top ? 0 : 3;
532 unsigned flags = i % 2 ? COORDS_CHROMA : COORDS_LUMA;
533 pos[i] = cs_clamp(&s, pos[i], flags);
534 pos[i] = cs_normalize(&s, pos[i], flags);
535 pos[i] = nir_vector_insert_imm(b, pos[i],
536 top ? s.fzero : s.fone, 2);
537 if (flags == COORDS_LUMA) {
538 col[j] = cs_fetch_texel(&s, pos[i], 0);
539 } else {
540 col[j + 1] = cs_fetch_texel(&s, pos[i], 1);
541 col[j + 2] = cs_fetch_texel(&s, pos[i], 2);
542 }
543 }
544
545 nir_def *color_top = nir_vec4(b, col[0], col[1], col[2], s.fone);
546 nir_def *color_down = nir_vec4(b, col[3], col[4], col[5], s.fone);
547 nir_def *color = nir_flrp(b, color_down, color_top, tex_layer);
548
549 if (rgb) {
550 nir_def *alpha = cs_luma_key(&s, nir_channel(b, color, 2));
551 for (unsigned i = 0; i < 3; ++i)
552 col[i] = cs_color_space_conversion(&s, color, i);
553 color = nir_vec4(b, col[0], col[1], col[2], alpha);
554 } else if (y) {
555 color = nir_channel(b, color, 0);
556 } else {
557 nir_def *col1 = nir_channel(b, color, 1);
558 nir_def *col2 = nir_channel(b, color, 2);
559 color = nir_vec2(b, col1, col2);
560 }
561
562 cs_image_store(&s, cs_translate(&s, ipos), color);
563
564 return cs_create_shader_state(c, &s);
565 }
566
567 static void
cs_launch(struct vl_compositor * c,void * cs,const struct u_rect * draw_area)568 cs_launch(struct vl_compositor *c,
569 void *cs,
570 const struct u_rect *draw_area)
571 {
572 struct pipe_context *ctx = c->pipe;
573 unsigned width, height;
574
575 width = draw_area->x1 - draw_area->x0;
576 height = draw_area->y1 - draw_area->y0;
577
578 /* Bind the image */
579 struct pipe_image_view image = {0};
580 image.resource = c->fb_state.cbufs[0]->texture;
581 image.shader_access = image.access = PIPE_IMAGE_ACCESS_READ_WRITE;
582 image.format = c->fb_state.cbufs[0]->texture->format;
583
584 ctx->set_shader_images(c->pipe, PIPE_SHADER_COMPUTE, 0, 1, 0, &image);
585
586 /* Bind compute shader */
587 ctx->bind_compute_state(ctx, cs);
588
589 /* Dispatch compute */
590 struct pipe_grid_info info = {0};
591 info.block[0] = 8;
592 info.last_block[0] = width % info.block[0];
593 info.block[1] = 8;
594 info.last_block[1] = height % info.block[1];
595 info.block[2] = 1;
596 info.grid[0] = DIV_ROUND_UP(width, info.block[0]);
597 info.grid[1] = DIV_ROUND_UP(height, info.block[1]);
598 info.grid[2] = 1;
599
600 ctx->launch_grid(ctx, &info);
601
602 /* Make the result visible to all clients. */
603 ctx->memory_barrier(ctx, PIPE_BARRIER_ALL);
604
605 }
606
607 static inline struct u_rect
calc_drawn_area(struct vl_compositor_state * s,struct vl_compositor_layer * layer)608 calc_drawn_area(struct vl_compositor_state *s,
609 struct vl_compositor_layer *layer)
610 {
611 struct vertex2f tl, br;
612 struct u_rect result;
613
614 assert(s && layer);
615
616 tl = layer->dst.tl;
617 br = layer->dst.br;
618
619 /* Scale */
620 result.x0 = tl.x * layer->viewport.scale[0] + layer->viewport.translate[0];
621 result.y0 = tl.y * layer->viewport.scale[1] + layer->viewport.translate[1];
622 result.x1 = br.x * layer->viewport.scale[0] + layer->viewport.translate[0];
623 result.y1 = br.y * layer->viewport.scale[1] + layer->viewport.translate[1];
624
625 /* Clip */
626 result.x0 = MAX2(result.x0, s->scissor.minx);
627 result.y0 = MAX2(result.y0, s->scissor.miny);
628 result.x1 = MIN2(result.x1, s->scissor.maxx);
629 result.y1 = MIN2(result.y1, s->scissor.maxy);
630 return result;
631 }
632
633 static inline float
chroma_offset_x(unsigned location)634 chroma_offset_x(unsigned location)
635 {
636 if (location & VL_COMPOSITOR_LOCATION_HORIZONTAL_LEFT)
637 return 0.5f;
638 else
639 return 0.0f;
640 }
641
642 static inline float
chroma_offset_y(unsigned location)643 chroma_offset_y(unsigned location)
644 {
645 if (location & VL_COMPOSITOR_LOCATION_VERTICAL_TOP)
646 return 0.5f;
647 else if (location & VL_COMPOSITOR_LOCATION_VERTICAL_BOTTOM)
648 return -0.5f;
649 else
650 return 0.0f;
651 }
652
/* Upload the per-layer parameters into the shader UBO. The sequence of
 * writes must match the std140 layout declared in cs_create_shader()
 * (csc_mat, luma min/max, scale, crop, translate, sampler0_wh,
 * subsample_ratio, coord_clamp, chroma_clamp, chroma_offset).
 * Returns false when the buffer could not be mapped. */
static bool
set_viewport(struct vl_compositor_state *s,
             struct cs_viewport *drawn,
             struct pipe_sampler_view **samplers)
{
   struct pipe_transfer *buf_transfer;

   assert(s && drawn);

   void *ptr = pipe_buffer_map(s->pipe, s->shader_params,
                               PIPE_MAP_WRITE | PIPE_MAP_DISCARD_WHOLE_RESOURCE,
                               &buf_transfer);

   if (!ptr)
      return false;

   /* params[0-2]: color space conversion matrix */
   memcpy(ptr, &s->csc_matrix, sizeof(vl_csc_matrix));

   float *ptr_float = (float *)ptr;
   ptr_float += sizeof(vl_csc_matrix) / sizeof(float);
   *ptr_float++ = s->luma_min;
   *ptr_float++ = s->luma_max;
   *ptr_float++ = drawn->scale_x;
   *ptr_float++ = drawn->scale_y;
   *ptr_float++ = drawn->crop_x;
   *ptr_float++ = drawn->crop_y;

   /* translate is declared ivec2 in the UBO, so write it as ints */
   int *ptr_int = (int *)ptr_float;
   *ptr_int++ = drawn->translate_x;
   *ptr_int++ = drawn->translate_y;

   ptr_float = (float *)ptr_int;
   *ptr_float++ = drawn->sampler0_w;
   *ptr_float++ = drawn->sampler0_h;

   /* compute_shader_video_buffer uses pixel coordinates based on the
    * Y sampler dimensions. If U/V are using separate planes and are
    * subsampled, we need to scale the coordinates */
   if (samplers[1]) {
      float h_ratio = samplers[1]->texture->width0 /
                      (float) samplers[0]->texture->width0;
      *ptr_float++ = h_ratio;
      float v_ratio = samplers[1]->texture->height0 /
                      (float) samplers[0]->texture->height0;
      *ptr_float++ = v_ratio;
   }
   else {
      *ptr_float++ = 1.0f;
      *ptr_float++ = 1.0f;
   }


   *ptr_float++ = drawn->clamp_x;
   *ptr_float++ = drawn->clamp_y;
   *ptr_float++ = drawn->chroma_clamp_x;
   *ptr_float++ = drawn->chroma_clamp_y;
   *ptr_float++ = drawn->chroma_offset_x;
   *ptr_float++ = drawn->chroma_offset_y;

   pipe_buffer_unmap(s->pipe, buf_transfer);

   return true;
}
716
717 static void
draw_layers(struct vl_compositor * c,struct vl_compositor_state * s,struct u_rect * dirty)718 draw_layers(struct vl_compositor *c,
719 struct vl_compositor_state *s,
720 struct u_rect *dirty)
721 {
722 unsigned i;
723
724 assert(c);
725
726 for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS; ++i) {
727 if (s->used_layers & (1 << i)) {
728 struct vl_compositor_layer *layer = &s->layers[i];
729 struct pipe_sampler_view **samplers = &layer->sampler_views[0];
730 unsigned num_sampler_views = !samplers[1] ? 1 : !samplers[2] ? 2 : 3;
731 struct pipe_sampler_view *sampler1 = samplers[1] ? samplers[1] : samplers[0];
732 struct cs_viewport drawn;
733
734 drawn.area = calc_drawn_area(s, layer);
735 drawn.scale_x = layer->viewport.scale[0] /
736 ((float)layer->sampler_views[0]->texture->width0 *
737 (layer->src.br.x - layer->src.tl.x));
738 drawn.scale_y = layer->viewport.scale[1] /
739 ((float)layer->sampler_views[0]->texture->height0 *
740 (layer->src.br.y - layer->src.tl.y));
741 drawn.crop_x = layer->src.tl.x * layer->sampler_views[0]->texture->width0;
742 drawn.translate_x = layer->viewport.translate[0];
743 drawn.crop_y = layer->src.tl.y * layer->sampler_views[0]->texture->height0;
744 drawn.translate_y = layer->viewport.translate[1];
745 drawn.sampler0_w = (float)layer->sampler_views[0]->texture->width0;
746 drawn.sampler0_h = (float)layer->sampler_views[0]->texture->height0;
747 drawn.clamp_x = (float)samplers[0]->texture->width0 * layer->src.br.x - 0.5;
748 drawn.clamp_y = (float)samplers[0]->texture->height0 * layer->src.br.y - 0.5;
749 drawn.chroma_clamp_x = (float)sampler1->texture->width0 * layer->src.br.x - 0.5;
750 drawn.chroma_clamp_y = (float)sampler1->texture->height0 * layer->src.br.y - 0.5;
751 drawn.chroma_offset_x = chroma_offset_x(s->chroma_location);
752 drawn.chroma_offset_y = chroma_offset_y(s->chroma_location);
753 set_viewport(s, &drawn, samplers);
754
755 c->pipe->bind_sampler_states(c->pipe, PIPE_SHADER_COMPUTE, 0,
756 num_sampler_views, layer->samplers);
757 c->pipe->set_sampler_views(c->pipe, PIPE_SHADER_COMPUTE, 0,
758 num_sampler_views, 0, false, samplers);
759
760 cs_launch(c, layer->cs, &(drawn.area));
761
762 /* Unbind. */
763 c->pipe->set_shader_images(c->pipe, PIPE_SHADER_COMPUTE, 0, 0, 1, NULL);
764 c->pipe->set_constant_buffer(c->pipe, PIPE_SHADER_COMPUTE, 0, false, NULL);
765 c->pipe->set_sampler_views(c->pipe, PIPE_SHADER_FRAGMENT, 0, 0,
766 num_sampler_views, false, NULL);
767 c->pipe->bind_compute_state(c->pipe, NULL);
768 c->pipe->bind_sampler_states(c->pipe, PIPE_SHADER_COMPUTE, 0,
769 num_sampler_views, NULL);
770
771 if (dirty) {
772 struct u_rect drawn = calc_drawn_area(s, layer);
773 dirty->x0 = MIN2(drawn.x0, dirty->x0);
774 dirty->y0 = MIN2(drawn.y0, dirty->y0);
775 dirty->x1 = MAX2(drawn.x1, dirty->x1);
776 dirty->y1 = MAX2(drawn.y1, dirty->y1);
777 }
778 }
779 }
780 }
781
/* Render all layers of the compositor state onto dst_surface using the
 * compute-shader path. Optionally clears the previously dirty region first
 * and resets dirty_area to "empty" so draw_layers can re-accumulate it. */
void
vl_compositor_cs_render(struct vl_compositor_state *s,
                        struct vl_compositor *c,
                        struct pipe_surface *dst_surface,
                        struct u_rect *dirty_area,
                        bool clear_dirty)
{
   assert(c && s);
   assert(dst_surface);

   c->fb_state.width = dst_surface->width;
   c->fb_state.height = dst_surface->height;
   c->fb_state.cbufs[0] = dst_surface;

   /* Without an explicit scissor, clip against the whole surface. */
   if (!s->scissor_valid) {
      s->scissor.minx = 0;
      s->scissor.miny = 0;
      s->scissor.maxx = dst_surface->width;
      s->scissor.maxy = dst_surface->height;
   }

   /* Clear only when there is a non-empty dirty region to clear. */
   if (clear_dirty && dirty_area &&
       (dirty_area->x0 < dirty_area->x1 || dirty_area->y0 < dirty_area->y1)) {

      c->pipe->clear_render_target(c->pipe, dst_surface, &s->clear_color,
                                   0, 0, dst_surface->width, dst_surface->height, false);
      dirty_area->x0 = dirty_area->y0 = VL_COMPOSITOR_MAX_DIRTY;
      dirty_area->x1 = dirty_area->y1 = VL_COMPOSITOR_MIN_DIRTY;
   }

   pipe_set_constant_buffer(c->pipe, PIPE_SHADER_COMPUTE, 0, s->shader_params);

   draw_layers(c, s, dirty_area);
}
816
/* Create all compute shaders used by the compositor. Returns false on the
 * first failure; any shaders already created are released later by
 * vl_compositor_cs_cleanup_shaders(), which checks each handle for NULL. */
bool vl_compositor_cs_init_shaders(struct vl_compositor *c)
{
   assert(c);

   c->cs_video_buffer = create_video_buffer_shader(c);
   if (!c->cs_video_buffer) {
      debug_printf("Unable to create video_buffer compute shader.\n");
      return false;
   }

   c->cs_weave_rgb = create_weave_shader(c, true, false);
   if (!c->cs_weave_rgb) {
      debug_printf("Unable to create weave_rgb compute shader.\n");
      return false;
   }

   c->cs_yuv.weave.y = create_weave_shader(c, false, true);
   c->cs_yuv.weave.uv = create_weave_shader(c, false, false);
   c->cs_yuv.progressive.y = create_yuv_progressive_shader(c, true);
   c->cs_yuv.progressive.uv = create_yuv_progressive_shader(c, false);
   if (!c->cs_yuv.weave.y || !c->cs_yuv.weave.uv) {
      debug_printf("Unable to create YCbCr i-to-YCbCr p deint compute shader.\n");
      return false;
   }
   if (!c->cs_yuv.progressive.y || !c->cs_yuv.progressive.uv) {
      debug_printf("Unable to create YCbCr p-to-NV12 compute shader.\n");
      return false;
   }

   c->cs_rgb_yuv.y = create_rgb_yuv_shader(c, true);
   c->cs_rgb_yuv.uv = create_rgb_yuv_shader(c, false);
   if (!c->cs_rgb_yuv.y || !c->cs_rgb_yuv.uv) {
      debug_printf("Unable to create RGB-to-NV12 compute shader.\n");
      return false;
   }

   return true;
}
855
vl_compositor_cs_cleanup_shaders(struct vl_compositor * c)856 void vl_compositor_cs_cleanup_shaders(struct vl_compositor *c)
857 {
858 assert(c);
859
860 if (c->cs_video_buffer)
861 c->pipe->delete_compute_state(c->pipe, c->cs_video_buffer);
862 if (c->cs_weave_rgb)
863 c->pipe->delete_compute_state(c->pipe, c->cs_weave_rgb);
864 if (c->cs_yuv.weave.y)
865 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.weave.y);
866 if (c->cs_yuv.weave.uv)
867 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.weave.uv);
868 if (c->cs_yuv.progressive.y)
869 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.progressive.y);
870 if (c->cs_yuv.progressive.uv)
871 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.progressive.uv);
872 if (c->cs_rgb_yuv.y)
873 c->pipe->delete_compute_state(c->pipe, c->cs_rgb_yuv.y);
874 if (c->cs_rgb_yuv.uv)
875 c->pipe->delete_compute_state(c->pipe, c->cs_rgb_yuv.uv);
876 }
877