1 /*
2 * Copyright © 2021 Raspberry Pi Ltd
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "v3dv_private.h"
25 #include "broadcom/common/v3d_macros.h"
26 #include "broadcom/cle/v3dx_pack.h"
27 #include "broadcom/compiler/v3d_compiler.h"
28
29 static uint8_t
blend_factor(VkBlendFactor factor,bool dst_alpha_one,bool * needs_constants)30 blend_factor(VkBlendFactor factor, bool dst_alpha_one, bool *needs_constants)
31 {
32 switch (factor) {
33 case VK_BLEND_FACTOR_ZERO:
34 case VK_BLEND_FACTOR_ONE:
35 case VK_BLEND_FACTOR_SRC_COLOR:
36 case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
37 case VK_BLEND_FACTOR_DST_COLOR:
38 case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
39 case VK_BLEND_FACTOR_SRC_ALPHA:
40 case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
41 case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
42 return factor;
43 case VK_BLEND_FACTOR_CONSTANT_COLOR:
44 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
45 case VK_BLEND_FACTOR_CONSTANT_ALPHA:
46 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
47 *needs_constants = true;
48 return factor;
49 case VK_BLEND_FACTOR_DST_ALPHA:
50 return dst_alpha_one ? V3D_BLEND_FACTOR_ONE :
51 V3D_BLEND_FACTOR_DST_ALPHA;
52 case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
53 return dst_alpha_one ? V3D_BLEND_FACTOR_ZERO :
54 V3D_BLEND_FACTOR_INV_DST_ALPHA;
55 case VK_BLEND_FACTOR_SRC1_COLOR:
56 case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
57 case VK_BLEND_FACTOR_SRC1_ALPHA:
58 case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
59 unreachable("Invalid blend factor: dual source blending not supported.");
60 default:
61 unreachable("Unknown blend factor.");
62 }
63 }
64
65 static void
pack_blend(struct v3dv_pipeline * pipeline,const VkPipelineColorBlendStateCreateInfo * cb_info)66 pack_blend(struct v3dv_pipeline *pipeline,
67 const VkPipelineColorBlendStateCreateInfo *cb_info)
68 {
69 /* By default, we are not enabling blending and all color channel writes are
70 * enabled. Color write enables are independent of whether blending is
71 * enabled or not.
72 *
73 * Vulkan specifies color write masks so that bits set correspond to
74 * enabled channels. Our hardware does it the other way around.
75 */
76 pipeline->blend.enables = 0;
77 pipeline->blend.color_write_masks = 0; /* All channels enabled */
78
79 if (!cb_info)
80 return;
81
82 const struct vk_render_pass_state *ri = &pipeline->rendering_info;
83 if (ri->color_attachment_count == 0)
84 return;
85
86 assert(ri->color_attachment_count == cb_info->attachmentCount);
87 pipeline->blend.needs_color_constants = false;
88 uint32_t color_write_masks = 0;
89 for (uint32_t i = 0; i < ri->color_attachment_count; i++) {
90 const VkPipelineColorBlendAttachmentState *b_state =
91 &cb_info->pAttachments[i];
92
93 const VkFormat vk_format = ri->color_attachment_formats[i];
94 if (vk_format == VK_FORMAT_UNDEFINED)
95 continue;
96
97 color_write_masks |= (~b_state->colorWriteMask & 0xf) << (4 * i);
98
99 if (!b_state->blendEnable)
100 continue;
101
102 const struct v3dv_format *format = v3dX(get_format)(vk_format);
103
104 /* We only do blending with render pass attachments, so we should not have
105 * multiplanar images here
106 */
107 assert(format->plane_count == 1);
108 bool dst_alpha_one = (format->planes[0].swizzle[3] == PIPE_SWIZZLE_1);
109
110 uint8_t rt_mask = 1 << i;
111 pipeline->blend.enables |= rt_mask;
112
113 v3dvx_pack(pipeline->blend.cfg[i], BLEND_CFG, config) {
114 config.render_target_mask = rt_mask;
115
116 config.color_blend_mode = b_state->colorBlendOp;
117 config.color_blend_dst_factor =
118 blend_factor(b_state->dstColorBlendFactor, dst_alpha_one,
119 &pipeline->blend.needs_color_constants);
120 config.color_blend_src_factor =
121 blend_factor(b_state->srcColorBlendFactor, dst_alpha_one,
122 &pipeline->blend.needs_color_constants);
123
124 config.alpha_blend_mode = b_state->alphaBlendOp;
125 config.alpha_blend_dst_factor =
126 blend_factor(b_state->dstAlphaBlendFactor, dst_alpha_one,
127 &pipeline->blend.needs_color_constants);
128 config.alpha_blend_src_factor =
129 blend_factor(b_state->srcAlphaBlendFactor, dst_alpha_one,
130 &pipeline->blend.needs_color_constants);
131 }
132 }
133
134 pipeline->blend.color_write_masks = color_write_masks;
135 }
136
/* Pre-packs the CFG_BITS packet for the pipeline: line rasterization mode,
 * wireframe/point fill, oversampling, provoking vertex and (on V3D 7.1+)
 * depth clip/clamp configuration.
 *
 * This requires that pack_blend() had been called before so we can set
 * the overall blend enable bit in the CFG_BITS packet.
 *
 * NOTE(review): ds_info is not read in this version-path — presumably kept
 * for interface parity with other V3D versions; confirm before removing.
 */
static void
pack_cfg_bits(struct v3dv_pipeline *pipeline,
              const VkPipelineDepthStencilStateCreateInfo *ds_info,
              const VkPipelineRasterizationStateCreateInfo *rs_info,
              const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info,
              const VkPipelineRasterizationLineStateCreateInfoEXT *ls_info,
              const VkPipelineMultisampleStateCreateInfo *ms_info)
{
   assert(sizeof(pipeline->cfg_bits) == cl_packet_length(CFG_BITS));

   /* Cached for later use (e.g. sample-rate shading decisions). */
   pipeline->msaa =
      ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;

   v3dvx_pack(pipeline->cfg_bits, CFG_BITS, config) {
      /* This is required to pass line rasterization tests in CTS while
       * exposing, at least, a minimum of 4-bits of subpixel precision
       * (the minimum requirement).
       */
      if (ls_info &&
          ls_info->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT)
         config.line_rasterization = V3D_LINE_RASTERIZATION_DIAMOND_EXIT;
      else
         config.line_rasterization = V3D_LINE_RASTERIZATION_PERP_END_CAPS;

      /* Non-fill polygon modes are implemented by the hardware's D3D-style
       * wireframe/point fill modes.
       */
      if (rs_info && rs_info->polygonMode != VK_POLYGON_MODE_FILL) {
         config.direct3d_wireframe_triangles_mode = true;
         config.direct3d_point_fill_mode =
            rs_info->polygonMode == VK_POLYGON_MODE_POINT;
      }

      /* diamond-exit rasterization does not support oversample, so only
       * enable it for perp-end-caps lines when MSAA is on. This reads back
       * config.line_rasterization set just above.
       */
      config.rasterizer_oversample_mode =
         (config.line_rasterization == V3D_LINE_RASTERIZATION_PERP_END_CAPS &&
          pipeline->msaa) ? 1 : 0;

      /* From the Vulkan spec:
       *
       *   "Provoking Vertex:
       *
       *       The vertex in a primitive from which flat shaded attribute
       *       values are taken. This is generally the "first" vertex in the
       *       primitive, and depends on the primitive topology."
       *
       * First vertex is the Direct3D style for provoking vertex. OpenGL uses
       * the last vertex by default.
       */
      if (pv_info) {
         config.direct3d_provoking_vertex =
            pv_info->provokingVertexMode ==
               VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT;
      } else {
         /* Vulkan's default provoking vertex is the first one. */
         config.direct3d_provoking_vertex = true;
      }

      config.blend_enable = pipeline->blend.enables != 0;

#if V3D_VERSION >= 71
      /* From the Vulkan spec:
       *
       *    "depthClampEnable controls whether to clamp the fragment’s depth
       *     values as described in Depth Test. If the pipeline is not created
       *     with VkPipelineRasterizationDepthClipStateCreateInfoEXT present
       *     then enabling depth clamp will also disable clipping primitives to
       *     the z planes of the frustum as described in Primitive Clipping.
       *     Otherwise depth clipping is controlled by the state set in
       *     VkPipelineRasterizationDepthClipStateCreateInfoEXT."
       */
      bool z_clamp_enable = rs_info && rs_info->depthClampEnable;
      bool z_clip_enable = false;
      const VkPipelineRasterizationDepthClipStateCreateInfoEXT *clip_info =
         rs_info ? vk_find_struct_const(rs_info->pNext,
                                        PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT) :
                   NULL;
      if (clip_info)
         z_clip_enable = clip_info->depthClipEnable;
      else if (!z_clamp_enable)
         z_clip_enable = true;

      /* The clip range depends on whether the pipeline uses a [-1,1] or
       * [0,1] NDC depth range.
       */
      if (z_clip_enable) {
         config.z_clipping_mode = pipeline->negative_one_to_one ?
            V3D_Z_CLIP_MODE_MIN_ONE_TO_ONE : V3D_Z_CLIP_MODE_ZERO_TO_ONE;
      } else {
         config.z_clipping_mode = V3D_Z_CLIP_MODE_NONE;
      }

      config.z_clamp_mode = z_clamp_enable;
#endif
   };
}
229
230 uint32_t
v3dX(translate_stencil_op)231 v3dX(translate_stencil_op)(VkStencilOp op)
232 {
233 switch (op) {
234 case VK_STENCIL_OP_KEEP:
235 return V3D_STENCIL_OP_KEEP;
236 case VK_STENCIL_OP_ZERO:
237 return V3D_STENCIL_OP_ZERO;
238 case VK_STENCIL_OP_REPLACE:
239 return V3D_STENCIL_OP_REPLACE;
240 case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
241 return V3D_STENCIL_OP_INCR;
242 case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
243 return V3D_STENCIL_OP_DECR;
244 case VK_STENCIL_OP_INVERT:
245 return V3D_STENCIL_OP_INVERT;
246 case VK_STENCIL_OP_INCREMENT_AND_WRAP:
247 return V3D_STENCIL_OP_INCWRAP;
248 case VK_STENCIL_OP_DECREMENT_AND_WRAP:
249 return V3D_STENCIL_OP_DECWRAP;
250 default:
251 unreachable("bad stencil op");
252 }
253 }
254
/* Pre-packs one STENCIL_CFG packet into stencil_cfg for the face(s) selected
 * by is_front/is_back (both true packs a single packet covering both faces).
 *
 * NOTE(review): the pipeline and state parameters are not read here —
 * presumably kept so the signature matches future/dynamic-state needs;
 * confirm before removing.
 */
static void
pack_single_stencil_cfg(struct v3dv_pipeline *pipeline,
                        uint8_t *stencil_cfg,
                        bool is_front,
                        bool is_back,
                        const VkStencilOpState *stencil_state,
                        const struct vk_graphics_pipeline_state *state)
{
   /* From the Vulkan spec:
    *
    *   "Reference is an integer reference value that is used in the unsigned
    *    stencil comparison. The reference value used by stencil comparison
    *    must be within the range [0,2^s-1] , where s is the number of bits in
    *    the stencil framebuffer attachment, otherwise the reference value is
    *    considered undefined."
    *
    * In our case, 's' is always 8, so we clamp to that to prevent our packing
    * functions to assert in debug mode if they see larger values.
    */
   v3dvx_pack(stencil_cfg, STENCIL_CFG, config) {
      config.front_config = is_front;
      config.back_config = is_back;
      /* Masks are clamped to the 8-bit stencil range, see comment above. */
      config.stencil_write_mask = stencil_state->writeMask & 0xff;
      config.stencil_test_mask = stencil_state->compareMask & 0xff;
      config.stencil_test_function = stencil_state->compareOp;
      config.stencil_pass_op =
         v3dX(translate_stencil_op)(stencil_state->passOp);
      config.depth_test_fail_op =
         v3dX(translate_stencil_op)(stencil_state->depthFailOp);
      config.stencil_test_fail_op =
         v3dX(translate_stencil_op)(stencil_state->failOp);
      config.stencil_ref_value = stencil_state->reference & 0xff;
   }
}
289
290 static void
pack_stencil_cfg(struct v3dv_pipeline * pipeline,const VkPipelineDepthStencilStateCreateInfo * ds_info,const struct vk_graphics_pipeline_state * state)291 pack_stencil_cfg(struct v3dv_pipeline *pipeline,
292 const VkPipelineDepthStencilStateCreateInfo *ds_info,
293 const struct vk_graphics_pipeline_state *state)
294 {
295 assert(sizeof(pipeline->stencil_cfg) == 2 * cl_packet_length(STENCIL_CFG));
296
297 if (!ds_info || !ds_info->stencilTestEnable)
298 return;
299
300 const struct vk_render_pass_state *ri = &pipeline->rendering_info;
301 if (ri->stencil_attachment_format == VK_FORMAT_UNDEFINED)
302 return;
303
304 const bool any_dynamic_stencil_states =
305 BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK) ||
306 BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) ||
307 BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE) ||
308 BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) ||
309 BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_OP);
310
311 /* If front != back or we have dynamic stencil state we can't emit a single
312 * packet for both faces.
313 */
314 bool needs_front_and_back = false;
315 if ((any_dynamic_stencil_states) ||
316 memcmp(&ds_info->front, &ds_info->back, sizeof(ds_info->front))) {
317 needs_front_and_back = true;
318 }
319
320 /* If the front and back configurations are the same we can emit both with
321 * a single packet.
322 */
323 pipeline->emit_stencil_cfg[0] = true;
324 if (!needs_front_and_back) {
325 pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
326 true, true, &ds_info->front, state);
327 } else {
328 pipeline->emit_stencil_cfg[1] = true;
329 pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
330 true, false, &ds_info->front, state);
331 pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[1],
332 false, true, &ds_info->back, state);
333 }
334 }
335
336
/* Pre-packs all the fixed-function pipeline state that does not depend on
 * the compiled shaders: blend, CFG_BITS and stencil configuration.
 *
 * Order matters: pack_cfg_bits() reads pipeline->blend.enables, which is
 * filled in by pack_blend().
 *
 * FIXME: Now that we are passing the vk_graphics_pipeline_state we could
 * avoid passing all those parameters. But doing that we would need to change
 * all the code that uses the VkXXX structures, and use instead the equivalent
 * vk_xxx
 */
void
v3dX(pipeline_pack_state)(struct v3dv_pipeline *pipeline,
                          const VkPipelineColorBlendStateCreateInfo *cb_info,
                          const VkPipelineDepthStencilStateCreateInfo *ds_info,
                          const VkPipelineRasterizationStateCreateInfo *rs_info,
                          const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info,
                          const VkPipelineRasterizationLineStateCreateInfoEXT *ls_info,
                          const VkPipelineMultisampleStateCreateInfo *ms_info,
                          const struct vk_graphics_pipeline_state *state)
{
   pack_blend(pipeline, cb_info);
   pack_cfg_bits(pipeline, ds_info, rs_info, pv_info, ls_info, ms_info);
   pack_stencil_cfg(pipeline, ds_info, state);
}
356
357 static void
pack_shader_state_record(struct v3dv_pipeline * pipeline)358 pack_shader_state_record(struct v3dv_pipeline *pipeline)
359 {
360 /* To siplify the code we ignore here GL_SHADER_STATE_RECORD_DRAW_INDEX
361 * used with 2712D0, since we know that has the same size as the regular
362 * version.
363 */
364 assert(sizeof(pipeline->shader_state_record) >=
365 cl_packet_length(GL_SHADER_STATE_RECORD));
366
367 struct v3d_fs_prog_data *prog_data_fs =
368 pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]->prog_data.fs;
369
370 struct v3d_vs_prog_data *prog_data_vs =
371 pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs;
372
373 struct v3d_vs_prog_data *prog_data_vs_bin =
374 pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]->prog_data.vs;
375
376 bool point_size_in_shaded_vertex_data;
377 if (!pipeline->has_gs) {
378 struct v3d_vs_prog_data *prog_data_vs =
379 pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs;
380 point_size_in_shaded_vertex_data = prog_data_vs->writes_psiz;
381 } else {
382 struct v3d_gs_prog_data *prog_data_gs =
383 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]->prog_data.gs;
384 point_size_in_shaded_vertex_data = prog_data_gs->writes_psiz;
385 }
386
387 /* Note: we are not packing addresses, as we need the job (see
388 * cl_pack_emit_reloc). Additionally uniforms can't be filled up at this
389 * point as they depend on dynamic info that can be set after create the
390 * pipeline (like viewport), . Would need to be filled later, so we are
391 * doing a partial prepacking.
392 */
393 #if V3D_VERSION >= 71
394 /* 2712D0 (V3D 7.1.10) has included draw index and base vertex, shuffling all
395 * the fields in the packet. Since the versioning framework doesn't handle
396 * revision numbers, the XML has a different shader state record packet
397 * including the new fields and we device at run time which packet we need
398 * to emit.
399 */
400 if (v3d_device_has_draw_index(&pipeline->device->devinfo)) {
401 v3dvx_pack(pipeline->shader_state_record, GL_SHADER_STATE_RECORD_DRAW_INDEX, shader) {
402 shader.enable_clipping = true;
403 shader.point_size_in_shaded_vertex_data = point_size_in_shaded_vertex_data;
404 shader.fragment_shader_does_z_writes = prog_data_fs->writes_z;
405 shader.turn_off_early_z_test = prog_data_fs->disable_ez;
406 shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 =
407 prog_data_fs->uses_center_w;
408 shader.enable_sample_rate_shading =
409 pipeline->sample_rate_shading ||
410 (pipeline->msaa && prog_data_fs->force_per_sample_msaa);
411 shader.any_shader_reads_hardware_written_primitive_id = false;
412 shader.do_scoreboard_wait_on_first_thread_switch =
413 prog_data_fs->lock_scoreboard_on_first_thrsw;
414 shader.disable_implicit_point_line_varyings =
415 !prog_data_fs->uses_implicit_point_line_varyings;
416 shader.number_of_varyings_in_fragment_shader = prog_data_fs->num_inputs;
417 shader.coordinate_shader_input_vpm_segment_size = prog_data_vs_bin->vpm_input_size;
418 shader.vertex_shader_input_vpm_segment_size = prog_data_vs->vpm_input_size;
419 shader.coordinate_shader_output_vpm_segment_size = prog_data_vs_bin->vpm_output_size;
420 shader.vertex_shader_output_vpm_segment_size = prog_data_vs->vpm_output_size;
421 shader.min_coord_shader_input_segments_required_in_play =
422 pipeline->vpm_cfg_bin.As;
423 shader.min_vertex_shader_input_segments_required_in_play =
424 pipeline->vpm_cfg.As;
425 shader.min_coord_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
426 pipeline->vpm_cfg_bin.Ve;
427 shader.min_vertex_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
428 pipeline->vpm_cfg.Ve;
429 shader.coordinate_shader_4_way_threadable = prog_data_vs_bin->base.threads == 4;
430 shader.vertex_shader_4_way_threadable = prog_data_vs->base.threads == 4;
431 shader.fragment_shader_4_way_threadable = prog_data_fs->base.threads == 4;
432 shader.coordinate_shader_start_in_final_thread_section = prog_data_vs_bin->base.single_seg;
433 shader.vertex_shader_start_in_final_thread_section = prog_data_vs->base.single_seg;
434 shader.fragment_shader_start_in_final_thread_section = prog_data_fs->base.single_seg;
435 shader.vertex_id_read_by_coordinate_shader = prog_data_vs_bin->uses_vid;
436 shader.base_instance_id_read_by_coordinate_shader = prog_data_vs_bin->uses_biid;
437 shader.instance_id_read_by_coordinate_shader = prog_data_vs_bin->uses_iid;
438 shader.vertex_id_read_by_vertex_shader = prog_data_vs->uses_vid;
439 shader.base_instance_id_read_by_vertex_shader = prog_data_vs->uses_biid;
440 shader.instance_id_read_by_vertex_shader = prog_data_vs->uses_iid;
441 }
442 return;
443 }
444 #endif
445
446 v3dvx_pack(pipeline->shader_state_record, GL_SHADER_STATE_RECORD, shader) {
447 shader.enable_clipping = true;
448 shader.point_size_in_shaded_vertex_data = point_size_in_shaded_vertex_data;
449
450 /* Must be set if the shader modifies Z, discards, or modifies
451 * the sample mask. For any of these cases, the fragment
452 * shader needs to write the Z value (even just discards).
453 */
454 shader.fragment_shader_does_z_writes = prog_data_fs->writes_z;
455
456 /* Set if the EZ test must be disabled (due to shader side
457 * effects and the early_z flag not being present in the
458 * shader).
459 */
460 shader.turn_off_early_z_test = prog_data_fs->disable_ez;
461
462 shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 =
463 prog_data_fs->uses_center_w;
464
465 /* The description for gl_SampleID states that if a fragment shader reads
466 * it, then we should automatically activate per-sample shading. However,
467 * the Vulkan spec also states that if a framebuffer has no attachments:
468 *
469 * "The subpass continues to use the width, height, and layers of the
470 * framebuffer to define the dimensions of the rendering area, and the
471 * rasterizationSamples from each pipeline’s
472 * VkPipelineMultisampleStateCreateInfo to define the number of
473 * samples used in rasterization multisample rasterization."
474 *
475 * So in this scenario, if the pipeline doesn't enable multiple samples
476 * but the fragment shader accesses gl_SampleID we would be requested
477 * to do per-sample shading in single sample rasterization mode, which
478 * is pointless, so just disable it in that case.
479 */
480 shader.enable_sample_rate_shading =
481 pipeline->sample_rate_shading ||
482 (pipeline->msaa && prog_data_fs->force_per_sample_msaa);
483
484 shader.any_shader_reads_hardware_written_primitive_id = false;
485
486 shader.do_scoreboard_wait_on_first_thread_switch =
487 prog_data_fs->lock_scoreboard_on_first_thrsw;
488 shader.disable_implicit_point_line_varyings =
489 !prog_data_fs->uses_implicit_point_line_varyings;
490
491 shader.number_of_varyings_in_fragment_shader =
492 prog_data_fs->num_inputs;
493
494 /* Note: see previous note about addresses */
495 /* shader.coordinate_shader_code_address */
496 /* shader.vertex_shader_code_address */
497 /* shader.fragment_shader_code_address */
498
499 #if V3D_VERSION == 42
500 shader.coordinate_shader_propagate_nans = true;
501 shader.vertex_shader_propagate_nans = true;
502 shader.fragment_shader_propagate_nans = true;
503
504 /* FIXME: Use combined input/output size flag in the common case (also
505 * on v3d, see v3dx_draw).
506 */
507 shader.coordinate_shader_has_separate_input_and_output_vpm_blocks =
508 prog_data_vs_bin->separate_segments;
509 shader.vertex_shader_has_separate_input_and_output_vpm_blocks =
510 prog_data_vs->separate_segments;
511 shader.coordinate_shader_input_vpm_segment_size =
512 prog_data_vs_bin->separate_segments ?
513 prog_data_vs_bin->vpm_input_size : 1;
514 shader.vertex_shader_input_vpm_segment_size =
515 prog_data_vs->separate_segments ?
516 prog_data_vs->vpm_input_size : 1;
517 #endif
518
519 /* On V3D 7.1 there isn't a specific flag to set if we are using
520 * shared/separate segments or not. We just set the value of
521 * vpm_input_size to 0, and set output to the max needed. That should be
522 * already properly set on prog_data_vs_bin
523 */
524 #if V3D_VERSION == 71
525 shader.coordinate_shader_input_vpm_segment_size =
526 prog_data_vs_bin->vpm_input_size;
527 shader.vertex_shader_input_vpm_segment_size =
528 prog_data_vs->vpm_input_size;
529 #endif
530
531 shader.coordinate_shader_output_vpm_segment_size =
532 prog_data_vs_bin->vpm_output_size;
533 shader.vertex_shader_output_vpm_segment_size =
534 prog_data_vs->vpm_output_size;
535
536 /* Note: see previous note about addresses */
537 /* shader.coordinate_shader_uniforms_address */
538 /* shader.vertex_shader_uniforms_address */
539 /* shader.fragment_shader_uniforms_address */
540
541 shader.min_coord_shader_input_segments_required_in_play =
542 pipeline->vpm_cfg_bin.As;
543 shader.min_vertex_shader_input_segments_required_in_play =
544 pipeline->vpm_cfg.As;
545
546 shader.min_coord_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
547 pipeline->vpm_cfg_bin.Ve;
548 shader.min_vertex_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
549 pipeline->vpm_cfg.Ve;
550
551 shader.coordinate_shader_4_way_threadable =
552 prog_data_vs_bin->base.threads == 4;
553 shader.vertex_shader_4_way_threadable =
554 prog_data_vs->base.threads == 4;
555 shader.fragment_shader_4_way_threadable =
556 prog_data_fs->base.threads == 4;
557
558 shader.coordinate_shader_start_in_final_thread_section =
559 prog_data_vs_bin->base.single_seg;
560 shader.vertex_shader_start_in_final_thread_section =
561 prog_data_vs->base.single_seg;
562 shader.fragment_shader_start_in_final_thread_section =
563 prog_data_fs->base.single_seg;
564
565 shader.vertex_id_read_by_coordinate_shader =
566 prog_data_vs_bin->uses_vid;
567 shader.base_instance_id_read_by_coordinate_shader =
568 prog_data_vs_bin->uses_biid;
569 shader.instance_id_read_by_coordinate_shader =
570 prog_data_vs_bin->uses_iid;
571 shader.vertex_id_read_by_vertex_shader =
572 prog_data_vs->uses_vid;
573 shader.base_instance_id_read_by_vertex_shader =
574 prog_data_vs->uses_biid;
575 shader.instance_id_read_by_vertex_shader =
576 prog_data_vs->uses_iid;
577
578 /* Note: see previous note about addresses */
579 /* shader.address_of_default_attribute_values */
580 }
581 }
582
583 static void
pack_vcm_cache_size(struct v3dv_pipeline * pipeline)584 pack_vcm_cache_size(struct v3dv_pipeline *pipeline)
585 {
586 assert(sizeof(pipeline->vcm_cache_size) ==
587 cl_packet_length(VCM_CACHE_SIZE));
588
589 v3dvx_pack(pipeline->vcm_cache_size, VCM_CACHE_SIZE, vcm) {
590 vcm.number_of_16_vertex_batches_for_binning = pipeline->vpm_cfg_bin.Vc;
591 vcm.number_of_16_vertex_batches_for_rendering = pipeline->vpm_cfg.Vc;
592 }
593 }
594
595 /* As defined on the GL_SHADER_STATE_ATTRIBUTE_RECORD */
596 static uint8_t
get_attr_type(const struct util_format_description * desc)597 get_attr_type(const struct util_format_description *desc)
598 {
599 uint32_t r_size = desc->channel[0].size;
600 uint8_t attr_type = ATTRIBUTE_FLOAT;
601
602 switch (desc->channel[0].type) {
603 case UTIL_FORMAT_TYPE_FLOAT:
604 if (r_size == 32) {
605 attr_type = ATTRIBUTE_FLOAT;
606 } else {
607 assert(r_size == 16);
608 attr_type = ATTRIBUTE_HALF_FLOAT;
609 }
610 break;
611
612 case UTIL_FORMAT_TYPE_SIGNED:
613 case UTIL_FORMAT_TYPE_UNSIGNED:
614 switch (r_size) {
615 case 32:
616 attr_type = ATTRIBUTE_INT;
617 break;
618 case 16:
619 attr_type = ATTRIBUTE_SHORT;
620 break;
621 case 10:
622 attr_type = ATTRIBUTE_INT2_10_10_10;
623 break;
624 case 8:
625 attr_type = ATTRIBUTE_BYTE;
626 break;
627 default:
628 fprintf(stderr,
629 "format %s unsupported\n",
630 desc->name);
631 attr_type = ATTRIBUTE_BYTE;
632 abort();
633 }
634 break;
635
636 default:
637 fprintf(stderr,
638 "format %s unsupported\n",
639 desc->name);
640 abort();
641 }
642
643 return attr_type;
644 }
645
/* Pre-packs one GL_SHADER_STATE_ATTRIBUTE_RECORD at slot `index` of
 * pipeline->vertex_attrs from the Vulkan attribute description.
 *
 * NOTE(review): assumes pipeline->vb[binding].instance_divisor has already
 * been filled in (see v3dX(pipeline_pack_compile_state)).
 */
static void
pack_shader_state_attribute_record(struct v3dv_pipeline *pipeline,
                                   uint32_t index,
                                   const VkVertexInputAttributeDescription *vi_desc)
{
   const uint32_t packet_length =
      cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);

   const struct util_format_description *desc =
      vk_format_description(vi_desc->format);

   uint32_t binding = vi_desc->binding;

   v3dvx_pack(&pipeline->vertex_attrs[index * packet_length],
              GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {

      /* vec_size == 0 means 4 */
      attr.vec_size = desc->nr_channels & 3;
      attr.signed_int_type = (desc->channel[0].type ==
                              UTIL_FORMAT_TYPE_SIGNED);
      attr.normalized_int_type = desc->channel[0].normalized;
      attr.read_as_int_uint = desc->channel[0].pure_integer;

      /* Clamp to the maximum divisor the hardware supports. */
      attr.instance_divisor = MIN2(pipeline->vb[binding].instance_divisor,
                                   V3D_MAX_VERTEX_ATTRIB_DIVISOR);
      attr.type = get_attr_type(desc);
   }
}
674
/* Pre-packs all pipeline state that depends on the compiled shaders: the
 * shader state record, the VCM cache size, and the vertex binding/attribute
 * tables (including the per-attribute shader state records).
 */
void
v3dX(pipeline_pack_compile_state)(struct v3dv_pipeline *pipeline,
                                  const VkPipelineVertexInputStateCreateInfo *vi_info,
                                  const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info)
{
   pack_shader_state_record(pipeline);
   pack_vcm_cache_size(pipeline);

   /* VK_VERTEX_INPUT_RATE_VERTEX is 0 and VK_VERTEX_INPUT_RATE_INSTANCE is
    * 1, so the input rate doubles as the initial instance divisor; explicit
    * divisors from vd_info override it below.
    */
   pipeline->vb_count = vi_info->vertexBindingDescriptionCount;
   for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
      const VkVertexInputBindingDescription *desc =
         &vi_info->pVertexBindingDescriptions[i];

      pipeline->vb[desc->binding].instance_divisor = desc->inputRate;
   }

   if (vd_info) {
      for (uint32_t i = 0; i < vd_info->vertexBindingDivisorCount; i++) {
         const VkVertexInputBindingDivisorDescriptionEXT *desc =
            &vd_info->pVertexBindingDivisors[i];

         pipeline->vb[desc->binding].instance_divisor = desc->divisor;
      }
   }

   pipeline->va_count = 0;
   struct v3d_vs_prog_data *prog_data_vs =
      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs;

   for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
      const VkVertexInputAttributeDescription *desc =
         &vi_info->pVertexAttributeDescriptions[i];
      uint32_t location = desc->location + VERT_ATTRIB_GENERIC0;

      /* We use a custom driver_location_map instead of
       * nir_find_variable_with_location because if we were able to get the
       * shader variant from the cache, we would not have the nir shader
       * available.
       */
      uint32_t driver_location =
         prog_data_vs->driver_location_map[location];

      /* NOTE(review): -1 here relies on the implicit conversion to
       * UINT32_MAX; presumably driver_location_map uses -1 as the "unused"
       * sentinel — an int32_t local would make that explicit. Confirm the
       * map's element type before changing.
       */
      if (driver_location != -1) {
         assert(driver_location < MAX_VERTEX_ATTRIBS);
         pipeline->va[driver_location].offset = desc->offset;
         pipeline->va[driver_location].binding = desc->binding;
         pipeline->va[driver_location].vk_format = desc->format;

         pack_shader_state_attribute_record(pipeline, driver_location, desc);

         pipeline->va_count++;
      }
   }
}
729
730 #if V3D_VERSION == 42
731 static bool
pipeline_has_integer_vertex_attrib(struct v3dv_pipeline * pipeline)732 pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)
733 {
734 for (uint8_t i = 0; i < pipeline->va_count; i++) {
735 if (vk_format_is_int(pipeline->va[i].vk_format))
736 return true;
737 }
738 return false;
739 }
740 #endif
741
742 bool
v3dX(pipeline_needs_default_attribute_values)743 v3dX(pipeline_needs_default_attribute_values)(struct v3dv_pipeline *pipeline)
744 {
745 #if V3D_VERSION == 42
746 return pipeline_has_integer_vertex_attrib(pipeline);
747 #endif
748
749 return false;
750 }
751
752 /* @pipeline can be NULL. In that case we assume the most common case. For
753 * example, for v42 we assume in that case that all the attributes have a
754 * float format (we only create an all-float BO once and we reuse it with all
755 * float pipelines), otherwise we look at the actual type of each attribute
756 * used with the specific pipeline passed in.
757 */
758 struct v3dv_bo *
v3dX(create_default_attribute_values)759 v3dX(create_default_attribute_values)(struct v3dv_device *device,
760 struct v3dv_pipeline *pipeline)
761 {
762 #if V3D_VERSION >= 71
763 return NULL;
764 #endif
765
766 uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4;
767 struct v3dv_bo *bo;
768
769 bo = v3dv_bo_alloc(device, size, "default_vi_attributes", true);
770
771 if (!bo) {
772 fprintf(stderr, "failed to allocate memory for the default "
773 "attribute values\n");
774 return NULL;
775 }
776
777 bool ok = v3dv_bo_map(device, bo, size);
778 if (!ok) {
779 fprintf(stderr, "failed to map default attribute values buffer\n");
780 return NULL;
781 }
782
783 uint32_t *attrs = bo->map;
784 uint8_t va_count = pipeline != NULL ? pipeline->va_count : 0;
785 for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
786 attrs[i * 4 + 0] = 0;
787 attrs[i * 4 + 1] = 0;
788 attrs[i * 4 + 2] = 0;
789 VkFormat attr_format =
790 pipeline != NULL ? pipeline->va[i].vk_format : VK_FORMAT_UNDEFINED;
791 if (i < va_count && vk_format_is_int(attr_format)) {
792 attrs[i * 4 + 3] = 1;
793 } else {
794 attrs[i * 4 + 3] = fui(1.0);
795 }
796 }
797
798 v3dv_bo_unmap(device, bo);
799
800 return bo;
801 }
802