xref: /aosp_15_r20/external/mesa3d/src/broadcom/vulkan/v3dvx_pipeline.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2021 Raspberry Pi Ltd
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "v3dv_private.h"
25 #include "broadcom/common/v3d_macros.h"
26 #include "broadcom/cle/v3dx_pack.h"
27 #include "broadcom/compiler/v3d_compiler.h"
28 
29 static uint8_t
blend_factor(VkBlendFactor factor,bool dst_alpha_one,bool * needs_constants)30 blend_factor(VkBlendFactor factor, bool dst_alpha_one, bool *needs_constants)
31 {
32    switch (factor) {
33    case VK_BLEND_FACTOR_ZERO:
34    case VK_BLEND_FACTOR_ONE:
35    case VK_BLEND_FACTOR_SRC_COLOR:
36    case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
37    case VK_BLEND_FACTOR_DST_COLOR:
38    case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
39    case VK_BLEND_FACTOR_SRC_ALPHA:
40    case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
41    case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
42       return factor;
43    case VK_BLEND_FACTOR_CONSTANT_COLOR:
44    case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
45    case VK_BLEND_FACTOR_CONSTANT_ALPHA:
46    case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
47       *needs_constants = true;
48       return factor;
49    case VK_BLEND_FACTOR_DST_ALPHA:
50       return dst_alpha_one ? V3D_BLEND_FACTOR_ONE :
51                              V3D_BLEND_FACTOR_DST_ALPHA;
52    case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
53       return dst_alpha_one ? V3D_BLEND_FACTOR_ZERO :
54                              V3D_BLEND_FACTOR_INV_DST_ALPHA;
55    case VK_BLEND_FACTOR_SRC1_COLOR:
56    case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
57    case VK_BLEND_FACTOR_SRC1_ALPHA:
58    case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
59       unreachable("Invalid blend factor: dual source blending not supported.");
60    default:
61       unreachable("Unknown blend factor.");
62    }
63 }
64 
65 static void
pack_blend(struct v3dv_pipeline * pipeline,const VkPipelineColorBlendStateCreateInfo * cb_info)66 pack_blend(struct v3dv_pipeline *pipeline,
67            const VkPipelineColorBlendStateCreateInfo *cb_info)
68 {
69    /* By default, we are not enabling blending and all color channel writes are
70     * enabled. Color write enables are independent of whether blending is
71     * enabled or not.
72     *
73     * Vulkan specifies color write masks so that bits set correspond to
74     * enabled channels. Our hardware does it the other way around.
75     */
76    pipeline->blend.enables = 0;
77    pipeline->blend.color_write_masks = 0; /* All channels enabled */
78 
79    if (!cb_info)
80       return;
81 
82    const struct vk_render_pass_state *ri = &pipeline->rendering_info;
83    if (ri->color_attachment_count == 0)
84       return;
85 
86    assert(ri->color_attachment_count == cb_info->attachmentCount);
87    pipeline->blend.needs_color_constants = false;
88    uint32_t color_write_masks = 0;
89    for (uint32_t i = 0; i < ri->color_attachment_count; i++) {
90       const VkPipelineColorBlendAttachmentState *b_state =
91          &cb_info->pAttachments[i];
92 
93       const VkFormat vk_format = ri->color_attachment_formats[i];
94       if (vk_format == VK_FORMAT_UNDEFINED)
95          continue;
96 
97       color_write_masks |= (~b_state->colorWriteMask & 0xf) << (4 * i);
98 
99       if (!b_state->blendEnable)
100          continue;
101 
102       const struct v3dv_format *format = v3dX(get_format)(vk_format);
103 
104       /* We only do blending with render pass attachments, so we should not have
105        * multiplanar images here
106        */
107       assert(format->plane_count == 1);
108       bool dst_alpha_one = (format->planes[0].swizzle[3] == PIPE_SWIZZLE_1);
109 
110       uint8_t rt_mask = 1 << i;
111       pipeline->blend.enables |= rt_mask;
112 
113       v3dvx_pack(pipeline->blend.cfg[i], BLEND_CFG, config) {
114          config.render_target_mask = rt_mask;
115 
116          config.color_blend_mode = b_state->colorBlendOp;
117          config.color_blend_dst_factor =
118             blend_factor(b_state->dstColorBlendFactor, dst_alpha_one,
119                          &pipeline->blend.needs_color_constants);
120          config.color_blend_src_factor =
121             blend_factor(b_state->srcColorBlendFactor, dst_alpha_one,
122                          &pipeline->blend.needs_color_constants);
123 
124          config.alpha_blend_mode = b_state->alphaBlendOp;
125          config.alpha_blend_dst_factor =
126             blend_factor(b_state->dstAlphaBlendFactor, dst_alpha_one,
127                          &pipeline->blend.needs_color_constants);
128          config.alpha_blend_src_factor =
129             blend_factor(b_state->srcAlphaBlendFactor, dst_alpha_one,
130                          &pipeline->blend.needs_color_constants);
131       }
132    }
133 
134    pipeline->blend.color_write_masks = color_write_masks;
135 }
136 
137 /* This requires that pack_blend() had been called before so we can set
138  * the overall blend enable bit in the CFG_BITS packet.
139  */
140 static void
pack_cfg_bits(struct v3dv_pipeline * pipeline,const VkPipelineDepthStencilStateCreateInfo * ds_info,const VkPipelineRasterizationStateCreateInfo * rs_info,const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT * pv_info,const VkPipelineRasterizationLineStateCreateInfoEXT * ls_info,const VkPipelineMultisampleStateCreateInfo * ms_info)141 pack_cfg_bits(struct v3dv_pipeline *pipeline,
142               const VkPipelineDepthStencilStateCreateInfo *ds_info,
143               const VkPipelineRasterizationStateCreateInfo *rs_info,
144               const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info,
145               const VkPipelineRasterizationLineStateCreateInfoEXT *ls_info,
146               const VkPipelineMultisampleStateCreateInfo *ms_info)
147 {
148    assert(sizeof(pipeline->cfg_bits) == cl_packet_length(CFG_BITS));
149 
150    pipeline->msaa =
151       ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
152 
153    v3dvx_pack(pipeline->cfg_bits, CFG_BITS, config) {
154       /* This is required to pass line rasterization tests in CTS while
155        * exposing, at least, a minimum of 4-bits of subpixel precision
156        * (the minimum requirement).
157        */
158       if (ls_info &&
159           ls_info->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT)
160          config.line_rasterization = V3D_LINE_RASTERIZATION_DIAMOND_EXIT;
161       else
162          config.line_rasterization = V3D_LINE_RASTERIZATION_PERP_END_CAPS;
163 
164       if (rs_info && rs_info->polygonMode != VK_POLYGON_MODE_FILL) {
165          config.direct3d_wireframe_triangles_mode = true;
166          config.direct3d_point_fill_mode =
167             rs_info->polygonMode == VK_POLYGON_MODE_POINT;
168       }
169 
170       /* diamond-exit rasterization does not support oversample */
171       config.rasterizer_oversample_mode =
172          (config.line_rasterization == V3D_LINE_RASTERIZATION_PERP_END_CAPS &&
173           pipeline->msaa) ? 1 : 0;
174 
175       /* From the Vulkan spec:
176        *
177        *   "Provoking Vertex:
178        *
179        *       The vertex in a primitive from which flat shaded attribute
180        *       values are taken. This is generally the “first” vertex in the
181        *       primitive, and depends on the primitive topology."
182        *
183        * First vertex is the Direct3D style for provoking vertex. OpenGL uses
184        * the last vertex by default.
185        */
186       if (pv_info) {
187          config.direct3d_provoking_vertex =
188             pv_info->provokingVertexMode ==
189                VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT;
190       } else {
191          config.direct3d_provoking_vertex = true;
192       }
193 
194       config.blend_enable = pipeline->blend.enables != 0;
195 
196 #if V3D_VERSION >= 71
197       /* From the Vulkan spec:
198        *
199        *    "depthClampEnable controls whether to clamp the fragment’s depth
200        *     values as described in Depth Test. If the pipeline is not created
201        *     with VkPipelineRasterizationDepthClipStateCreateInfoEXT present
202        *     then enabling depth clamp will also disable clipping primitives to
203        *     the z planes of the frustrum as described in Primitive Clipping.
204        *     Otherwise depth clipping is controlled by the state set in
205        *     VkPipelineRasterizationDepthClipStateCreateInfoEXT."
206        */
207       bool z_clamp_enable = rs_info && rs_info->depthClampEnable;
208       bool z_clip_enable = false;
209       const VkPipelineRasterizationDepthClipStateCreateInfoEXT *clip_info =
210          rs_info ? vk_find_struct_const(rs_info->pNext,
211                                         PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT) :
212                    NULL;
213       if (clip_info)
214          z_clip_enable = clip_info->depthClipEnable;
215       else if (!z_clamp_enable)
216          z_clip_enable = true;
217 
218       if (z_clip_enable) {
219          config.z_clipping_mode = pipeline->negative_one_to_one ?
220 	    V3D_Z_CLIP_MODE_MIN_ONE_TO_ONE : V3D_Z_CLIP_MODE_ZERO_TO_ONE;
221       } else {
222          config.z_clipping_mode = V3D_Z_CLIP_MODE_NONE;
223       }
224 
225       config.z_clamp_mode = z_clamp_enable;
226 #endif
227    };
228 }
229 
230 uint32_t
v3dX(translate_stencil_op)231 v3dX(translate_stencil_op)(VkStencilOp op)
232 {
233    switch (op) {
234    case VK_STENCIL_OP_KEEP:
235       return V3D_STENCIL_OP_KEEP;
236    case VK_STENCIL_OP_ZERO:
237       return V3D_STENCIL_OP_ZERO;
238    case VK_STENCIL_OP_REPLACE:
239       return V3D_STENCIL_OP_REPLACE;
240    case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
241       return V3D_STENCIL_OP_INCR;
242    case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
243       return V3D_STENCIL_OP_DECR;
244    case VK_STENCIL_OP_INVERT:
245       return V3D_STENCIL_OP_INVERT;
246    case VK_STENCIL_OP_INCREMENT_AND_WRAP:
247       return V3D_STENCIL_OP_INCWRAP;
248    case VK_STENCIL_OP_DECREMENT_AND_WRAP:
249       return V3D_STENCIL_OP_DECWRAP;
250    default:
251       unreachable("bad stencil op");
252    }
253 }
254 
255 static void
pack_single_stencil_cfg(struct v3dv_pipeline * pipeline,uint8_t * stencil_cfg,bool is_front,bool is_back,const VkStencilOpState * stencil_state,const struct vk_graphics_pipeline_state * state)256 pack_single_stencil_cfg(struct v3dv_pipeline *pipeline,
257                         uint8_t *stencil_cfg,
258                         bool is_front,
259                         bool is_back,
260                         const VkStencilOpState *stencil_state,
261                         const struct vk_graphics_pipeline_state *state)
262 {
263    /* From the Vulkan spec:
264     *
265     *   "Reference is an integer reference value that is used in the unsigned
266     *    stencil comparison. The reference value used by stencil comparison
267     *    must be within the range [0,2^s-1] , where s is the number of bits in
268     *    the stencil framebuffer attachment, otherwise the reference value is
269     *    considered undefined."
270     *
271     * In our case, 's' is always 8, so we clamp to that to prevent our packing
272     * functions to assert in debug mode if they see larger values.
273     */
274    v3dvx_pack(stencil_cfg, STENCIL_CFG, config) {
275       config.front_config = is_front;
276       config.back_config = is_back;
277       config.stencil_write_mask = stencil_state->writeMask & 0xff;
278       config.stencil_test_mask = stencil_state->compareMask & 0xff;
279       config.stencil_test_function = stencil_state->compareOp;
280       config.stencil_pass_op =
281          v3dX(translate_stencil_op)(stencil_state->passOp);
282       config.depth_test_fail_op =
283          v3dX(translate_stencil_op)(stencil_state->depthFailOp);
284       config.stencil_test_fail_op =
285          v3dX(translate_stencil_op)(stencil_state->failOp);
286       config.stencil_ref_value = stencil_state->reference & 0xff;
287    }
288 }
289 
290 static void
pack_stencil_cfg(struct v3dv_pipeline * pipeline,const VkPipelineDepthStencilStateCreateInfo * ds_info,const struct vk_graphics_pipeline_state * state)291 pack_stencil_cfg(struct v3dv_pipeline *pipeline,
292                  const VkPipelineDepthStencilStateCreateInfo *ds_info,
293                  const struct vk_graphics_pipeline_state *state)
294 {
295    assert(sizeof(pipeline->stencil_cfg) == 2 * cl_packet_length(STENCIL_CFG));
296 
297    if (!ds_info || !ds_info->stencilTestEnable)
298       return;
299 
300    const struct vk_render_pass_state *ri = &pipeline->rendering_info;
301    if (ri->stencil_attachment_format == VK_FORMAT_UNDEFINED)
302       return;
303 
304    const bool any_dynamic_stencil_states =
305       BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK) ||
306       BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) ||
307       BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE) ||
308       BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) ||
309       BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_OP);
310 
311    /* If front != back or we have dynamic stencil state we can't emit a single
312     * packet for both faces.
313     */
314    bool needs_front_and_back = false;
315    if ((any_dynamic_stencil_states) ||
316        memcmp(&ds_info->front, &ds_info->back, sizeof(ds_info->front))) {
317       needs_front_and_back = true;
318    }
319 
320    /* If the front and back configurations are the same we can emit both with
321     * a single packet.
322     */
323    pipeline->emit_stencil_cfg[0] = true;
324    if (!needs_front_and_back) {
325       pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
326                               true, true, &ds_info->front, state);
327    } else {
328       pipeline->emit_stencil_cfg[1] = true;
329       pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
330                               true, false, &ds_info->front, state);
331       pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[1],
332                               false, true, &ds_info->back, state);
333    }
334 }
335 
336 
337 /* FIXME: Now that we are passing the vk_graphics_pipeline_state we could
338  * avoid passing all those parameters. But doing that we would need to change
339  * all the code that uses the VkXXX structures, and use instead the equivalent
340  * vk_xxx
341  */
342 void
v3dX(pipeline_pack_state)343 v3dX(pipeline_pack_state)(struct v3dv_pipeline *pipeline,
344                           const VkPipelineColorBlendStateCreateInfo *cb_info,
345                           const VkPipelineDepthStencilStateCreateInfo *ds_info,
346                           const VkPipelineRasterizationStateCreateInfo *rs_info,
347                           const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info,
348                           const VkPipelineRasterizationLineStateCreateInfoEXT *ls_info,
349                           const VkPipelineMultisampleStateCreateInfo *ms_info,
350                           const struct vk_graphics_pipeline_state *state)
351 {
352    pack_blend(pipeline, cb_info);
353    pack_cfg_bits(pipeline, ds_info, rs_info, pv_info, ls_info, ms_info);
354    pack_stencil_cfg(pipeline, ds_info, state);
355 }
356 
357 static void
pack_shader_state_record(struct v3dv_pipeline * pipeline)358 pack_shader_state_record(struct v3dv_pipeline *pipeline)
359 {
360    /* To siplify the code we ignore here GL_SHADER_STATE_RECORD_DRAW_INDEX
361     * used with 2712D0, since we know that has the same size as the regular
362     * version.
363     */
364    assert(sizeof(pipeline->shader_state_record) >=
365           cl_packet_length(GL_SHADER_STATE_RECORD));
366 
367    struct v3d_fs_prog_data *prog_data_fs =
368       pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]->prog_data.fs;
369 
370    struct v3d_vs_prog_data *prog_data_vs =
371       pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs;
372 
373    struct v3d_vs_prog_data *prog_data_vs_bin =
374       pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]->prog_data.vs;
375 
376    bool point_size_in_shaded_vertex_data;
377    if (!pipeline->has_gs) {
378       struct v3d_vs_prog_data *prog_data_vs =
379          pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs;
380          point_size_in_shaded_vertex_data = prog_data_vs->writes_psiz;
381    } else {
382       struct v3d_gs_prog_data *prog_data_gs =
383          pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]->prog_data.gs;
384          point_size_in_shaded_vertex_data = prog_data_gs->writes_psiz;
385    }
386 
387    /* Note: we are not packing addresses, as we need the job (see
388     * cl_pack_emit_reloc). Additionally uniforms can't be filled up at this
389     * point as they depend on dynamic info that can be set after create the
390     * pipeline (like viewport), . Would need to be filled later, so we are
391     * doing a partial prepacking.
392     */
393 #if V3D_VERSION >= 71
394    /* 2712D0 (V3D 7.1.10) has included draw index and base vertex, shuffling all
395     * the fields in the packet. Since the versioning framework doesn't handle
396     * revision numbers, the XML has a different shader state record packet
397     * including the new fields and we device at run time which packet we need
398     * to emit.
399     */
400    if (v3d_device_has_draw_index(&pipeline->device->devinfo)) {
401       v3dvx_pack(pipeline->shader_state_record, GL_SHADER_STATE_RECORD_DRAW_INDEX, shader) {
402          shader.enable_clipping = true;
403          shader.point_size_in_shaded_vertex_data = point_size_in_shaded_vertex_data;
404          shader.fragment_shader_does_z_writes = prog_data_fs->writes_z;
405          shader.turn_off_early_z_test = prog_data_fs->disable_ez;
406          shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 =
407             prog_data_fs->uses_center_w;
408          shader.enable_sample_rate_shading =
409             pipeline->sample_rate_shading ||
410             (pipeline->msaa && prog_data_fs->force_per_sample_msaa);
411          shader.any_shader_reads_hardware_written_primitive_id = false;
412          shader.do_scoreboard_wait_on_first_thread_switch =
413             prog_data_fs->lock_scoreboard_on_first_thrsw;
414          shader.disable_implicit_point_line_varyings =
415             !prog_data_fs->uses_implicit_point_line_varyings;
416          shader.number_of_varyings_in_fragment_shader = prog_data_fs->num_inputs;
417          shader.coordinate_shader_input_vpm_segment_size = prog_data_vs_bin->vpm_input_size;
418          shader.vertex_shader_input_vpm_segment_size = prog_data_vs->vpm_input_size;
419          shader.coordinate_shader_output_vpm_segment_size = prog_data_vs_bin->vpm_output_size;
420          shader.vertex_shader_output_vpm_segment_size = prog_data_vs->vpm_output_size;
421          shader.min_coord_shader_input_segments_required_in_play =
422             pipeline->vpm_cfg_bin.As;
423          shader.min_vertex_shader_input_segments_required_in_play =
424             pipeline->vpm_cfg.As;
425          shader.min_coord_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
426             pipeline->vpm_cfg_bin.Ve;
427          shader.min_vertex_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
428             pipeline->vpm_cfg.Ve;
429          shader.coordinate_shader_4_way_threadable = prog_data_vs_bin->base.threads == 4;
430          shader.vertex_shader_4_way_threadable = prog_data_vs->base.threads == 4;
431          shader.fragment_shader_4_way_threadable = prog_data_fs->base.threads == 4;
432          shader.coordinate_shader_start_in_final_thread_section = prog_data_vs_bin->base.single_seg;
433          shader.vertex_shader_start_in_final_thread_section = prog_data_vs->base.single_seg;
434          shader.fragment_shader_start_in_final_thread_section = prog_data_fs->base.single_seg;
435          shader.vertex_id_read_by_coordinate_shader = prog_data_vs_bin->uses_vid;
436          shader.base_instance_id_read_by_coordinate_shader = prog_data_vs_bin->uses_biid;
437          shader.instance_id_read_by_coordinate_shader = prog_data_vs_bin->uses_iid;
438          shader.vertex_id_read_by_vertex_shader = prog_data_vs->uses_vid;
439          shader.base_instance_id_read_by_vertex_shader = prog_data_vs->uses_biid;
440          shader.instance_id_read_by_vertex_shader = prog_data_vs->uses_iid;
441       }
442       return;
443    }
444 #endif
445 
446    v3dvx_pack(pipeline->shader_state_record, GL_SHADER_STATE_RECORD, shader) {
447       shader.enable_clipping = true;
448       shader.point_size_in_shaded_vertex_data = point_size_in_shaded_vertex_data;
449 
450       /* Must be set if the shader modifies Z, discards, or modifies
451        * the sample mask.  For any of these cases, the fragment
452        * shader needs to write the Z value (even just discards).
453        */
454       shader.fragment_shader_does_z_writes = prog_data_fs->writes_z;
455 
456       /* Set if the EZ test must be disabled (due to shader side
457        * effects and the early_z flag not being present in the
458        * shader).
459        */
460       shader.turn_off_early_z_test = prog_data_fs->disable_ez;
461 
462       shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 =
463          prog_data_fs->uses_center_w;
464 
465       /* The description for gl_SampleID states that if a fragment shader reads
466        * it, then we should automatically activate per-sample shading. However,
467        * the Vulkan spec also states that if a framebuffer has no attachments:
468        *
469        *    "The subpass continues to use the width, height, and layers of the
470        *     framebuffer to define the dimensions of the rendering area, and the
471        *     rasterizationSamples from each pipeline’s
472        *     VkPipelineMultisampleStateCreateInfo to define the number of
473        *     samples used in rasterization multisample rasterization."
474        *
475        * So in this scenario, if the pipeline doesn't enable multiple samples
476        * but the fragment shader accesses gl_SampleID we would be requested
477        * to do per-sample shading in single sample rasterization mode, which
478        * is pointless, so just disable it in that case.
479        */
480       shader.enable_sample_rate_shading =
481          pipeline->sample_rate_shading ||
482          (pipeline->msaa && prog_data_fs->force_per_sample_msaa);
483 
484       shader.any_shader_reads_hardware_written_primitive_id = false;
485 
486       shader.do_scoreboard_wait_on_first_thread_switch =
487          prog_data_fs->lock_scoreboard_on_first_thrsw;
488       shader.disable_implicit_point_line_varyings =
489          !prog_data_fs->uses_implicit_point_line_varyings;
490 
491       shader.number_of_varyings_in_fragment_shader =
492          prog_data_fs->num_inputs;
493 
494       /* Note: see previous note about addresses */
495       /* shader.coordinate_shader_code_address */
496       /* shader.vertex_shader_code_address */
497       /* shader.fragment_shader_code_address */
498 
499 #if V3D_VERSION == 42
500       shader.coordinate_shader_propagate_nans = true;
501       shader.vertex_shader_propagate_nans = true;
502       shader.fragment_shader_propagate_nans = true;
503 
504       /* FIXME: Use combined input/output size flag in the common case (also
505        * on v3d, see v3dx_draw).
506        */
507       shader.coordinate_shader_has_separate_input_and_output_vpm_blocks =
508          prog_data_vs_bin->separate_segments;
509       shader.vertex_shader_has_separate_input_and_output_vpm_blocks =
510          prog_data_vs->separate_segments;
511       shader.coordinate_shader_input_vpm_segment_size =
512          prog_data_vs_bin->separate_segments ?
513          prog_data_vs_bin->vpm_input_size : 1;
514       shader.vertex_shader_input_vpm_segment_size =
515          prog_data_vs->separate_segments ?
516          prog_data_vs->vpm_input_size : 1;
517 #endif
518 
519       /* On V3D 7.1 there isn't a specific flag to set if we are using
520        * shared/separate segments or not. We just set the value of
521        * vpm_input_size to 0, and set output to the max needed. That should be
522        * already properly set on prog_data_vs_bin
523        */
524 #if V3D_VERSION == 71
525       shader.coordinate_shader_input_vpm_segment_size =
526          prog_data_vs_bin->vpm_input_size;
527       shader.vertex_shader_input_vpm_segment_size =
528          prog_data_vs->vpm_input_size;
529 #endif
530 
531       shader.coordinate_shader_output_vpm_segment_size =
532          prog_data_vs_bin->vpm_output_size;
533       shader.vertex_shader_output_vpm_segment_size =
534          prog_data_vs->vpm_output_size;
535 
536       /* Note: see previous note about addresses */
537       /* shader.coordinate_shader_uniforms_address */
538       /* shader.vertex_shader_uniforms_address */
539       /* shader.fragment_shader_uniforms_address */
540 
541       shader.min_coord_shader_input_segments_required_in_play =
542          pipeline->vpm_cfg_bin.As;
543       shader.min_vertex_shader_input_segments_required_in_play =
544          pipeline->vpm_cfg.As;
545 
546       shader.min_coord_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
547          pipeline->vpm_cfg_bin.Ve;
548       shader.min_vertex_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
549          pipeline->vpm_cfg.Ve;
550 
551       shader.coordinate_shader_4_way_threadable =
552          prog_data_vs_bin->base.threads == 4;
553       shader.vertex_shader_4_way_threadable =
554          prog_data_vs->base.threads == 4;
555       shader.fragment_shader_4_way_threadable =
556          prog_data_fs->base.threads == 4;
557 
558       shader.coordinate_shader_start_in_final_thread_section =
559          prog_data_vs_bin->base.single_seg;
560       shader.vertex_shader_start_in_final_thread_section =
561          prog_data_vs->base.single_seg;
562       shader.fragment_shader_start_in_final_thread_section =
563          prog_data_fs->base.single_seg;
564 
565       shader.vertex_id_read_by_coordinate_shader =
566          prog_data_vs_bin->uses_vid;
567       shader.base_instance_id_read_by_coordinate_shader =
568          prog_data_vs_bin->uses_biid;
569       shader.instance_id_read_by_coordinate_shader =
570          prog_data_vs_bin->uses_iid;
571       shader.vertex_id_read_by_vertex_shader =
572          prog_data_vs->uses_vid;
573       shader.base_instance_id_read_by_vertex_shader =
574          prog_data_vs->uses_biid;
575       shader.instance_id_read_by_vertex_shader =
576          prog_data_vs->uses_iid;
577 
578       /* Note: see previous note about addresses */
579       /* shader.address_of_default_attribute_values */
580    }
581 }
582 
583 static void
pack_vcm_cache_size(struct v3dv_pipeline * pipeline)584 pack_vcm_cache_size(struct v3dv_pipeline *pipeline)
585 {
586    assert(sizeof(pipeline->vcm_cache_size) ==
587           cl_packet_length(VCM_CACHE_SIZE));
588 
589    v3dvx_pack(pipeline->vcm_cache_size, VCM_CACHE_SIZE, vcm) {
590       vcm.number_of_16_vertex_batches_for_binning = pipeline->vpm_cfg_bin.Vc;
591       vcm.number_of_16_vertex_batches_for_rendering = pipeline->vpm_cfg.Vc;
592    }
593 }
594 
595 /* As defined on the GL_SHADER_STATE_ATTRIBUTE_RECORD */
596 static uint8_t
get_attr_type(const struct util_format_description * desc)597 get_attr_type(const struct util_format_description *desc)
598 {
599    uint32_t r_size = desc->channel[0].size;
600    uint8_t attr_type = ATTRIBUTE_FLOAT;
601 
602    switch (desc->channel[0].type) {
603    case UTIL_FORMAT_TYPE_FLOAT:
604       if (r_size == 32) {
605          attr_type = ATTRIBUTE_FLOAT;
606       } else {
607          assert(r_size == 16);
608          attr_type = ATTRIBUTE_HALF_FLOAT;
609       }
610       break;
611 
612    case UTIL_FORMAT_TYPE_SIGNED:
613    case UTIL_FORMAT_TYPE_UNSIGNED:
614       switch (r_size) {
615       case 32:
616          attr_type = ATTRIBUTE_INT;
617          break;
618       case 16:
619          attr_type = ATTRIBUTE_SHORT;
620          break;
621       case 10:
622          attr_type = ATTRIBUTE_INT2_10_10_10;
623          break;
624       case 8:
625          attr_type = ATTRIBUTE_BYTE;
626          break;
627       default:
628          fprintf(stderr,
629                  "format %s unsupported\n",
630                  desc->name);
631          attr_type = ATTRIBUTE_BYTE;
632          abort();
633       }
634       break;
635 
636    default:
637       fprintf(stderr,
638               "format %s unsupported\n",
639               desc->name);
640       abort();
641    }
642 
643    return attr_type;
644 }
645 
646 static void
pack_shader_state_attribute_record(struct v3dv_pipeline * pipeline,uint32_t index,const VkVertexInputAttributeDescription * vi_desc)647 pack_shader_state_attribute_record(struct v3dv_pipeline *pipeline,
648                                    uint32_t index,
649                                    const VkVertexInputAttributeDescription *vi_desc)
650 {
651    const uint32_t packet_length =
652       cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);
653 
654    const struct util_format_description *desc =
655       vk_format_description(vi_desc->format);
656 
657    uint32_t binding = vi_desc->binding;
658 
659    v3dvx_pack(&pipeline->vertex_attrs[index * packet_length],
660              GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {
661 
662       /* vec_size == 0 means 4 */
663       attr.vec_size = desc->nr_channels & 3;
664       attr.signed_int_type = (desc->channel[0].type ==
665                               UTIL_FORMAT_TYPE_SIGNED);
666       attr.normalized_int_type = desc->channel[0].normalized;
667       attr.read_as_int_uint = desc->channel[0].pure_integer;
668 
669       attr.instance_divisor = MIN2(pipeline->vb[binding].instance_divisor,
670                                    V3D_MAX_VERTEX_ATTRIB_DIVISOR);
671       attr.type = get_attr_type(desc);
672    }
673 }
674 
675 void
v3dX(pipeline_pack_compile_state)676 v3dX(pipeline_pack_compile_state)(struct v3dv_pipeline *pipeline,
677                                   const VkPipelineVertexInputStateCreateInfo *vi_info,
678                                   const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info)
679 {
680    pack_shader_state_record(pipeline);
681    pack_vcm_cache_size(pipeline);
682 
683    pipeline->vb_count = vi_info->vertexBindingDescriptionCount;
684    for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
685       const VkVertexInputBindingDescription *desc =
686          &vi_info->pVertexBindingDescriptions[i];
687 
688       pipeline->vb[desc->binding].instance_divisor = desc->inputRate;
689    }
690 
691    if (vd_info) {
692       for (uint32_t i = 0; i < vd_info->vertexBindingDivisorCount; i++) {
693          const VkVertexInputBindingDivisorDescriptionEXT *desc =
694             &vd_info->pVertexBindingDivisors[i];
695 
696          pipeline->vb[desc->binding].instance_divisor = desc->divisor;
697       }
698    }
699 
700    pipeline->va_count = 0;
701    struct v3d_vs_prog_data *prog_data_vs =
702       pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs;
703 
704    for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
705       const VkVertexInputAttributeDescription *desc =
706          &vi_info->pVertexAttributeDescriptions[i];
707       uint32_t location = desc->location + VERT_ATTRIB_GENERIC0;
708 
709       /* We use a custom driver_location_map instead of
710        * nir_find_variable_with_location because if we were able to get the
711        * shader variant from the cache, we would not have the nir shader
712        * available.
713        */
714       uint32_t driver_location =
715          prog_data_vs->driver_location_map[location];
716 
717       if (driver_location != -1) {
718          assert(driver_location < MAX_VERTEX_ATTRIBS);
719          pipeline->va[driver_location].offset = desc->offset;
720          pipeline->va[driver_location].binding = desc->binding;
721          pipeline->va[driver_location].vk_format = desc->format;
722 
723          pack_shader_state_attribute_record(pipeline, driver_location, desc);
724 
725          pipeline->va_count++;
726       }
727    }
728 }
729 
#if V3D_VERSION == 42
/* Returns true if any of the first pipeline->va_count entries of
 * pipeline->va has a format for which vk_format_is_int() is true, and false
 * otherwise.
 */
static bool
pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)
{
   bool found_int = false;

   /* Stop scanning as soon as one integer attribute has been seen. */
   for (uint8_t slot = 0; !found_int && slot < pipeline->va_count; slot++)
      found_int = vk_format_is_int(pipeline->va[slot].vk_format);

   return found_int;
}
#endif
741 
742 bool
v3dX(pipeline_needs_default_attribute_values)743 v3dX(pipeline_needs_default_attribute_values)(struct v3dv_pipeline *pipeline)
744 {
745 #if V3D_VERSION == 42
746    return pipeline_has_integer_vertex_attrib(pipeline);
747 #endif
748 
749    return false;
750 }
751 
752 /* @pipeline can be NULL. In that case we assume the most common case. For
753  * example, for v42 we assume in that case that all the attributes have a
754  * float format (we only create an all-float BO once and we reuse it with all
755  * float pipelines), otherwise we look at the actual type of each attribute
756  * used with the specific pipeline passed in.
757  */
758 struct v3dv_bo *
v3dX(create_default_attribute_values)759 v3dX(create_default_attribute_values)(struct v3dv_device *device,
760                                       struct v3dv_pipeline *pipeline)
761 {
762 #if V3D_VERSION >= 71
763    return NULL;
764 #endif
765 
766    uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4;
767    struct v3dv_bo *bo;
768 
769    bo = v3dv_bo_alloc(device, size, "default_vi_attributes", true);
770 
771    if (!bo) {
772       fprintf(stderr, "failed to allocate memory for the default "
773               "attribute values\n");
774       return NULL;
775    }
776 
777    bool ok = v3dv_bo_map(device, bo, size);
778    if (!ok) {
779       fprintf(stderr, "failed to map default attribute values buffer\n");
780       return NULL;
781    }
782 
783    uint32_t *attrs = bo->map;
784    uint8_t va_count = pipeline != NULL ? pipeline->va_count : 0;
785    for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
786       attrs[i * 4 + 0] = 0;
787       attrs[i * 4 + 1] = 0;
788       attrs[i * 4 + 2] = 0;
789       VkFormat attr_format =
790          pipeline != NULL ? pipeline->va[i].vk_format : VK_FORMAT_UNDEFINED;
791       if (i < va_count && vk_format_is_int(attr_format)) {
792          attrs[i * 4 + 3] = 1;
793       } else {
794          attrs[i * 4 + 3] = fui(1.0);
795       }
796    }
797 
798    v3dv_bo_unmap(device, bo);
799 
800    return bo;
801 }
802