xref: /aosp_15_r20/external/mesa3d/src/microsoft/vulkan/dzn_meta.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © Microsoft Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "dzn_private.h"
25 
26 #include "spirv_to_dxil.h"
27 #include "nir_to_dxil.h"
28 
29 #include "dxil_nir.h"
30 #include "dxil_nir_lower_int_samplers.h"
31 #include "dxil_validator.h"
32 
33 static void
dzn_meta_compile_shader(struct dzn_device * device,nir_shader * nir,D3D12_SHADER_BYTECODE * slot)34 dzn_meta_compile_shader(struct dzn_device *device, nir_shader *nir,
35                         D3D12_SHADER_BYTECODE *slot)
36 {
37    struct dzn_instance *instance =
38       container_of(device->vk.physical->instance, struct dzn_instance, vk);
39    struct dzn_physical_device *pdev =
40       container_of(device->vk.physical, struct dzn_physical_device, vk);
41 
42    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
43 
44    if ((instance->debug_flags & DZN_DEBUG_NIR) &&
45        (instance->debug_flags & DZN_DEBUG_INTERNAL))
46       nir_print_shader(nir, stderr);
47 
48    struct nir_to_dxil_options opts = {
49       .environment = DXIL_ENVIRONMENT_VULKAN,
50       .shader_model_max = dzn_get_shader_model(pdev),
51 #ifdef _WIN32
52       .validator_version_max = dxil_get_validator_version(instance->dxil_validator),
53 #endif
54    };
55    struct blob dxil_blob;
56    ASSERTED bool ret = nir_to_dxil(nir, &opts, NULL, &dxil_blob);
57    assert(ret);
58 
59 #ifdef _WIN32
60    char *err = NULL;
61    bool res = dxil_validate_module(instance->dxil_validator,
62                                    dxil_blob.data,
63                                    dxil_blob.size, &err);
64 
65    if ((instance->debug_flags & DZN_DEBUG_DXIL) &&
66        (instance->debug_flags & DZN_DEBUG_INTERNAL)) {
67       char *disasm = dxil_disasm_module(instance->dxil_validator,
68                                         dxil_blob.data,
69                                         dxil_blob.size);
70       if (disasm) {
71          fprintf(stderr,
72                  "== BEGIN SHADER ============================================\n"
73                  "%s\n"
74                  "== END SHADER ==============================================\n",
75                   disasm);
76          ralloc_free(disasm);
77       }
78    }
79 
80    if ((instance->debug_flags & DZN_DEBUG_DXIL) &&
81        (instance->debug_flags & DZN_DEBUG_INTERNAL) &&
82        !res && !(instance->debug_flags & DZN_DEBUG_EXPERIMENTAL)) {
83       fprintf(stderr,
84             "== VALIDATION ERROR =============================================\n"
85             "%s\n"
86             "== END ==========================================================\n",
87             err ? err : "unknown");
88       ralloc_free(err);
89    }
90    assert(res || (instance->debug_flags & DZN_DEBUG_EXPERIMENTAL));
91 #endif
92 
93    void *data;
94    size_t size;
95    blob_finish_get_buffer(&dxil_blob, &data, &size);
96    slot->pShaderBytecode = data;
97    slot->BytecodeLength = size;
98 }
99 
/* Capacity of the root-parameter array filled by dzn_meta_indirect_draw_init():
 * one 32-bit-constants parameter, two unconditional root descriptors and two
 * optional ones (indirect count, triangle fan).
 */
#define DZN_META_INDIRECT_DRAW_MAX_PARAM_COUNT 5
101 
102 static void
dzn_meta_indirect_draw_finish(struct dzn_device * device,struct dzn_indirect_draw_type type)103 dzn_meta_indirect_draw_finish(struct dzn_device *device, struct dzn_indirect_draw_type type)
104 {
105    struct dzn_meta_indirect_draw *meta = &device->indirect_draws[type.value];
106 
107    if (meta->root_sig)
108       ID3D12RootSignature_Release(meta->root_sig);
109 
110    if (meta->pipeline_state)
111       ID3D12PipelineState_Release(meta->pipeline_state);
112 }
113 
114 static VkResult
dzn_meta_indirect_draw_init(struct dzn_device * device,struct dzn_indirect_draw_type type)115 dzn_meta_indirect_draw_init(struct dzn_device *device,
116                             struct dzn_indirect_draw_type type)
117 {
118    struct dzn_meta_indirect_draw *meta = &device->indirect_draws[type.value];
119    struct dzn_instance *instance =
120       container_of(device->vk.physical->instance, struct dzn_instance, vk);
121    VkResult ret = VK_SUCCESS;
122 
123    glsl_type_singleton_init_or_ref();
124 
125    nir_shader *nir = dzn_nir_indirect_draw_shader(type);
126    uint32_t shader_params_size =
127       type.triangle_fan_primitive_restart ?
128       sizeof(struct dzn_indirect_draw_triangle_fan_prim_restart_rewrite_params) :
129       type.triangle_fan ?
130       sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params) :
131       sizeof(struct dzn_indirect_draw_rewrite_params);
132 
133    uint32_t root_param_count = 0;
134    D3D12_ROOT_PARAMETER1 root_params[DZN_META_INDIRECT_DRAW_MAX_PARAM_COUNT];
135 
136    root_params[root_param_count++] = (D3D12_ROOT_PARAMETER1) {
137       .ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS,
138       .Constants = {
139          .ShaderRegister = 0,
140          .RegisterSpace = 0,
141          .Num32BitValues = shader_params_size / 4,
142       },
143       .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
144    };
145 
146    root_params[root_param_count++] = (D3D12_ROOT_PARAMETER1) {
147       .ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV,
148       .Descriptor = {
149          .ShaderRegister = 1,
150          .RegisterSpace = 0,
151          .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE,
152       },
153       .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
154    };
155 
156    root_params[root_param_count++] = (D3D12_ROOT_PARAMETER1) {
157       .ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV,
158       .Descriptor = {
159          .ShaderRegister = 2,
160          .RegisterSpace = 0,
161          .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE,
162       },
163       .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
164    };
165 
166    if (type.indirect_count) {
167       root_params[root_param_count++] = (D3D12_ROOT_PARAMETER1) {
168          .ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV,
169          .Descriptor = {
170             .ShaderRegister = 3,
171             .RegisterSpace = 0,
172             .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE,
173          },
174          .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
175       };
176    }
177 
178 
179    if (type.triangle_fan) {
180       root_params[root_param_count++] = (D3D12_ROOT_PARAMETER1) {
181          .ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV,
182          .Descriptor = {
183             .ShaderRegister = 4,
184             .RegisterSpace = 0,
185             .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE,
186          },
187          .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
188       };
189    }
190 
191    assert(root_param_count <= ARRAY_SIZE(root_params));
192 
193    D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = {
194       .Version = D3D_ROOT_SIGNATURE_VERSION_1_1,
195       .Desc_1_1 = {
196          .NumParameters = root_param_count,
197          .pParameters = root_params,
198          .Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE,
199       },
200    };
201 
202    D3D12_COMPUTE_PIPELINE_STATE_DESC desc = {
203       .Flags = D3D12_PIPELINE_STATE_FLAG_NONE,
204    };
205 
206    meta->root_sig =
207       dzn_device_create_root_sig(device, &root_sig_desc);
208    if (!meta->root_sig) {
209       ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED);
210       goto out;
211    }
212 
213    desc.pRootSignature = meta->root_sig;
214    dzn_meta_compile_shader(device, nir, &desc.CS);
215    assert(desc.CS.pShaderBytecode);
216 
217    if (FAILED(ID3D12Device1_CreateComputePipelineState(device->dev, &desc,
218                                                        &IID_ID3D12PipelineState,
219                                                        (void **)&meta->pipeline_state)))
220       ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED);
221 
222 out:
223    if (ret != VK_SUCCESS)
224       dzn_meta_indirect_draw_finish(device, type);
225 
226    free((void *)desc.CS.pShaderBytecode);
227    ralloc_free(nir);
228    glsl_type_singleton_decref();
229 
230    return ret;
231 }
232 
/* Capacity of the root-parameter array filled by
 * dzn_meta_triangle_fan_rewrite_index_init(): one UAV, one 32-bit-constants
 * parameter, and an optional SRV and UAV.
 */
#define DZN_META_TRIANGLE_FAN_REWRITE_IDX_MAX_PARAM_COUNT 4
234 
235 static void
dzn_meta_triangle_fan_rewrite_index_finish(struct dzn_device * device,enum dzn_index_type old_index_type)236 dzn_meta_triangle_fan_rewrite_index_finish(struct dzn_device *device,
237                                            enum dzn_index_type old_index_type)
238 {
239    struct dzn_meta_triangle_fan_rewrite_index *meta =
240       &device->triangle_fan[old_index_type];
241 
242    if (meta->root_sig)
243       ID3D12RootSignature_Release(meta->root_sig);
244    if (meta->pipeline_state)
245       ID3D12PipelineState_Release(meta->pipeline_state);
246    if (meta->cmd_sig)
247       ID3D12CommandSignature_Release(meta->cmd_sig);
248 }
249 
250 static VkResult
dzn_meta_triangle_fan_rewrite_index_init(struct dzn_device * device,enum dzn_index_type old_index_type)251 dzn_meta_triangle_fan_rewrite_index_init(struct dzn_device *device,
252                                          enum dzn_index_type old_index_type)
253 {
254    struct dzn_meta_triangle_fan_rewrite_index *meta =
255       &device->triangle_fan[old_index_type];
256    struct dzn_instance *instance =
257       container_of(device->vk.physical->instance, struct dzn_instance, vk);
258    VkResult ret = VK_SUCCESS;
259 
260    glsl_type_singleton_init_or_ref();
261 
262    uint8_t old_index_size = dzn_index_size(old_index_type);
263    bool prim_restart =
264       old_index_type == DZN_INDEX_2B_WITH_PRIM_RESTART ||
265       old_index_type == DZN_INDEX_4B_WITH_PRIM_RESTART;
266 
267    nir_shader *nir =
268       prim_restart ?
269       dzn_nir_triangle_fan_prim_restart_rewrite_index_shader(old_index_size) :
270       dzn_nir_triangle_fan_rewrite_index_shader(old_index_size);
271 
272    uint32_t root_param_count = 0;
273    D3D12_ROOT_PARAMETER1 root_params[DZN_META_TRIANGLE_FAN_REWRITE_IDX_MAX_PARAM_COUNT];
274 
275    root_params[root_param_count++] = (D3D12_ROOT_PARAMETER1) {
276       .ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV,
277       .Descriptor = {
278          .ShaderRegister = 1,
279          .RegisterSpace = 0,
280          .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE,
281       },
282       .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
283    };
284 
285    uint32_t params_size =
286       prim_restart ?
287       sizeof(struct dzn_triangle_fan_prim_restart_rewrite_index_params) :
288       sizeof(struct dzn_triangle_fan_rewrite_index_params);
289 
290    root_params[root_param_count++] = (D3D12_ROOT_PARAMETER1) {
291       .ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS,
292       .Constants = {
293          .ShaderRegister = 0,
294          .RegisterSpace = 0,
295          .Num32BitValues = params_size / 4,
296       },
297       .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
298    };
299 
300    if (old_index_type != DZN_NO_INDEX) {
301       root_params[root_param_count++] = (D3D12_ROOT_PARAMETER1) {
302          .ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV,
303          .Descriptor = {
304             .ShaderRegister = 2,
305             .RegisterSpace = 0,
306             .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE,
307          },
308          .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
309       };
310    }
311 
312    if (prim_restart) {
313       root_params[root_param_count++] = (D3D12_ROOT_PARAMETER1) {
314          .ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV,
315          .Descriptor = {
316             .ShaderRegister = 3,
317             .RegisterSpace = 0,
318             .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE,
319          },
320          .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
321       };
322    }
323 
324    assert(root_param_count <= ARRAY_SIZE(root_params));
325 
326    D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = {
327       .Version = D3D_ROOT_SIGNATURE_VERSION_1_1,
328       .Desc_1_1 = {
329          .NumParameters = root_param_count,
330          .pParameters = root_params,
331          .Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE,
332       },
333    };
334 
335    D3D12_COMPUTE_PIPELINE_STATE_DESC desc = {
336       .Flags = D3D12_PIPELINE_STATE_FLAG_NONE,
337    };
338 
339    uint32_t cmd_arg_count = 0;
340    D3D12_INDIRECT_ARGUMENT_DESC cmd_args[4];
341 
342    cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
343       .Type = D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW,
344       .UnorderedAccessView = {
345          .RootParameterIndex = 0,
346       },
347    };
348 
349    cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
350       .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
351       .Constant = {
352          .RootParameterIndex = 1,
353          .DestOffsetIn32BitValues = 0,
354          .Num32BitValuesToSet = params_size / 4,
355       },
356    };
357 
358    if (prim_restart) {
359       cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
360          .Type = D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW,
361          .UnorderedAccessView = {
362             .RootParameterIndex = 3,
363          },
364       };
365    }
366 
367    cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
368       .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH,
369    };
370 
371    assert(cmd_arg_count <= ARRAY_SIZE(cmd_args));
372 
373    uint32_t exec_params_size =
374       prim_restart ?
375       sizeof(struct dzn_indirect_triangle_fan_prim_restart_rewrite_index_exec_params) :
376       sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params);
377 
378    D3D12_COMMAND_SIGNATURE_DESC cmd_sig_desc = {
379       .ByteStride = exec_params_size,
380       .NumArgumentDescs = cmd_arg_count,
381       .pArgumentDescs = cmd_args,
382    };
383 
384    assert((cmd_sig_desc.ByteStride & 7) == 0);
385 
386    meta->root_sig = dzn_device_create_root_sig(device, &root_sig_desc);
387    if (!meta->root_sig) {
388       ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED);
389       goto out;
390    }
391 
392 
393    desc.pRootSignature = meta->root_sig;
394    dzn_meta_compile_shader(device, nir, &desc.CS);
395 
396    if (FAILED(ID3D12Device1_CreateComputePipelineState(device->dev, &desc,
397                                                        &IID_ID3D12PipelineState,
398                                                        (void **)&meta->pipeline_state))) {
399       ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED);
400       goto out;
401    }
402 
403    if (FAILED(ID3D12Device1_CreateCommandSignature(device->dev, &cmd_sig_desc,
404                                                    meta->root_sig,
405                                                    &IID_ID3D12CommandSignature,
406                                                    (void **)&meta->cmd_sig)))
407       ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED);
408 
409 out:
410    if (ret != VK_SUCCESS)
411       dzn_meta_triangle_fan_rewrite_index_finish(device, old_index_type);
412 
413    free((void *)desc.CS.pShaderBytecode);
414    ralloc_free(nir);
415    glsl_type_singleton_decref();
416 
417    return ret;
418 }
419 
420 static const D3D12_SHADER_BYTECODE *
dzn_meta_blits_get_vs(struct dzn_device * device)421 dzn_meta_blits_get_vs(struct dzn_device *device)
422 {
423    struct dzn_meta_blits *meta = &device->blits;
424 
425    mtx_lock(&meta->shaders_lock);
426 
427    if (meta->vs.pShaderBytecode == NULL) {
428       nir_shader *nir = dzn_nir_blit_vs();
429 
430       NIR_PASS_V(nir, nir_lower_system_values);
431 
432       gl_system_value system_values[] = {
433          SYSTEM_VALUE_FIRST_VERTEX,
434          SYSTEM_VALUE_BASE_VERTEX,
435       };
436 
437       NIR_PASS_V(nir, dxil_nir_lower_system_values_to_zero, system_values,
438                 ARRAY_SIZE(system_values));
439 
440       D3D12_SHADER_BYTECODE bc;
441 
442       dzn_meta_compile_shader(device, nir, &bc);
443       meta->vs.pShaderBytecode =
444          vk_alloc(&device->vk.alloc, bc.BytecodeLength, 8,
445                   VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
446       if (meta->vs.pShaderBytecode) {
447          meta->vs.BytecodeLength = bc.BytecodeLength;
448          memcpy((void *)meta->vs.pShaderBytecode, bc.pShaderBytecode, bc.BytecodeLength);
449       }
450       free((void *)bc.pShaderBytecode);
451       ralloc_free(nir);
452    }
453 
454    mtx_unlock(&meta->shaders_lock);
455 
456    return &meta->vs;
457 }
458 
459 static const D3D12_SHADER_BYTECODE *
dzn_meta_blits_get_fs(struct dzn_device * device,const struct dzn_nir_blit_info * info)460 dzn_meta_blits_get_fs(struct dzn_device *device,
461                       const struct dzn_nir_blit_info *info)
462 {
463    struct dzn_meta_blits *meta = &device->blits;
464    D3D12_SHADER_BYTECODE *out = NULL;
465 
466    mtx_lock(&meta->shaders_lock);
467 
468    STATIC_ASSERT(sizeof(struct dzn_nir_blit_info) == sizeof(uint32_t));
469 
470    struct hash_entry *he =
471       _mesa_hash_table_search(meta->fs, (void *)(uintptr_t)info->hash_key);
472 
473    if (!he) {
474       nir_shader *nir = dzn_nir_blit_fs(info);
475 
476       if (info->out_type != GLSL_TYPE_FLOAT) {
477          dxil_wrap_sampler_state wrap_state = {
478             .is_int_sampler = 1,
479             .is_linear_filtering = 0,
480             .skip_boundary_conditions = 1,
481          };
482          dxil_lower_sample_to_txf_for_integer_tex(nir, 1, &wrap_state, NULL, 0);
483       }
484 
485       D3D12_SHADER_BYTECODE bc;
486 
487       dzn_meta_compile_shader(device, nir, &bc);
488 
489       out = vk_alloc(&device->vk.alloc,
490                      sizeof(D3D12_SHADER_BYTECODE) + bc.BytecodeLength, 8,
491                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
492       if (out) {
493          out->pShaderBytecode = out + 1;
494          memcpy((void *)out->pShaderBytecode, bc.pShaderBytecode, bc.BytecodeLength);
495          out->BytecodeLength = bc.BytecodeLength;
496          _mesa_hash_table_insert(meta->fs, &info->hash_key, out);
497       }
498       free((void *)bc.pShaderBytecode);
499       ralloc_free(nir);
500    } else {
501       out = he->data;
502    }
503 
504    mtx_unlock(&meta->shaders_lock);
505 
506    return out;
507 }
508 
509 static void
dzn_meta_blit_destroy(struct dzn_device * device,struct dzn_meta_blit * blit)510 dzn_meta_blit_destroy(struct dzn_device *device, struct dzn_meta_blit *blit)
511 {
512    if (!blit)
513       return;
514 
515    if (blit->root_sig)
516       ID3D12RootSignature_Release(blit->root_sig);
517    if (blit->pipeline_state)
518       ID3D12PipelineState_Release(blit->pipeline_state);
519 
520    vk_free(&device->vk.alloc, blit);
521 }
522 
523 static struct dzn_meta_blit *
dzn_meta_blit_create(struct dzn_device * device,const struct dzn_meta_blit_key * key)524 dzn_meta_blit_create(struct dzn_device *device, const struct dzn_meta_blit_key *key)
525 {
526    struct dzn_meta_blit *blit =
527       vk_zalloc(&device->vk.alloc, sizeof(*blit), 8,
528                 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
529 
530    if (!blit)
531       return NULL;
532 
533    D3D12_DESCRIPTOR_RANGE1 ranges[] = {
534       {
535          .RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV,
536          .NumDescriptors = 1,
537          .BaseShaderRegister = 0,
538          .RegisterSpace = 0,
539          .Flags = D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_STATIC_KEEPING_BUFFER_BOUNDS_CHECKS,
540          .OffsetInDescriptorsFromTableStart = 0,
541       },
542    };
543    D3D12_DESCRIPTOR_RANGE1 sampler_ranges[] = {
544       {
545          .RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER,
546          .NumDescriptors = 1,
547          .BaseShaderRegister = 0,
548          .RegisterSpace = 0,
549          .Flags = 0,
550          .OffsetInDescriptorsFromTableStart = 0,
551       },
552    };
553 
554    D3D12_STATIC_SAMPLER_DESC samplers[] = {
555       {
556          .Filter = key->linear_filter ?
557                    D3D12_FILTER_MIN_MAG_MIP_LINEAR :
558                    D3D12_FILTER_MIN_MAG_MIP_POINT,
559          .AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP,
560          .AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP,
561          .AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP,
562          .MipLODBias = 0,
563          .MaxAnisotropy = 0,
564          .MinLOD = 0,
565          .MaxLOD = D3D12_FLOAT32_MAX,
566          .ShaderRegister = 0,
567          .RegisterSpace = 0,
568          .ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL,
569       },
570    };
571 
572    D3D12_ROOT_PARAMETER1 root_params[4] = {
573       {
574          .ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE,
575          .DescriptorTable = {
576             .NumDescriptorRanges = ARRAY_SIZE(ranges),
577             .pDescriptorRanges = ranges,
578          },
579          .ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL,
580       },
581       {
582          .ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS,
583          .Constants = {
584             .ShaderRegister = 0,
585             .RegisterSpace = 0,
586             .Num32BitValues = 17,
587          },
588          .ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX,
589       },
590       {
591          .ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE,
592          .DescriptorTable = {
593             .NumDescriptorRanges = ARRAY_SIZE(sampler_ranges),
594             .pDescriptorRanges = sampler_ranges,
595          },
596          .ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL,
597       },
598       {
599          .ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS,
600          .Constants = {
601             .ShaderRegister = 0,
602             .RegisterSpace = 0,
603             .Num32BitValues = 1,
604          },
605          .ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL,
606       },
607    };
608    uint32_t num_root_params = 2;
609 
610    uint32_t samples = key->resolve_mode == dzn_blit_resolve_none ?
611       key->samples : 1;
612    D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = {
613       .SampleMask = (1ULL << samples) - 1,
614       .RasterizerState = {
615          .FillMode = D3D12_FILL_MODE_SOLID,
616          .CullMode = D3D12_CULL_MODE_NONE,
617          .DepthClipEnable = true,
618       },
619       .PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE,
620       .SampleDesc = {
621          .Count = samples,
622          .Quality = 0,
623       },
624       .Flags = D3D12_PIPELINE_STATE_FLAG_NONE,
625    };
626 
627    struct dzn_nir_blit_info blit_fs_info = {
628       .src_samples = key->samples,
629       .loc = key->loc,
630       .out_type = key->out_type,
631       .sampler_dim = key->sampler_dim,
632       .src_is_array = key->src_is_array,
633       .resolve_mode = key->resolve_mode,
634       .stencil_fallback = key->loc == FRAG_RESULT_STENCIL && key->stencil_bit != 0xf,
635       .padding = 0,
636    };
637 
638    D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = {
639       .Version = D3D_ROOT_SIGNATURE_VERSION_1_1,
640       .Desc_1_1 = {
641          .NumParameters = num_root_params,
642          .pParameters = root_params,
643          .NumStaticSamplers = ARRAY_SIZE(samplers),
644          .pStaticSamplers = samplers,
645          .Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE,
646       },
647    };
648 
649    if (!device->support_static_samplers) {
650       root_sig_desc.Desc_1_1.NumStaticSamplers = 0;
651       root_sig_desc.Desc_1_1.NumParameters = 3;
652    }
653 
654    /* Don't need fs constants unless we're doing the stencil fallback */
655    if (blit_fs_info.stencil_fallback) {
656       if (device->support_static_samplers) {
657          root_params[2] = root_params[3];
658          root_sig_desc.Desc_1_1.NumParameters = 3;
659       } else {
660          root_sig_desc.Desc_1_1.NumParameters = 4;
661       }
662    }
663 
664    blit->root_sig = dzn_device_create_root_sig(device, &root_sig_desc);
665    if (!blit->root_sig) {
666       dzn_meta_blit_destroy(device, blit);
667       return NULL;
668    }
669 
670    desc.pRootSignature = blit->root_sig;
671 
672    const D3D12_SHADER_BYTECODE *vs, *fs;
673 
674    vs = dzn_meta_blits_get_vs(device);
675    if (!vs) {
676       dzn_meta_blit_destroy(device, blit);
677       return NULL;
678    }
679 
680    desc.VS = *vs;
681    assert(desc.VS.pShaderBytecode);
682 
683    fs = dzn_meta_blits_get_fs(device, &blit_fs_info);
684    if (!fs) {
685       dzn_meta_blit_destroy(device, blit);
686       return NULL;
687    }
688 
689    desc.PS = *fs;
690    assert(desc.PS.pShaderBytecode);
691 
692    assert(key->loc == FRAG_RESULT_DATA0 ||
693           key->loc == FRAG_RESULT_DEPTH ||
694           key->loc == FRAG_RESULT_STENCIL);
695 
696    if (key->loc == FRAG_RESULT_DATA0) {
697       desc.NumRenderTargets = 1;
698       desc.RTVFormats[0] = key->out_format;
699       desc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf;
700    } else {
701       desc.DSVFormat = key->out_format;
702       if (key->loc == FRAG_RESULT_DEPTH) {
703          desc.DepthStencilState.DepthEnable = true;
704          desc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL;
705          desc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS;
706       } else {
707          assert(key->loc == FRAG_RESULT_STENCIL);
708          desc.DepthStencilState.StencilEnable = true;
709          desc.DepthStencilState.StencilWriteMask = key->stencil_bit == 0xf ? 0xff : (1 << key->stencil_bit);
710          desc.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP_REPLACE;
711          desc.DepthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_REPLACE;
712          desc.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP_REPLACE;
713          desc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;
714          desc.DepthStencilState.BackFace = desc.DepthStencilState.FrontFace;
715       }
716    }
717 
718    if (FAILED(ID3D12Device1_CreateGraphicsPipelineState(device->dev, &desc,
719                                                         &IID_ID3D12PipelineState,
720                                                         (void **)&blit->pipeline_state))) {
721       dzn_meta_blit_destroy(device, blit);
722       return NULL;
723    }
724 
725    return blit;
726 }
727 
728 const struct dzn_meta_blit *
dzn_meta_blits_get_context(struct dzn_device * device,const struct dzn_meta_blit_key * key)729 dzn_meta_blits_get_context(struct dzn_device *device,
730                            const struct dzn_meta_blit_key *key)
731 {
732    struct dzn_meta_blit *out = NULL;
733 
734    STATIC_ASSERT(sizeof(*key) == sizeof(uint64_t));
735 
736    mtx_lock(&device->blits.contexts_lock);
737 
738    out =
739       _mesa_hash_table_u64_search(device->blits.contexts, key->u64);
740    if (!out) {
741       out = dzn_meta_blit_create(device, key);
742 
743       if (out)
744          _mesa_hash_table_u64_insert(device->blits.contexts, key->u64, out);
745    }
746 
747    mtx_unlock(&device->blits.contexts_lock);
748 
749    return out;
750 }
751 
752 static void
dzn_meta_blits_finish(struct dzn_device * device)753 dzn_meta_blits_finish(struct dzn_device *device)
754 {
755    struct dzn_meta_blits *meta = &device->blits;
756 
757    vk_free(&device->vk.alloc, (void *)meta->vs.pShaderBytecode);
758 
759    if (meta->fs) {
760       hash_table_foreach(meta->fs, he)
761          vk_free(&device->vk.alloc, he->data);
762       _mesa_hash_table_destroy(meta->fs, NULL);
763    }
764 
765    if (meta->contexts) {
766       hash_table_foreach(meta->contexts->table, he)
767          dzn_meta_blit_destroy(device, he->data);
768       _mesa_hash_table_u64_destroy(meta->contexts);
769    }
770 
771    mtx_destroy(&meta->shaders_lock);
772    mtx_destroy(&meta->contexts_lock);
773 }
774 
775 static VkResult
dzn_meta_blits_init(struct dzn_device * device)776 dzn_meta_blits_init(struct dzn_device *device)
777 {
778    struct dzn_instance *instance =
779       container_of(device->vk.physical->instance, struct dzn_instance, vk);
780    struct dzn_meta_blits *meta = &device->blits;
781 
782    mtx_init(&meta->shaders_lock, mtx_plain);
783    mtx_init(&meta->contexts_lock, mtx_plain);
784 
785    meta->fs = _mesa_hash_table_create_u32_keys(NULL);
786    if (!meta->fs) {
787       dzn_meta_blits_finish(device);
788       return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
789    }
790 
791    meta->contexts = _mesa_hash_table_u64_create(NULL);
792    if (!meta->contexts) {
793       dzn_meta_blits_finish(device);
794       return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
795    }
796 
797    return VK_SUCCESS;
798 }
799 
800 void
dzn_meta_finish(struct dzn_device * device)801 dzn_meta_finish(struct dzn_device *device)
802 {
803    for (uint32_t i = 0; i < ARRAY_SIZE(device->triangle_fan); i++)
804       dzn_meta_triangle_fan_rewrite_index_finish(device, i);
805 
806    for (uint32_t i = 0; i < ARRAY_SIZE(device->indirect_draws); i++)
807       dzn_meta_indirect_draw_finish(device, (struct dzn_indirect_draw_type) { .value = i });
808 
809    dzn_meta_blits_finish(device);
810 }
811 
812 VkResult
dzn_meta_init(struct dzn_device * device)813 dzn_meta_init(struct dzn_device *device)
814 {
815    struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
816    VkResult result = dzn_meta_blits_init(device);
817    if (result != VK_SUCCESS)
818       goto out;
819 
820    for (uint32_t i = 0; i < ARRAY_SIZE(device->indirect_draws); i++) {
821       struct dzn_indirect_draw_type type = { .value = i };
822       if (type.triangle_fan_primitive_restart && !type.triangle_fan)
823          continue;
824       if (type.triangle_fan && pdev->options15.TriangleFanSupported)
825          continue;
826       if (type.draw_params && pdev->options21.ExtendedCommandInfoSupported)
827          continue;
828       if (type.draw_id && pdev->options21.ExecuteIndirectTier >= D3D12_EXECUTE_INDIRECT_TIER_1_1)
829          continue;
830       VkResult result =
831          dzn_meta_indirect_draw_init(device, type);
832       if (result != VK_SUCCESS)
833          goto out;
834    }
835 
836    if (!pdev->options15.TriangleFanSupported) {
837       for (uint32_t i = 0; i < ARRAY_SIZE(device->triangle_fan); i++) {
838          VkResult result =
839             dzn_meta_triangle_fan_rewrite_index_init(device, i);
840          if (result != VK_SUCCESS)
841             goto out;
842       }
843    }
844 
845 out:
846    if (result != VK_SUCCESS) {
847       dzn_meta_finish(device);
848       return result;
849    }
850 
851    return VK_SUCCESS;
852 }
853