/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_clc_helpers.h"
#include "nir_serialize.h"
#include "glsl_types.h"
#include "clc_compiler.h"
#include "clc_helpers.h"
#include "clc_nir.h"
#include "../compiler/dxil_nir.h"
#include "../compiler/dxil_nir_lower_int_samplers.h"
#include "../compiler/nir_to_dxil.h"

#include "util/u_debug.h"
#include "util/u_math.h"
#include "spirv/nir_spirv.h"
#include "spirv/spirv_info.h"
#include "nir_builder.h"
#include "nir_builtin_builder.h"

#include "git_sha1.h"

struct clc_image_lower_context
{
   struct clc_dxil_metadata *metadata;
   unsigned *num_srvs;
   unsigned *num_uavs;
   nir_deref_instr *deref;
   unsigned num_buf_ids;
   int metadata_index;
};

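/* Find or create a strongly-typed stand-in variable for the image argument
 * behind context->deref. An existing variable with the same driver_location
 * and type is reused; otherwise a new one is created, and any typed view past
 * the first gets a fresh binding, which is also recorded in the argument's
 * buf_ids metadata.
 */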
static int
lower_image_deref_impl(nir_builder *b, struct clc_image_lower_context *context,
                       const struct glsl_type *new_var_type,
                       nir_variable_mode var_mode,
                       unsigned *num_bindings)
{
   nir_variable *in_var = nir_deref_instr_get_variable(context->deref);
   nir_foreach_variable_with_modes(var, b->shader, var_mode) {
      // Check if we've already created a variable for this image
      if (var->data.driver_location == in_var->data.driver_location &&
          var->type == new_var_type)
         return var->data.binding;
   }
   nir_variable *image = nir_variable_create(b->shader, var_mode, new_var_type, NULL);
   image->data.access = in_var->data.access;
   image->data.binding = in_var->data.binding;
   image->data.driver_location = in_var->data.driver_location;
   if (context->num_buf_ids > 0) {
      // Need to assign a new binding
      context->metadata->args[context->metadata_index].
         image.buf_ids[context->num_buf_ids] = image->data.binding = (*num_bindings)++;
   }
   context->num_buf_ids++;
   return image->data.binding;
}

static int
lower_read_only_image_deref(nir_builder *b, struct clc_image_lower_context *context,
                            nir_alu_type image_type)
{
   nir_variable *in_var = nir_deref_instr_get_variable(context->deref);

   // Non-writeable images should be converted to samplers,
   // since they may have texture operations done on them
   const struct glsl_type *new_var_type =
      glsl_texture_type(glsl_get_sampler_dim(in_var->type),
            glsl_sampler_type_is_array(in_var->type),
            nir_get_glsl_base_type_for_nir_type(image_type | 32));
   return lower_image_deref_impl(b, context, new_var_type, nir_var_uniform, context->num_srvs);
}

static int
lower_read_write_image_deref(nir_builder *b, struct clc_image_lower_context *context,
                             nir_alu_type image_type)
{
   nir_variable *in_var = nir_deref_instr_get_variable(context->deref);
   const struct glsl_type *new_var_type =
      glsl_image_type(glsl_get_sampler_dim(in_var->type),
         glsl_sampler_type_is_array(in_var->type),
         nir_get_glsl_base_type_for_nir_type(image_type | 32));
   return lower_image_deref_impl(b, context, new_var_type, nir_var_image, context->num_uavs);
}

static void
clc_lower_input_image_deref(nir_builder *b, struct clc_image_lower_context *context)
{
   // The input variable here isn't actually an image; it's just the
   // image format data.
   //
   // For every use of an image in a different way, we'll add an
   // appropriate image to match it. That can result in up to
   // 3 images (float4, int4, uint4) for each image. Only one of these
   // formats will actually produce correct data, but a single kernel
   // could use runtime conditionals to potentially access any of them.
   //
   // If the image is used in a query that doesn't have a corresponding
   // DXIL intrinsic (CL image channel order or channel format), then
   // we'll add a kernel input for that data that'll be lowered by the
   // explicit IO pass later on.
   //
   // After all that, we can remove the image input variable and deref.
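   //
   // For example (illustrative, not from the original source): a kernel that
   // calls both read_imagef() and read_imageui() on the same image argument
   // ends up with two typed variables (FLOAT4 and UINT4), each with its own
   // binding recorded in the argument's buf_ids, while an image only passed
   // to get_image_width() gets a single arbitrarily-typed (FLOAT4) variable.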

   enum image_type {
      FLOAT4,
      INT4,
      UINT4,
      IMAGE_TYPE_COUNT
   };

   int image_bindings[IMAGE_TYPE_COUNT] = {-1, -1, -1};
   nir_def *format_deref_dest = NULL, *order_deref_dest = NULL;

   nir_variable *in_var = nir_deref_instr_get_variable(context->deref);

   context->metadata_index = 0;
   while (context->metadata->args[context->metadata_index].offset != in_var->data.driver_location)
      context->metadata_index++;

   context->num_buf_ids = context->metadata->args[context->metadata_index].image.num_buf_ids;

   /* Do this in 2 passes:
    * 1. When encountering a strongly-typed access (load/store), replace the deref
    *    with one that references an appropriately typed variable. When encountering
    *    an untyped access (size query), if we have a strongly-typed variable already,
    *    replace the deref to point to it.
    * 2. If there are any references left, they should all be untyped. If we found
    *    a strongly-typed access later in the 1st pass, then just replace the reference.
    *    If we didn't, e.g. the resource is only used for a size query, then pick an
    *    arbitrary type for it.
    */
   for (int pass = 0; pass < 2; ++pass) {
      nir_foreach_use_safe(src, &context->deref->def) {
         enum image_type type;

         if (nir_src_parent_instr(src)->type == nir_instr_type_intrinsic) {
            nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(nir_src_parent_instr(src));
            nir_alu_type dest_type;

            b->cursor = nir_before_instr(&intrinsic->instr);

            switch (intrinsic->intrinsic) {
            case nir_intrinsic_image_deref_load:
            case nir_intrinsic_image_deref_store: {
               dest_type = intrinsic->intrinsic == nir_intrinsic_image_deref_load ?
                  nir_intrinsic_dest_type(intrinsic) : nir_intrinsic_src_type(intrinsic);

               switch (nir_alu_type_get_base_type(dest_type)) {
               case nir_type_float: type = FLOAT4; break;
               case nir_type_int: type = INT4; break;
               case nir_type_uint: type = UINT4; break;
               default: unreachable("Unsupported image type for load.");
               }

               int image_binding = image_bindings[type];
               if (image_binding < 0) {
                  image_binding = image_bindings[type] =
                     lower_read_write_image_deref(b, context, dest_type);
               }

               assert((in_var->data.access & ACCESS_NON_WRITEABLE) == 0);
               nir_rewrite_image_intrinsic(intrinsic, nir_imm_int(b, image_binding), false);
               break;
            }

            case nir_intrinsic_image_deref_size: {
               int image_binding = -1;
               for (unsigned i = 0; i < IMAGE_TYPE_COUNT; ++i) {
                  if (image_bindings[i] >= 0) {
                     image_binding = image_bindings[i];
                     break;
                  }
               }
               if (image_binding < 0) {
                  // Skip for now and come back to it
                  if (pass == 0)
                     break;

                  type = FLOAT4;
                  image_binding = image_bindings[type] =
                     lower_read_write_image_deref(b, context, nir_type_float32);
               }

               assert((in_var->data.access & ACCESS_NON_WRITEABLE) == 0);
               nir_rewrite_image_intrinsic(intrinsic, nir_imm_int(b, image_binding), false);
               break;
            }

            case nir_intrinsic_image_deref_format:
            case nir_intrinsic_image_deref_order: {
               nir_def **cached_deref = intrinsic->intrinsic == nir_intrinsic_image_deref_format ?
                  &format_deref_dest : &order_deref_dest;
               if (!*cached_deref) {
                  unsigned driver_location = in_var->data.driver_location;
                  if (intrinsic->intrinsic == nir_intrinsic_image_deref_format) {
                     /* Match cl_image_format { image_channel_order, image_channel_data_type }; */
                     driver_location += 4;
                  }

                  nir_variable *new_input = NULL;
                  nir_foreach_variable_with_modes(var, b->shader, nir_var_uniform) {
                     if (var->data.driver_location == driver_location &&
                         var->type == glsl_uint_type()) {
                        new_input = var;
                        break;
                     }
                  }
                  if (!new_input) {
                     new_input = nir_variable_create(b->shader, nir_var_uniform, glsl_uint_type(), NULL);
                     new_input->data.driver_location = driver_location;
                  }

                  b->cursor = nir_after_instr(&context->deref->instr);
                  *cached_deref = nir_load_var(b, new_input);
               }

               /* No actual intrinsic needed here, just reference the loaded variable */
               nir_def_replace(&intrinsic->def, *cached_deref);
               break;
            }

            default:
               unreachable("Unsupported image intrinsic");
            }
         } else if (nir_src_parent_instr(src)->type == nir_instr_type_tex) {
            assert(in_var->data.access & ACCESS_NON_WRITEABLE);
            nir_tex_instr *tex = nir_instr_as_tex(nir_src_parent_instr(src));

            switch (nir_alu_type_get_base_type(tex->dest_type)) {
            case nir_type_float: type = FLOAT4; break;
            case nir_type_int: type = INT4; break;
            case nir_type_uint: type = UINT4; break;
            default: unreachable("Unsupported image format for sample.");
            }

            int image_binding = image_bindings[type];
            if (image_binding < 0) {
               image_binding = image_bindings[type] =
                  lower_read_only_image_deref(b, context, tex->dest_type);
            }

            nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_texture_deref));
            tex->texture_index = image_binding;
         }
      }
   }

   context->metadata->args[context->metadata_index].image.num_buf_ids = context->num_buf_ids;

   nir_instr_remove(&context->deref->instr);
}

static void
clc_lower_images(nir_shader *nir, struct clc_image_lower_context *context)
{
   nir_foreach_function(func, nir) {
      if (!func->is_entrypoint)
         continue;
      assert(func->impl);

      nir_builder b = nir_builder_create(func->impl);

      nir_foreach_block(block, func->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type == nir_instr_type_deref) {
               context->deref = nir_instr_as_deref(instr);

               if (glsl_type_is_image(context->deref->type)) {
                  assert(context->deref->deref_type == nir_deref_type_var);
                  clc_lower_input_image_deref(&b, context);
               }
            }
         }
      }
   }

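   /* The original image arguments are untyped (their sampler result type is
    * VOID); after the lowering above nothing dereferences them, so drop them. */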
   nir_foreach_variable_with_modes_safe(var, nir, nir_var_image) {
      if (glsl_type_is_image(var->type) && glsl_get_sampler_result_type(var->type) == GLSL_TYPE_VOID)
         exec_node_remove(&var->node);
   }
}

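/* CL system values (global/local IDs, etc.) are 64-bit, but the DXIL
 * intrinsics backing them are 32-bit. Shrink each such intrinsic to 32 bits
 * and re-widen its result for the existing 64-bit users. Illustrative
 * before/after (not from the original source):
 *    %id = load_global_invocation_id     (64-bit)
 * becomes
 *    %id32 = load_global_invocation_id   (32-bit)
 *    %id   = u2u64 %id32
 */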
static void
clc_lower_64bit_semantics(nir_shader *nir)
{
   nir_foreach_function_impl(impl, nir) {
      nir_builder b = nir_builder_create(impl);

      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type == nir_instr_type_intrinsic) {
               nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);
               switch (intrinsic->intrinsic) {
               case nir_intrinsic_load_global_invocation_id:
               case nir_intrinsic_load_base_global_invocation_id:
               case nir_intrinsic_load_local_invocation_id:
               case nir_intrinsic_load_workgroup_id:
               case nir_intrinsic_load_base_workgroup_id:
               case nir_intrinsic_load_num_workgroups:
                  break;
               default:
                  continue;
               }

               if (nir_instr_def(instr)->bit_size != 64)
                  continue;

               intrinsic->def.bit_size = 32;
               b.cursor = nir_after_instr(instr);

               nir_def *i64 = nir_u2u64(&b, &intrinsic->def);
               nir_def_rewrite_uses_after(
                  &intrinsic->def,
                  i64,
                  i64->parent_instr);
            }
         }
      }
   }
}

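/* CL samplers with CLK_NORMALIZED_COORDS_FALSE take coordinates in texels,
 * but the tex instructions emitted here expect normalized coordinates.
 * Roughly (an illustrative summary, not from the original source):
 *    nearest: coord' = (floor(coord) + 0.5) / size
 *    linear:  coord' = coord / size
 * Array indices are not scaled, only rounded and clamped to [0, layers - 1].
 */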
static void
clc_lower_nonnormalized_samplers(nir_shader *nir,
                                 const dxil_wrap_sampler_state *states)
{
   nir_foreach_function(func, nir) {
      if (!func->is_entrypoint)
         continue;
      assert(func->impl);

      nir_builder b = nir_builder_create(func->impl);

      nir_foreach_block(block, func->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_tex)
               continue;
            nir_tex_instr *tex = nir_instr_as_tex(instr);

            int sampler_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
            if (sampler_src_idx == -1)
               continue;

            nir_src *sampler_src = &tex->src[sampler_src_idx].src;
            assert(sampler_src->ssa->parent_instr->type == nir_instr_type_deref);
            nir_variable *sampler = nir_deref_instr_get_variable(
               nir_instr_as_deref(sampler_src->ssa->parent_instr));

            // If the sampler returns ints, we'll handle this in the int lowering pass
            if (nir_alu_type_get_base_type(tex->dest_type) != nir_type_float)
               continue;

            // If sampler uses normalized coords, nothing to do
            if (!states[sampler->data.binding].is_nonnormalized_coords)
               continue;

            b.cursor = nir_before_instr(&tex->instr);

            int coords_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
            assert(coords_idx != -1);
            nir_def *coords =
               tex->src[coords_idx].src.ssa;

            nir_def *txs = nir_i2f32(&b, nir_get_texture_size(&b, tex));

            // Normalize coords for tex
            nir_def *scale = nir_frcp(&b, txs);
            nir_def *comps[4];
            for (unsigned i = 0; i < coords->num_components; ++i) {
               comps[i] = nir_channel(&b, coords, i);
               if (tex->is_array && i == coords->num_components - 1) {
                  // Don't scale the array index, but do clamp it
                  comps[i] = nir_fround_even(&b, comps[i]);
                  comps[i] = nir_fmax(&b, comps[i], nir_imm_float(&b, 0.0f));
                  comps[i] = nir_fmin(&b, comps[i], nir_fadd_imm(&b, nir_channel(&b, txs, i), -1.0f));
                  break;
               }

               // The CTS is pretty clear that this value has to be floored for nearest sampling
               // but must not be for linear sampling.
               if (!states[sampler->data.binding].is_linear_filtering)
                  comps[i] = nir_fadd_imm(&b, nir_ffloor(&b, comps[i]), 0.5f);
               comps[i] = nir_fmul(&b, comps[i], nir_channel(&b, scale, i));
            }
            nir_def *normalized_coords = nir_vec(&b, comps, coords->num_components);
            nir_src_rewrite(&tex->src[coords_idx].src, normalized_coords);
         }
      }
   }
}

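/* Kernel arguments are passed in a single hidden CBV modeled as one uint
 * array wrapped in a struct; loads from it are materialized later by
 * clc_nir_lower_kernel_input_loads(). The CBV is sized to cover the highest
 * driver_location plus the size of the variable living there. */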
static nir_variable *
add_kernel_inputs_var(struct clc_dxil_object *dxil, nir_shader *nir,
                      unsigned *cbv_id)
{
   if (!dxil->kernel->num_args)
      return NULL;

   unsigned size = 0;

   nir_foreach_variable_with_modes(var, nir, nir_var_uniform)
      size = MAX2(size,
                  var->data.driver_location +
                  glsl_get_cl_size(var->type));

   size = align(size, 4);

   const struct glsl_type *array_type = glsl_array_type(glsl_uint_type(), size / 4, 4);
   const struct glsl_struct_field field = { array_type, "arr" };
   nir_variable *var =
      nir_variable_create(nir, nir_var_mem_ubo,
         glsl_struct_type(&field, 1, "kernel_inputs", false),
         "kernel_inputs");
   var->data.binding = (*cbv_id)++;
   var->data.how_declared = nir_var_hidden;
   return var;
}

static nir_variable *
add_work_properties_var(struct clc_dxil_object *dxil,
                           struct nir_shader *nir, unsigned *cbv_id)
{
   const struct glsl_type *array_type =
      glsl_array_type(glsl_uint_type(),
         sizeof(struct clc_work_properties_data) / sizeof(unsigned),
         sizeof(unsigned));
   const struct glsl_struct_field field = { array_type, "arr" };
   nir_variable *var =
      nir_variable_create(nir, nir_var_mem_ubo,
         glsl_struct_type(&field, 1, "kernel_work_properties", false),
         "kernel_work_properties");
   var->data.binding = (*cbv_id)++;
   var->data.how_declared = nir_var_hidden;
   return var;
}

static void
clc_lower_constant_to_ssbo(nir_shader *nir,
                      const struct clc_kernel_info *kerninfo, unsigned *uav_id)
{
   /* Turn constant variables into SSBO variables and assign them a binding. */
   nir_foreach_variable_with_modes(var, nir, nir_var_mem_constant) {
      var->data.mode = nir_var_mem_ssbo;
      var->data.binding = (*uav_id)++;
   }

   /* And finally patch all the derefs referencing the constant
    * variables/pointers.
    */
   nir_foreach_function(func, nir) {
      if (!func->is_entrypoint)
         continue;

      assert(func->impl);

      nir_foreach_block(block, func->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_deref)
               continue;

            nir_deref_instr *deref = nir_instr_as_deref(instr);

            if (deref->modes != nir_var_mem_constant)
               continue;

            deref->modes = nir_var_mem_ssbo;
         }
      }
   }
}

static void
clc_change_variable_mode(nir_shader *nir, nir_variable_mode from, nir_variable_mode to)
{
   nir_foreach_variable_with_modes(var, nir, from)
      var->data.mode = to;

   nir_foreach_function(func, nir) {
      if (!func->is_entrypoint)
         continue;

      assert(func->impl);

      nir_foreach_block(block, func->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_deref)
               continue;

            nir_deref_instr *deref = nir_instr_as_deref(instr);

            if (deref->modes != from)
               continue;

            deref->modes = to;
         }
      }
   }
}

static void
copy_const_initializer(const nir_constant *constant, const struct glsl_type *type,
                       uint8_t *data)
{
   if (glsl_type_is_array(type)) {
      const struct glsl_type *elm_type = glsl_get_array_element(type);
      unsigned step_size = glsl_get_explicit_stride(type);

      for (unsigned i = 0; i < constant->num_elements; i++) {
         copy_const_initializer(constant->elements[i], elm_type,
                                data + (i * step_size));
      }
   } else if (glsl_type_is_struct(type)) {
      for (unsigned i = 0; i < constant->num_elements; i++) {
         const struct glsl_type *elm_type = glsl_get_struct_field(type, i);
         int offset = glsl_get_struct_field_offset(type, i);
         copy_const_initializer(constant->elements[i], elm_type, data + offset);
      }
   } else {
      assert(glsl_type_is_vector_or_scalar(type));

      for (unsigned i = 0; i < glsl_get_components(type); i++) {
         switch (glsl_get_bit_size(type)) {
         case 64:
            *((uint64_t *)data) = constant->values[i].u64;
            break;
         case 32:
            *((uint32_t *)data) = constant->values[i].u32;
            break;
         case 16:
            *((uint16_t *)data) = constant->values[i].u16;
            break;
         case 8:
            *((uint8_t *)data) = constant->values[i].u8;
            break;
         default:
            unreachable("Invalid base type");
         }

         data += glsl_get_bit_size(type) / 8;
      }
   }
}

static enum dxil_tex_wrap
wrap_from_cl_addressing(unsigned addressing_mode)
{
   switch (addressing_mode)
   {
   default:
   case SAMPLER_ADDRESSING_MODE_NONE:
   case SAMPLER_ADDRESSING_MODE_CLAMP:
      // Since OpenCL's only border color is all zeros and D3D specifies that
      // out-of-bounds loads return 0, don't apply any wrap mode
      return (enum dxil_tex_wrap)-1;
   case SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE: return DXIL_TEX_WRAP_CLAMP_TO_EDGE;
   case SAMPLER_ADDRESSING_MODE_REPEAT: return DXIL_TEX_WRAP_REPEAT;
   case SAMPLER_ADDRESSING_MODE_REPEAT_MIRRORED: return DXIL_TEX_WRAP_MIRROR_REPEAT;
   }
}

static bool shader_has_double(nir_shader *nir)
{
   foreach_list_typed(nir_function, func, node, &nir->functions) {
      if (!func->is_entrypoint)
         continue;

      assert(func->impl);

      nir_foreach_block(block, func->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_alu)
               continue;

            nir_alu_instr *alu = nir_instr_as_alu(instr);
            const nir_op_info *info = &nir_op_infos[alu->op];

            if (info->output_type & nir_type_float &&
                alu->def.bit_size == 64)
               return true;
         }
      }
   }

   return false;
}

struct clc_libclc {
   const nir_shader *libclc_nir;
};

struct clc_libclc *
clc_libclc_new(const struct clc_logger *logger, const struct clc_libclc_options *options)
{
   struct clc_libclc *ctx = rzalloc(NULL, struct clc_libclc);
   if (!ctx) {
      clc_error(logger, "D3D12: failed to allocate a clc_libclc");
      return NULL;
   }

   const struct spirv_capabilities libclc_spirv_caps = {
      .Addresses = true,
      .Float64 = true,
      .Int8 = true,
      .Int16 = true,
      .Int64 = true,
      .Kernel = true,
      .Linkage = true,
   };
   const struct spirv_to_nir_options libclc_spirv_options = {
      .environment = NIR_SPIRV_OPENCL,
      .create_library = true,
      .constant_addr_format = nir_address_format_32bit_index_offset_pack64,
      .global_addr_format = nir_address_format_32bit_index_offset_pack64,
      .shared_addr_format = nir_address_format_32bit_offset_as_64bit,
      .temp_addr_format = nir_address_format_32bit_offset_as_64bit,
      .float_controls_execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32,
      .capabilities = &libclc_spirv_caps,
   };

   glsl_type_singleton_init_or_ref();
   bool optimize = options && options->optimize;
   nir_shader *s =
      nir_load_libclc_shader(64, NULL, &libclc_spirv_options,
                             options ? options->nir_options : NULL, optimize);
   if (!s) {
      clc_error(logger, "D3D12: spirv_to_nir failed on libclc blob");
      ralloc_free(ctx);
      return NULL;
   }

   ralloc_steal(ctx, s);
   ctx->libclc_nir = s;

   return ctx;
}

void clc_free_libclc(struct clc_libclc *ctx)
{
   ralloc_free(ctx);
   glsl_type_singleton_decref();
}

const nir_shader *clc_libclc_get_clc_shader(struct clc_libclc *ctx)
{
   return ctx->libclc_nir;
}

void clc_libclc_serialize(struct clc_libclc *context,
                           void **serialized,
                           size_t *serialized_size)
{
   struct blob tmp;
   blob_init(&tmp);
   nir_serialize(&tmp, context->libclc_nir, true);

   blob_finish_get_buffer(&tmp, serialized, serialized_size);
}

void clc_libclc_free_serialized(void *serialized)
{
   free(serialized);
}

struct clc_libclc *
clc_libclc_deserialize(const void *serialized, size_t serialized_size)
{
   struct clc_libclc *ctx = rzalloc(NULL, struct clc_libclc);
   if (!ctx) {
      return NULL;
   }

   glsl_type_singleton_init_or_ref();

   struct blob_reader tmp;
   blob_reader_init(&tmp, serialized, serialized_size);

   nir_shader *s = nir_deserialize(NULL, NULL, &tmp);
   if (!s) {
      ralloc_free(ctx);
      return NULL;
   }

   ralloc_steal(ctx, s);
   ctx->libclc_nir = s;

   return ctx;
}

struct clc_libclc *
clc_libclc_new_dxil(const struct clc_logger *logger,
                    const struct clc_libclc_dxil_options *options)
{
   struct clc_libclc_options clc_options = {
      .optimize = options->optimize,
      .nir_options = dxil_get_base_nir_compiler_options(),
   };

   return clc_libclc_new(logger, &clc_options);
}

bool
clc_spirv_to_dxil(struct clc_libclc *lib,
                  const struct clc_binary *linked_spirv,
                  const struct clc_parsed_spirv *parsed_data,
                  const char *entrypoint,
                  const struct clc_runtime_kernel_conf *conf,
                  const struct clc_spirv_specialization_consts *consts,
                  const struct clc_logger *logger,
                  struct clc_dxil_object *out_dxil)
{
   struct nir_shader *nir;

   for (unsigned i = 0; i < parsed_data->num_kernels; i++) {
      if (!strcmp(parsed_data->kernels[i].name, entrypoint)) {
         out_dxil->kernel = &parsed_data->kernels[i];
         break;
      }
   }

   if (!out_dxil->kernel) {
      clc_error(logger, "no '%s' kernel found", entrypoint);
      return false;
   }

   const struct spirv_capabilities libclc_spirv_caps = {
      .Addresses = true,
      .Float64 = true,
      .Int8 = true,
      .Int16 = true,
      .Int64 = true,
      .Kernel = true,
      .ImageBasic = true,
      .ImageReadWrite = true,
      .LiteralSampler = true,

      // These aren't fully supported, but silence warnings about them from
      // code that doesn't really use them.
      .Linkage = true,
      .GenericPointer = true,
   };
   const struct spirv_to_nir_options spirv_options = {
      .environment = NIR_SPIRV_OPENCL,
      .clc_shader = clc_libclc_get_clc_shader(lib),
      .constant_addr_format = nir_address_format_32bit_index_offset_pack64,
      .global_addr_format = nir_address_format_32bit_index_offset_pack64,
      .shared_addr_format = nir_address_format_32bit_offset_as_64bit,
      .temp_addr_format = nir_address_format_32bit_offset_as_64bit,
      .float_controls_execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32,
      .printf = true,
      .capabilities = &libclc_spirv_caps,
   };
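   /* A note on the address formats (interpretive, not from the original
    * source): global/constant pointers are a 32-bit buffer index plus a
    * 32-bit offset packed into one 64-bit value, while shared/temp pointers
    * are plain 32-bit offsets carried in 64-bit values. This matches DXIL's
    * 32-bit resource addressing while CL pointers stay 64 bits wide. */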
   unsigned supported_int_sizes = (16 | 32 | 64);
   unsigned supported_float_sizes = (16 | 32);
   if (conf) {
      supported_int_sizes &= ~conf->lower_bit_size;
      supported_float_sizes &= ~conf->lower_bit_size;
   }
   nir_shader_compiler_options nir_options;
   dxil_get_nir_compiler_options(&nir_options,
                                 conf ? conf->max_shader_model : SHADER_MODEL_6_2,
                                 supported_int_sizes,
                                 supported_float_sizes);

   glsl_type_singleton_init_or_ref();

   nir = spirv_to_nir(linked_spirv->data, linked_spirv->size / 4,
                      consts ? (struct nir_spirv_specialization *)consts->specializations : NULL,
                      consts ? consts->num_specializations : 0,
                      MESA_SHADER_KERNEL, entrypoint,
                      &spirv_options,
                      &nir_options);
   if (!nir) {
      clc_error(logger, "spirv_to_nir() failed");
      goto err_free_dxil;
   }
   nir->info.workgroup_size_variable = true;

   NIR_PASS_V(nir, nir_lower_goto_ifs);
   NIR_PASS_V(nir, nir_opt_dead_cf);

   struct clc_dxil_metadata *metadata = &out_dxil->metadata;

   metadata->args = calloc(out_dxil->kernel->num_args,
                           sizeof(*metadata->args));
   if (!metadata->args) {
      clc_error(logger, "failed to allocate arg positions");
      goto err_free_dxil;
   }

   {
      bool progress;
      do
      {
         progress = false;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
         NIR_PASS(progress, nir, nir_opt_deref);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_undef);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
         NIR_PASS(progress, nir, nir_split_var_copies);
         NIR_PASS(progress, nir, nir_lower_var_copies);
         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
         NIR_PASS(progress, nir, nir_opt_algebraic);
      } while (progress);
   }

   // Inline all functions first, as suggested by the comment on
   // nir_inline_functions.
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_link_shader_functions, clc_libclc_get_clc_shader(lib));
   NIR_PASS_V(nir, nir_inline_functions);

   // Pick off the single entrypoint that we want.
   nir_remove_non_entrypoints(nir);

   {
      bool progress;
      do
      {
         progress = false;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
         NIR_PASS(progress, nir, nir_opt_deref);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_undef);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
         NIR_PASS(progress, nir, nir_split_var_copies);
         NIR_PASS(progress, nir, nir_lower_var_copies);
         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
         NIR_PASS(progress, nir, nir_opt_algebraic);
         NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_optimize_phi_true_false);
         NIR_PASS(progress, nir, nir_opt_dead_cf);
         NIR_PASS(progress, nir, nir_opt_remove_phis);
         NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
         NIR_PASS(progress, nir, nir_lower_vec3_to_vec4, nir_var_mem_generic | nir_var_uniform);
         NIR_PASS(progress, nir, nir_opt_memcpy);
      } while (progress);
   }

   NIR_PASS_V(nir, nir_scale_fdiv);

   /* 128 is the minimum value for CL_DEVICE_MAX_READ_IMAGE_ARGS and used by CLOn12 */
   dxil_wrap_sampler_state int_sampler_states[128] = { {{0}} };
   unsigned sampler_id = 0;

   NIR_PASS_V(nir, nir_lower_variable_initializers, ~(nir_var_function_temp | nir_var_shader_temp));

   // Ensure the printf struct has explicit types. We'll throw away the
   // scratch size, because we haven't necessarily removed all temp variables
   // (e.g. the printf struct itself) at this point; this pass is rerun later.
   assert(nir->scratch_size == 0);
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, glsl_get_cl_type_size_align);

   nir_lower_printf_options printf_options = {
      .max_buffer_size = 1024 * 1024
   };
   NIR_PASS_V(nir, nir_lower_printf, &printf_options);

   metadata->printf.info_count = nir->printf_info_count;
   metadata->printf.infos = calloc(nir->printf_info_count, sizeof(struct clc_printf_info));
   for (unsigned i = 0; i < nir->printf_info_count; i++) {
      metadata->printf.infos[i].str = malloc(nir->printf_info[i].string_size);
      memcpy(metadata->printf.infos[i].str, nir->printf_info[i].strings, nir->printf_info[i].string_size);
      metadata->printf.infos[i].num_args = nir->printf_info[i].num_args;
      metadata->printf.infos[i].arg_sizes = malloc(nir->printf_info[i].num_args * sizeof(unsigned));
      memcpy(metadata->printf.infos[i].arg_sizes, nir->printf_info[i].arg_sizes, nir->printf_info[i].num_args * sizeof(unsigned));
   }

   // For uniforms (kernel inputs, minus images), run this before adjusting variable list via image/sampler lowering
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_uniform, glsl_get_cl_type_size_align);

   // Calculate input offsets/metadata.
   unsigned uav_id = 0;
   nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
      int i = var->data.location;
      if (i < 0)
         continue;

      unsigned size = glsl_get_cl_size(var->type);

      metadata->args[i].offset = var->data.driver_location;
      metadata->args[i].size = size;
      metadata->kernel_inputs_buf_size = MAX2(metadata->kernel_inputs_buf_size,
         var->data.driver_location + size);
      if (out_dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_GLOBAL ||
          out_dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_CONSTANT) {
         metadata->args[i].globconstptr.buf_id = var->data.binding = uav_id++;
      } else if (glsl_type_is_sampler(var->type)) {
         unsigned address_mode = conf ? conf->args[i].sampler.addressing_mode : 0u;
         int_sampler_states[sampler_id].wrap[0] =
            int_sampler_states[sampler_id].wrap[1] =
            int_sampler_states[sampler_id].wrap[2] = wrap_from_cl_addressing(address_mode);
         int_sampler_states[sampler_id].is_nonnormalized_coords =
            conf ? !conf->args[i].sampler.normalized_coords : 0;
         int_sampler_states[sampler_id].is_linear_filtering =
            conf ? conf->args[i].sampler.linear_filtering : 0;
         metadata->args[i].sampler.sampler_id = var->data.binding = sampler_id++;
      }
   }

   unsigned num_global_inputs = uav_id;

   // Before removing dead uniforms, dedupe inline samplers to make more dead uniforms
   NIR_PASS_V(nir, nir_dedup_inline_samplers);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform | nir_var_mem_ubo |
              nir_var_mem_constant | nir_var_function_temp | nir_var_image, NULL);

   nir->scratch_size = 0;
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
              nir_var_mem_shared | nir_var_function_temp | nir_var_mem_global | nir_var_mem_constant,
              glsl_get_cl_type_size_align);

   // Lower memcpy - needs to wait until types are sized
   {
      bool progress;
      do {
         progress = false;
         NIR_PASS(progress, nir, nir_opt_memcpy);
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
         NIR_PASS(progress, nir, nir_opt_deref);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_split_var_copies);
         NIR_PASS(progress, nir, nir_lower_var_copies);
         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
      } while (progress);
   }
   NIR_PASS_V(nir, nir_lower_memcpy);

   NIR_PASS_V(nir, clc_nir_lower_global_pointers_to_constants);

   // Attempt to preserve derefs to constants by moving them to shader_temp
   NIR_PASS_V(nir, dxil_nir_lower_constant_to_temp);
   // While inserting new var derefs for our "logical" addressing mode, temporarily
   // switch the pointer size to 32-bit.
   nir->info.cs.ptr_size = 32;
   NIR_PASS_V(nir, nir_split_struct_vars, nir_var_shader_temp);
   NIR_PASS_V(nir, dxil_nir_flatten_var_arrays, nir_var_shader_temp);
   NIR_PASS_V(nir, dxil_nir_lower_var_bit_size, nir_var_shader_temp,
              (supported_int_sizes & 16) ? 16 : 32, (supported_int_sizes & 64) ? 64 : 32);
   nir->info.cs.ptr_size = 64;

   NIR_PASS_V(nir, clc_lower_constant_to_ssbo, out_dxil->kernel, &uav_id);
   NIR_PASS_V(nir, clc_change_variable_mode, nir_var_shader_temp, nir_var_mem_constant);
   NIR_PASS_V(nir, clc_change_variable_mode, nir_var_mem_global, nir_var_mem_ssbo);

   bool has_printf = false;
   NIR_PASS(has_printf, nir, clc_lower_printf_base, uav_id);
   metadata->printf.uav_id = has_printf ? uav_id++ : -1;

   NIR_PASS_V(nir, dxil_nir_lower_deref_ssbo);

   NIR_PASS_V(nir, dxil_nir_split_unaligned_loads_stores, nir_var_mem_shared | nir_var_function_temp);

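   // A note on the resulting binding model (an interpretive summary, not from
   // the original source): global/constant pointer args, lowered constants,
   // and the printf buffer land in UAV space; read-only images become SRVs;
   // writable images become UAVs; kernel inputs and work properties each get
   // a CBV; and samplers get their own sampler bindings.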
   // Second pass over inputs to calculate image bindings
   unsigned srv_id = 0;
   nir_foreach_image_variable(var, nir) {
      int i = var->data.location;
      if (i < 0)
         continue;

      assert(glsl_type_is_image(var->type));

      if (var->data.access == ACCESS_NON_WRITEABLE) {
         metadata->args[i].image.buf_ids[0] = srv_id++;
      } else {
         // Write or read-write are UAVs
         metadata->args[i].image.buf_ids[0] = uav_id++;
      }

      metadata->args[i].image.num_buf_ids = 0;
      var->data.binding = metadata->args[i].image.buf_ids[0];

      // Assign location that'll be used for uniforms for format/order
      var->data.driver_location = metadata->kernel_inputs_buf_size;
      metadata->args[i].offset = metadata->kernel_inputs_buf_size;
      metadata->args[i].size = 8;
      metadata->kernel_inputs_buf_size += metadata->args[i].size;
   }

   // Fill out inline sampler metadata, now that they've been deduped and dead ones removed
   nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
      if (glsl_type_is_sampler(var->type) && var->data.sampler.is_inline_sampler) {
         int_sampler_states[sampler_id].wrap[0] =
            int_sampler_states[sampler_id].wrap[1] =
            int_sampler_states[sampler_id].wrap[2] =
            wrap_from_cl_addressing(var->data.sampler.addressing_mode);
         int_sampler_states[sampler_id].is_nonnormalized_coords =
            !var->data.sampler.normalized_coordinates;
         int_sampler_states[sampler_id].is_linear_filtering =
            var->data.sampler.filter_mode == SAMPLER_FILTER_MODE_LINEAR;
         var->data.binding = sampler_id++;

         assert(metadata->num_const_samplers < CLC_MAX_SAMPLERS);
         metadata->const_samplers[metadata->num_const_samplers].sampler_id = var->data.binding;
         metadata->const_samplers[metadata->num_const_samplers].addressing_mode = var->data.sampler.addressing_mode;
         metadata->const_samplers[metadata->num_const_samplers].normalized_coords = var->data.sampler.normalized_coordinates;
         metadata->const_samplers[metadata->num_const_samplers].filter_mode = var->data.sampler.filter_mode;
         metadata->num_const_samplers++;
      }
   }

   // Needs to come before lower_explicit_io
   NIR_PASS_V(nir, nir_lower_readonly_images_to_tex, false);
   struct clc_image_lower_context image_lower_context = { metadata, &srv_id, &uav_id };
   NIR_PASS_V(nir, clc_lower_images, &image_lower_context);
   NIR_PASS_V(nir, clc_lower_nonnormalized_samplers, int_sampler_states);
   NIR_PASS_V(nir, nir_lower_samplers);
   NIR_PASS_V(nir, dxil_lower_sample_to_txf_for_integer_tex,
              sampler_id, int_sampler_states, NULL, 14.0f);

   assert(nir->info.cs.ptr_size == 64);
   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
              nir_address_format_32bit_index_offset_pack64);
   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_shared | nir_var_function_temp | nir_var_uniform,
              nir_address_format_32bit_offset_as_64bit);

   NIR_PASS_V(nir, nir_lower_system_values);

   nir_lower_compute_system_values_options compute_options = {
      .has_base_global_invocation_id = (conf && conf->support_global_work_id_offsets),
      .has_base_workgroup_id = (conf && conf->support_workgroup_id_offsets),
   };
   NIR_PASS_V(nir, nir_lower_compute_system_values, &compute_options);

   NIR_PASS_V(nir, clc_lower_64bit_semantics);

   NIR_PASS_V(nir, nir_opt_deref);
   NIR_PASS_V(nir, nir_lower_vars_to_ssa);

   unsigned cbv_id = 0;

   nir_variable *inputs_var =
      add_kernel_inputs_var(out_dxil, nir, &cbv_id);
   nir_variable *work_properties_var =
      add_work_properties_var(out_dxil, nir, &cbv_id);

   memcpy(metadata->local_size, nir->info.workgroup_size,
          sizeof(metadata->local_size));
   memcpy(metadata->local_size_hint, nir->info.cs.workgroup_size_hint,
          sizeof(metadata->local_size));

   // Patch the local size before calling clc_nir_lower_system_values().
   if (conf) {
      for (unsigned i = 0; i < ARRAY_SIZE(nir->info.workgroup_size); i++) {
         if (!conf->local_size[i] ||
             conf->local_size[i] == nir->info.workgroup_size[i])
            continue;

         if (nir->info.workgroup_size[i] &&
             nir->info.workgroup_size[i] != conf->local_size[i]) {
            debug_printf("D3D12: runtime local size does not match reqd_work_group_size() values\n");
            goto err_free_dxil;
         }

         nir->info.workgroup_size[i] = conf->local_size[i];
      }
      memcpy(metadata->local_size, nir->info.workgroup_size,
            sizeof(metadata->local_size));
   } else {
      /* Make sure there's at least one thread that's set to run */
      for (unsigned i = 0; i < ARRAY_SIZE(nir->info.workgroup_size); i++) {
         if (nir->info.workgroup_size[i] == 0)
            nir->info.workgroup_size[i] = 1;
      }
   }

   NIR_PASS_V(nir, clc_nir_lower_kernel_input_loads, inputs_var);
   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
              nir_address_format_32bit_index_offset);
   NIR_PASS_V(nir, clc_nir_lower_system_values, work_properties_var);
   const struct dxil_nir_lower_loads_stores_options loads_stores_options = {
      .use_16bit_ssbo = false,
   };

   /* Now that function-declared local vars have been sized, append args */
   for (unsigned i = 0; i < out_dxil->kernel->num_args; i++) {
      if (out_dxil->kernel->args[i].address_qualifier != CLC_KERNEL_ARG_ADDRESS_LOCAL)
         continue;

      /* If we don't have the runtime conf yet, we just create a dummy variable.
       * This will be adjusted when clc_spirv_to_dxil() is called with a conf
       * argument.
       */
      unsigned size = 4;
      if (conf && conf->args)
         size = conf->args[i].localptr.size;

      /* The alignment required for the pointee type is not easy to get from
       * here, so let's base our logic on the size itself. Anything bigger than
       * the maximum alignment constraint (which is 128 bytes, since ulong16 or
       * double16 are the biggest base types) should be aligned on this
       * maximum alignment constraint. For smaller types, we use the size
       * itself to calculate the alignment.
       */
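      /* e.g. (illustrative): size 4 -> alignment 4; size 12 -> ffs(12) = 3,
       * so alignment 1 << 2 = 4; size 256 -> capped at 128. */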
      unsigned alignment = size < 128 ? (1 << (ffs(size) - 1)) : 128;

      nir->info.shared_size = align(nir->info.shared_size, alignment);
      metadata->args[i].localptr.sharedmem_offset = nir->info.shared_size;
      nir->info.shared_size += size;
   }

   NIR_PASS_V(nir, dxil_nir_lower_loads_stores_to_dxil, &loads_stores_options);
   NIR_PASS_V(nir, dxil_nir_opt_alu_deref_srcs);
   NIR_PASS_V(nir, nir_lower_fp16_casts, nir_lower_fp16_all);
   NIR_PASS_V(nir, nir_lower_convert_alu_types, NULL);

   // Convert pack to pack_split
   NIR_PASS_V(nir, nir_lower_pack);
   // Lower pack_split to bit math
   NIR_PASS_V(nir, nir_opt_algebraic);

   NIR_PASS_V(nir, nir_opt_dce);

   nir_validate_shader(nir, "Validate before feeding NIR to the DXIL compiler");
   struct nir_to_dxil_options opts = {
      .interpolate_at_vertex = false,
      .lower_int16 = (conf && (conf->lower_bit_size & 16) != 0),
      .disable_math_refactoring = true,
      .num_kernel_globals = num_global_inputs,
      .environment = DXIL_ENVIRONMENT_CL,
      .shader_model_max = conf && conf->max_shader_model ? conf->max_shader_model : SHADER_MODEL_6_2,
      .validator_version_max = conf ? conf->validator_version : DXIL_VALIDATOR_1_4,
   };

   metadata->local_mem_size = nir->info.shared_size;
   metadata->priv_mem_size = nir->scratch_size;

   /* DXIL double math is too limited compared to what NIR expects. Let's refuse
    * to compile a shader when it contains double operations until we have
    * double lowering hooked up.
    */
   if (shader_has_double(nir)) {
      clc_error(logger, "NIR shader contains doubles, which we don't support yet");
      goto err_free_dxil;
   }

   struct dxil_logger dxil_logger = { .priv = logger ? logger->priv : NULL,
                                     .log = logger ? logger->error : NULL};

   struct blob tmp;
   if (!nir_to_dxil(nir, &opts, logger ? &dxil_logger : NULL, &tmp)) {
      debug_printf("D3D12: nir_to_dxil failed\n");
      goto err_free_dxil;
   }

   nir_foreach_variable_with_modes(var, nir, nir_var_mem_ssbo) {
      if (var->constant_initializer) {
         if (glsl_type_is_array(var->type)) {
            int size = align(glsl_get_cl_size(var->type), 4);
            uint8_t *data = malloc(size);
            if (!data)
               goto err_free_dxil;

            copy_const_initializer(var->constant_initializer, var->type, data);
            metadata->consts[metadata->num_consts].data = data;
            metadata->consts[metadata->num_consts].size = size;
            metadata->consts[metadata->num_consts].uav_id = var->data.binding;
            metadata->num_consts++;
         } else
            unreachable("unexpected constant initializer");
      }
   }

   metadata->kernel_inputs_cbv_id = inputs_var ? inputs_var->data.binding : 0;
   metadata->work_properties_cbv_id = work_properties_var->data.binding;
   metadata->num_uavs = uav_id;
   metadata->num_srvs = srv_id;
   metadata->num_samplers = sampler_id;

   ralloc_free(nir);
   glsl_type_singleton_decref();

   blob_finish_get_buffer(&tmp, &out_dxil->binary.data,
                          &out_dxil->binary.size);
   return true;

err_free_dxil:
   clc_free_dxil_object(out_dxil);
   return false;
}

void clc_free_dxil_object(struct clc_dxil_object *dxil)
{
   for (unsigned i = 0; i < dxil->metadata.num_consts; i++)
      free(dxil->metadata.consts[i].data);

   for (unsigned i = 0; i < dxil->metadata.printf.info_count; i++) {
      free(dxil->metadata.printf.infos[i].arg_sizes);
      free(dxil->metadata.printf.infos[i].str);
   }
   free(dxil->metadata.printf.infos);

   free(dxil->binary.data);
}

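/* MESA_GIT_SHA1 is expected to look like "git-<hex sha>" (an assumption based
 * on the parsing below): the hex digits after the dash become the version,
 * and builds without git info report 0. */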
uint64_t clc_compiler_get_version(void)
{
   const char sha1[] = MESA_GIT_SHA1;
   const char* dash = strchr(sha1, '-');
   if (dash) {
      return strtoull(dash + 1, NULL, 16);
   }
   return 0;
}