/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_to_dxil.h"

#include "dxil_container.h"
#include "dxil_dump.h"
#include "dxil_enums.h"
#include "dxil_function.h"
#include "dxil_module.h"
#include "dxil_nir.h"
#include "dxil_signature.h"

#include "nir/nir_builder.h"
#include "nir_deref.h"
#include "util/ralloc.h"
#include "util/u_debug.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"

#include "git_sha1.h"

#include "vulkan/vulkan_core.h"

#include <stdint.h>

int debug_dxil = 0;

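/* Debug flags for this backend, read once from the DXIL_DEBUG environment
 * variable as a comma-separated list of the names below, e.g.
 * DXIL_DEBUG=trace,dump_blob.
 */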
static const struct debug_named_value
dxil_debug_options[] = {
   { "verbose", DXIL_DEBUG_VERBOSE, NULL },
   { "dump_blob", DXIL_DEBUG_DUMP_BLOB, "Write shader blobs" },
   { "trace", DXIL_DEBUG_TRACE, "Trace instruction conversion" },
   { "dump_module", DXIL_DEBUG_DUMP_MODULE, "Dump module tree to stderr" },
   DEBUG_NAMED_VALUE_END
};

DEBUG_GET_ONCE_FLAGS_OPTION(debug_dxil, "DXIL_DEBUG", dxil_debug_options, 0)

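/* Pretty-print an unsupported NIR instruction and hand the message to the
 * caller-provided logger.
 */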
static void
log_nir_instr_unsupported(const struct dxil_logger *logger,
                          const char *message_prefix, const nir_instr *instr)
{
   char *msg = NULL;
   char *instr_str = nir_instr_as_str(instr, NULL);
   asprintf(&msg, "%s: %s\n", message_prefix, instr_str);
   ralloc_free(instr_str);
   assert(msg);
   logger->log(logger->priv, msg);
   free(msg);
}

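/* Fallback logger used when the caller does not provide one: print to stderr
 * and treat the condition as unreachable, since an unsupported instruction at
 * this point is a compiler bug.
 */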
static void
default_logger_func(void *priv, const char *msg)
{
   fprintf(stderr, "%s", msg);
   unreachable("Unhandled error");
}

static const struct dxil_logger default_logger = { .priv = NULL, .log = default_logger_func };

#define TRACE_CONVERSION(instr) \
   if (debug_dxil & DXIL_DEBUG_TRACE) \
      do { \
         fprintf(stderr, "Convert '"); \
         nir_print_instr(instr, stderr); \
         fprintf(stderr, "'\n"); \
      } while (0)

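/* Baseline NIR options for this backend. The lowering flags describe what
 * DXIL cannot (or should not) express directly, so the corresponding NIR
 * passes rewrite those operations before emission.
 */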
static const nir_shader_compiler_options
nir_options = {
   .compact_arrays = true,
   .lower_ineg = true,
   .lower_fneg = true,
   .lower_ffma16 = true,
   .lower_ffma32 = true,
   .lower_isign = true,
   .lower_fsign = true,
   .lower_iabs = true,
   .lower_fmod = true,
   .lower_fpow = true,
   .lower_scmp = true,
   .lower_ldexp = true,
   .lower_flrp16 = true,
   .lower_flrp32 = true,
   .lower_flrp64 = true,
   .lower_bitfield_extract = true,
   .lower_ifind_msb = true,
   .lower_ufind_msb = true,
   .lower_extract_word = true,
   .lower_extract_byte = true,
   .lower_insert_word = true,
   .lower_insert_byte = true,
   .lower_all_io_to_elements = true,
   .lower_hadd = true,
   .lower_uadd_sat = true,
   .lower_usub_sat = true,
   .lower_iadd_sat = true,
   .lower_uadd_carry = true,
   .lower_usub_borrow = true,
   .lower_mul_high = true,
   .lower_pack_half_2x16 = true,
   .lower_pack_unorm_4x8 = true,
   .lower_pack_snorm_4x8 = true,
   .lower_pack_snorm_2x16 = true,
   .lower_pack_unorm_2x16 = true,
   .lower_pack_64_2x32_split = true,
   .lower_pack_32_2x16_split = true,
   .lower_pack_64_4x16 = true,
   .lower_unpack_64_2x32_split = true,
   .lower_unpack_32_2x16_split = true,
   .lower_unpack_half_2x16 = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_unpack_snorm_4x8 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_unpack_unorm_4x8 = true,
   .lower_interpolate_at = true,
   .has_fsub = true,
   .has_isub = true,
   .has_bfe = true,
   .has_find_msb_rev = true,
   .vertex_id_zero_based = true,
   .lower_base_vertex = true,
   .lower_helper_invocation = true,
   .has_cs_global_id = true,
   .lower_mul_2x32_64 = true,
   .lower_doubles_options =
      nir_lower_drcp |
      nir_lower_dsqrt |
      nir_lower_drsq |
      nir_lower_dfract |
      nir_lower_dtrunc |
      nir_lower_dfloor |
      nir_lower_dceil |
      nir_lower_dround_even,
   .lower_uniforms_to_ubo = true,
   .max_unroll_iterations = 32, /* arbitrary */
   .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out),
   .lower_device_index_to_zero = true,
   .linker_ignore_precision = true,
   .support_16bit_alu = true,
   .preserve_mediump = true,
   .discard_is_demote = true,
   .has_ddx_intrinsics = true,
   .scalarize_ddx = true,
};

const nir_shader_compiler_options*
dxil_get_base_nir_compiler_options(void)
{
   return &nir_options;
}

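/* Copy the base options and adjust them for the caller's shader model and
 * supported integer/float widths. Bit widths are encoded as a mask of the
 * sizes themselves, so 64-bit support is tested with (sizes & 64).
 */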
void
dxil_get_nir_compiler_options(nir_shader_compiler_options *options,
                              enum dxil_shader_model shader_model_max,
                              unsigned supported_int_sizes,
                              unsigned supported_float_sizes)
{
   *options = nir_options;
   if (!(supported_int_sizes & 64)) {
      options->lower_pack_64_2x32_split = false;
      options->lower_unpack_64_2x32_split = false;
      options->lower_int64_options = ~0;
   }
   if (!(supported_float_sizes & 64))
      options->lower_doubles_options = ~0;
   if (shader_model_max >= SHADER_MODEL_6_4) {
      options->has_sdot_4x8 = true;
      options->has_udot_4x8 = true;
   }
}

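/* Emit the llvm.ident named metadata node identifying the producing compiler,
 * mirroring what LLVM-based compilers record there.
 */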
static bool
emit_llvm_ident(struct dxil_module *m)
{
   const struct dxil_mdnode *compiler = dxil_get_metadata_string(m, "Mesa version " PACKAGE_VERSION MESA_GIT_SHA1);
   if (!compiler)
      return false;

   const struct dxil_mdnode *llvm_ident = dxil_get_metadata_node(m, &compiler, 1);
   return llvm_ident &&
          dxil_add_metadata_named_node(m, "llvm.ident", &llvm_ident, 1);
}

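/* Emit a named metadata node holding a (major, minor) version pair;
 * presumably used for nodes such as dx.version and dx.valver.
 */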
static bool
emit_named_version(struct dxil_module *m, const char *name,
                   int major, int minor)
{
   const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, major);
   const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, minor);
   const struct dxil_mdnode *version_nodes[] = { major_node, minor_node };
   const struct dxil_mdnode *version = dxil_get_metadata_node(m, version_nodes,
                                                     ARRAY_SIZE(version_nodes));
   return dxil_add_metadata_named_node(m, name, &version, 1);
}

static const char *
get_shader_kind_str(enum dxil_shader_kind kind)
{
   switch (kind) {
   case DXIL_PIXEL_SHADER:
      return "ps";
   case DXIL_VERTEX_SHADER:
      return "vs";
   case DXIL_GEOMETRY_SHADER:
      return "gs";
   case DXIL_HULL_SHADER:
      return "hs";
   case DXIL_DOMAIN_SHADER:
      return "ds";
   case DXIL_COMPUTE_SHADER:
      return "cs";
   default:
      unreachable("invalid shader kind");
   }
}

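/* Emit the dx.shaderModel named metadata: a (kind, major, minor) triple such
 * as ("cs", 6, 0).
 */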
static bool
emit_dx_shader_model(struct dxil_module *m)
{
   const struct dxil_mdnode *type_node = dxil_get_metadata_string(m, get_shader_kind_str(m->shader_kind));
   const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, m->major_version);
   const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, m->minor_version);
   const struct dxil_mdnode *shader_model[] = { type_node, major_node,
                                                minor_node };
   const struct dxil_mdnode *dx_shader_model = dxil_get_metadata_node(m, shader_model, ARRAY_SIZE(shader_model));

   return dxil_add_metadata_named_node(m, "dx.shaderModel",
                                       &dx_shader_model, 1);
}

enum {
   DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG = 0,
   DXIL_STRUCTURED_BUFFER_ELEMENT_STRIDE_TAG = 1
};

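/* DXIL operation opcodes. The numeric values are fixed by the DXIL
 * specification (DXIL.rst in DirectXShaderCompiler) and are passed as the
 * first argument of every dx.op.* call below.
 */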
enum dxil_intr {
   DXIL_INTR_LOAD_INPUT = 4,
   DXIL_INTR_STORE_OUTPUT = 5,
   DXIL_INTR_FABS = 6,
   DXIL_INTR_SATURATE = 7,

   DXIL_INTR_ISFINITE = 10,
   DXIL_INTR_ISNORMAL = 11,

   DXIL_INTR_FCOS = 12,
   DXIL_INTR_FSIN = 13,

   DXIL_INTR_FEXP2 = 21,
   DXIL_INTR_FRC = 22,
   DXIL_INTR_FLOG2 = 23,

   DXIL_INTR_SQRT = 24,
   DXIL_INTR_RSQRT = 25,
   DXIL_INTR_ROUND_NE = 26,
   DXIL_INTR_ROUND_NI = 27,
   DXIL_INTR_ROUND_PI = 28,
   DXIL_INTR_ROUND_Z = 29,

   DXIL_INTR_BFREV = 30,
   DXIL_INTR_COUNTBITS = 31,
   DXIL_INTR_FIRSTBIT_LO = 32,
   DXIL_INTR_FIRSTBIT_HI = 33,
   DXIL_INTR_FIRSTBIT_SHI = 34,

   DXIL_INTR_FMAX = 35,
   DXIL_INTR_FMIN = 36,
   DXIL_INTR_IMAX = 37,
   DXIL_INTR_IMIN = 38,
   DXIL_INTR_UMAX = 39,
   DXIL_INTR_UMIN = 40,

   DXIL_INTR_FMA = 47,

   DXIL_INTR_IBFE = 51,
   DXIL_INTR_UBFE = 52,
   DXIL_INTR_BFI = 53,

   DXIL_INTR_CREATE_HANDLE = 57,
   DXIL_INTR_CBUFFER_LOAD_LEGACY = 59,

   DXIL_INTR_SAMPLE = 60,
   DXIL_INTR_SAMPLE_BIAS = 61,
   DXIL_INTR_SAMPLE_LEVEL = 62,
   DXIL_INTR_SAMPLE_GRAD = 63,
   DXIL_INTR_SAMPLE_CMP = 64,
   DXIL_INTR_SAMPLE_CMP_LVL_ZERO = 65,

   DXIL_INTR_TEXTURE_LOAD = 66,
   DXIL_INTR_TEXTURE_STORE = 67,

   DXIL_INTR_BUFFER_LOAD = 68,
   DXIL_INTR_BUFFER_STORE = 69,

   DXIL_INTR_TEXTURE_SIZE = 72,
   DXIL_INTR_TEXTURE_GATHER = 73,
   DXIL_INTR_TEXTURE_GATHER_CMP = 74,

   DXIL_INTR_TEXTURE2DMS_GET_SAMPLE_POSITION = 75,
   DXIL_INTR_RENDER_TARGET_GET_SAMPLE_POSITION = 76,
   DXIL_INTR_RENDER_TARGET_GET_SAMPLE_COUNT = 77,

   DXIL_INTR_ATOMIC_BINOP = 78,
   DXIL_INTR_ATOMIC_CMPXCHG = 79,
   DXIL_INTR_BARRIER = 80,
   DXIL_INTR_TEXTURE_LOD = 81,

   DXIL_INTR_DISCARD = 82,
   DXIL_INTR_DDX_COARSE = 83,
   DXIL_INTR_DDY_COARSE = 84,
   DXIL_INTR_DDX_FINE = 85,
   DXIL_INTR_DDY_FINE = 86,

   DXIL_INTR_EVAL_SNAPPED = 87,
   DXIL_INTR_EVAL_SAMPLE_INDEX = 88,
   DXIL_INTR_EVAL_CENTROID = 89,

   DXIL_INTR_SAMPLE_INDEX = 90,
   DXIL_INTR_COVERAGE = 91,

   DXIL_INTR_THREAD_ID = 93,
   DXIL_INTR_GROUP_ID = 94,
   DXIL_INTR_THREAD_ID_IN_GROUP = 95,
   DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP = 96,

   DXIL_INTR_EMIT_STREAM = 97,
   DXIL_INTR_CUT_STREAM = 98,

   DXIL_INTR_GS_INSTANCE_ID = 100,

   DXIL_INTR_MAKE_DOUBLE = 101,
   DXIL_INTR_SPLIT_DOUBLE = 102,

   DXIL_INTR_LOAD_OUTPUT_CONTROL_POINT = 103,
   DXIL_INTR_LOAD_PATCH_CONSTANT = 104,
   DXIL_INTR_DOMAIN_LOCATION = 105,
   DXIL_INTR_STORE_PATCH_CONSTANT = 106,
   DXIL_INTR_OUTPUT_CONTROL_POINT_ID = 107,
   DXIL_INTR_PRIMITIVE_ID = 108,

   DXIL_INTR_WAVE_IS_FIRST_LANE = 110,
   DXIL_INTR_WAVE_GET_LANE_INDEX = 111,
   DXIL_INTR_WAVE_GET_LANE_COUNT = 112,
   DXIL_INTR_WAVE_ANY_TRUE = 113,
   DXIL_INTR_WAVE_ALL_TRUE = 114,
   DXIL_INTR_WAVE_ACTIVE_ALL_EQUAL = 115,
   DXIL_INTR_WAVE_ACTIVE_BALLOT = 116,
   DXIL_INTR_WAVE_READ_LANE_AT = 117,
   DXIL_INTR_WAVE_READ_LANE_FIRST = 118,
   DXIL_INTR_WAVE_ACTIVE_OP = 119,
   DXIL_INTR_WAVE_ACTIVE_BIT = 120,
   DXIL_INTR_WAVE_PREFIX_OP = 121,
   DXIL_INTR_QUAD_READ_LANE_AT = 122,
   DXIL_INTR_QUAD_OP = 123,

   DXIL_INTR_LEGACY_F32TOF16 = 130,
   DXIL_INTR_LEGACY_F16TOF32 = 131,

   DXIL_INTR_ATTRIBUTE_AT_VERTEX = 137,
   DXIL_INTR_VIEW_ID = 138,

   DXIL_INTR_RAW_BUFFER_LOAD = 139,
   DXIL_INTR_RAW_BUFFER_STORE = 140,

   DXIL_INTR_DOT4_ADD_I8_PACKED = 163,
   DXIL_INTR_DOT4_ADD_U8_PACKED = 164,

   DXIL_INTR_ANNOTATE_HANDLE = 216,
   DXIL_INTR_CREATE_HANDLE_FROM_BINDING = 217,
   DXIL_INTR_CREATE_HANDLE_FROM_HEAP = 218,

   DXIL_INTR_IS_HELPER_LANE = 221,
   DXIL_INTR_SAMPLE_CMP_LEVEL = 224,
   DXIL_INTR_SAMPLE_CMP_GRAD = 254,
   DXIL_INTR_SAMPLE_CMP_BIAS = 255,

   DXIL_INTR_START_VERTEX_LOCATION = 256,
   DXIL_INTR_START_INSTANCE_LOCATION = 257,
};

enum dxil_atomic_op {
   DXIL_ATOMIC_ADD = 0,
   DXIL_ATOMIC_AND = 1,
   DXIL_ATOMIC_OR = 2,
   DXIL_ATOMIC_XOR = 3,
   DXIL_ATOMIC_IMIN = 4,
   DXIL_ATOMIC_IMAX = 5,
   DXIL_ATOMIC_UMIN = 6,
   DXIL_ATOMIC_UMAX = 7,
   DXIL_ATOMIC_EXCHANGE = 8,
};

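/* Map a NIR atomic opcode onto the DXIL atomicBinOp operation code (UAV
 * atomics) and, in the function below, onto the LLVM atomicrmw encoding used
 * for shared and scratch memory.
 */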
static enum dxil_atomic_op
nir_atomic_to_dxil_atomic(nir_atomic_op op)
{
   switch (op) {
   case nir_atomic_op_iadd: return DXIL_ATOMIC_ADD;
   case nir_atomic_op_iand: return DXIL_ATOMIC_AND;
   case nir_atomic_op_ior: return DXIL_ATOMIC_OR;
   case nir_atomic_op_ixor: return DXIL_ATOMIC_XOR;
   case nir_atomic_op_imin: return DXIL_ATOMIC_IMIN;
   case nir_atomic_op_imax: return DXIL_ATOMIC_IMAX;
   case nir_atomic_op_umin: return DXIL_ATOMIC_UMIN;
   case nir_atomic_op_umax: return DXIL_ATOMIC_UMAX;
   case nir_atomic_op_xchg: return DXIL_ATOMIC_EXCHANGE;
   default: unreachable("Unsupported atomic op");
   }
}

static enum dxil_rmw_op
nir_atomic_to_dxil_rmw(nir_atomic_op op)
{
   switch (op) {
   case nir_atomic_op_iadd: return DXIL_RMWOP_ADD;
   case nir_atomic_op_iand: return DXIL_RMWOP_AND;
   case nir_atomic_op_ior: return DXIL_RMWOP_OR;
   case nir_atomic_op_ixor: return DXIL_RMWOP_XOR;
   case nir_atomic_op_imin: return DXIL_RMWOP_MIN;
   case nir_atomic_op_imax: return DXIL_RMWOP_MAX;
   case nir_atomic_op_umin: return DXIL_RMWOP_UMIN;
   case nir_atomic_op_umax: return DXIL_RMWOP_UMAX;
   case nir_atomic_op_xchg: return DXIL_RMWOP_XCHG;
   default: unreachable("Unsupported atomic op");
   }
}

typedef struct {
   unsigned id;
   unsigned binding;
   unsigned size;
   unsigned space;
} resource_array_layout;

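/* Fill the six metadata fields common to every resource record: resource ID,
 * symbol, name, register space, lower bound, and range size. Callers append
 * their class-specific fields after these.
 */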
static void
fill_resource_metadata(struct dxil_module *m, const struct dxil_mdnode **fields,
                       const struct dxil_type *struct_type,
                       const char *name, const resource_array_layout *layout)
{
   const struct dxil_type *pointer_type = dxil_module_get_pointer_type(m, struct_type);
   const struct dxil_value *pointer_undef = dxil_module_get_undef(m, pointer_type);

   fields[0] = dxil_get_metadata_int32(m, layout->id); // resource ID
   fields[1] = dxil_get_metadata_value(m, pointer_type, pointer_undef); // global constant symbol
   fields[2] = dxil_get_metadata_string(m, name ? name : ""); // name
   fields[3] = dxil_get_metadata_int32(m, layout->space); // space ID
   fields[4] = dxil_get_metadata_int32(m, layout->binding); // lower bound
   fields[5] = dxil_get_metadata_int32(m, layout->size); // range size
}

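/* Build an SRV resource record: the common fields plus resource shape, sample
 * count and, for typed resources, a tag/value list giving the element
 * component type.
 */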
static const struct dxil_mdnode *
emit_srv_metadata(struct dxil_module *m, const struct dxil_type *elem_type,
                  const char *name, const resource_array_layout *layout,
                  enum dxil_component_type comp_type,
                  enum dxil_resource_kind res_kind)
{
   const struct dxil_mdnode *fields[9];

   const struct dxil_mdnode *metadata_tag_nodes[2];

   fill_resource_metadata(m, fields, elem_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape
   fields[7] = dxil_get_metadata_int1(m, 0); // sample count
   if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&
       res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {
      metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);
      metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);
      fields[8] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata
   } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      fields[8] = NULL;
   else
      unreachable("Structured buffers not supported yet");

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_uav_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                  const char *name, const resource_array_layout *layout,
                  enum dxil_component_type comp_type,
                  enum dxil_resource_kind res_kind,
                  enum gl_access_qualifier access)
{
   const struct dxil_mdnode *fields[11];

   const struct dxil_mdnode *metadata_tag_nodes[2];

   fill_resource_metadata(m, fields, struct_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape
   fields[7] = dxil_get_metadata_int1(m, (access & ACCESS_COHERENT) != 0); // globally-coherent
   fields[8] = dxil_get_metadata_int1(m, false); // has counter
   fields[9] = dxil_get_metadata_int1(m, false); // is ROV
   if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&
       res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {
      metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);
      metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);
      fields[10] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata
   } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      fields[10] = NULL;
   else
      unreachable("Structured buffers not supported yet");

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_cbv_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                  const char *name, const resource_array_layout *layout,
                  unsigned size)
{
   const struct dxil_mdnode *fields[8];

   fill_resource_metadata(m, fields, struct_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, size); // constant buffer size
   fields[7] = NULL; // metadata

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_sampler_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                      nir_variable *var, const resource_array_layout *layout)
{
   const struct dxil_mdnode *fields[8];
   const struct glsl_type *type = glsl_without_array(var->type);

   fill_resource_metadata(m, fields, struct_type, var->name, layout);
   enum dxil_sampler_kind sampler_kind = glsl_sampler_type_is_shadow(type) ?
          DXIL_SAMPLER_KIND_COMPARISON : DXIL_SAMPLER_KIND_DEFAULT;
   fields[6] = dxil_get_metadata_int32(m, sampler_kind); // sampler kind
   fields[7] = NULL; // metadata

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}


#define MAX_SRVS 128
#define MAX_UAVS 64
#define MAX_CBVS 64 // ??
#define MAX_SAMPLERS 64 // ??

struct dxil_def {
   const struct dxil_value *chans[NIR_MAX_VEC_COMPONENTS];
};

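/* Per-shader translation state: the DXIL module being built, metadata nodes
 * and cached handles for each resource class, the SSA-def table, and assorted
 * bookkeeping gathered while walking the NIR shader.
 */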
struct ntd_context {
   void *ralloc_ctx;
   const struct nir_to_dxil_options *opts;
   struct nir_shader *shader;

   struct dxil_module mod;

   struct util_dynarray srv_metadata_nodes;
   const struct dxil_value *srv_handles[MAX_SRVS];

   struct util_dynarray uav_metadata_nodes;
   const struct dxil_value *ssbo_handles[MAX_UAVS];
   const struct dxil_value *image_handles[MAX_UAVS];
   uint32_t num_uavs;

   struct util_dynarray cbv_metadata_nodes;
   const struct dxil_value *cbv_handles[MAX_CBVS];

   struct util_dynarray sampler_metadata_nodes;
   const struct dxil_value *sampler_handles[MAX_SAMPLERS];

   struct util_dynarray resources;

   const struct dxil_mdnode *shader_property_nodes[6];
   size_t num_shader_property_nodes;

   struct dxil_def *defs;
   unsigned num_defs;
   struct hash_table *phis;

   const struct dxil_value **sharedvars;
   const struct dxil_value **scratchvars;
   const struct dxil_value **consts;

   nir_variable *system_value[SYSTEM_VALUE_MAX];

   nir_function *tess_ctrl_patch_constant_func;
   unsigned tess_input_control_point_count;

   struct dxil_func_def *main_func_def;
   struct dxil_func_def *tess_ctrl_patch_constant_func_def;
   unsigned unnamed_ubo_count;

   BITSET_WORD *float_types;
   BITSET_WORD *int_types;

   const struct dxil_logger *logger;
};

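/* DXIL spreads unary operations across several intrinsic names; pick the one
 * matching the given opcode.
 */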
static const char*
unary_func_name(enum dxil_intr intr)
{
   switch (intr) {
   case DXIL_INTR_COUNTBITS:
   case DXIL_INTR_FIRSTBIT_HI:
   case DXIL_INTR_FIRSTBIT_SHI:
   case DXIL_INTR_FIRSTBIT_LO:
      return "dx.op.unaryBits";
   case DXIL_INTR_ISFINITE:
   case DXIL_INTR_ISNORMAL:
      return "dx.op.isSpecialFloat";
   default:
      return "dx.op.unary";
   }
}

static const struct dxil_value *
emit_unary_call(struct ntd_context *ctx, enum overload_type overload,
                enum dxil_intr intr,
                const struct dxil_value *op0)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod,
                                                    unary_func_name(intr),
                                                    overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
     opcode,
     op0
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_binary_call(struct ntd_context *ctx, enum overload_type overload,
                 enum dxil_intr intr,
                 const struct dxil_value *op0, const struct dxil_value *op1)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.binary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
     opcode,
     op0,
     op1
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_tertiary_call(struct ntd_context *ctx, enum overload_type overload,
                   enum dxil_intr intr,
                   const struct dxil_value *op0,
                   const struct dxil_value *op1,
                   const struct dxil_value *op2)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.tertiary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
     opcode,
     op0,
     op1,
     op2
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_quaternary_call(struct ntd_context *ctx, enum overload_type overload,
                     enum dxil_intr intr,
                     const struct dxil_value *op0,
                     const struct dxil_value *op1,
                     const struct dxil_value *op2,
                     const struct dxil_value *op3)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.quaternary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
     opcode,
     op0,
     op1,
     op2,
     op3
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_threadid_call(struct ntd_context *ctx, const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadId", DXIL_I32);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
       DXIL_INTR_THREAD_ID);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
     opcode,
     comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_threadidingroup_call(struct ntd_context *ctx,
                          const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadIdInGroup", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
       DXIL_INTR_THREAD_ID_IN_GROUP);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
     opcode,
     comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_flattenedthreadidingroup_call(struct ntd_context *ctx)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.flattenedThreadIdInGroup", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
     opcode
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_groupid_call(struct ntd_context *ctx, const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.groupId", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
       DXIL_INTR_GROUP_ID);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
     opcode,
     comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_raw_bufferload_call(struct ntd_context *ctx,
                         const struct dxil_value *handle,
                         const struct dxil_value *coord[2],
                         enum overload_type overload,
                         unsigned component_count,
                         unsigned alignment)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.rawBufferLoad", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_RAW_BUFFER_LOAD);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1],
      dxil_module_get_int8_const(&ctx->mod, (1 << component_count) - 1),
      dxil_module_get_int32_const(&ctx->mod, alignment),
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_bufferload_call(struct ntd_context *ctx,
                     const struct dxil_value *handle,
                     const struct dxil_value *coord[2],
                     enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferLoad", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_BUFFER_LOAD);
   const struct dxil_value *args[] = { opcode, handle, coord[0], coord[1] };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_raw_bufferstore_call(struct ntd_context *ctx,
                          const struct dxil_value *handle,
                          const struct dxil_value *coord[2],
                          const struct dxil_value *value[4],
                          const struct dxil_value *write_mask,
                          enum overload_type overload,
                          unsigned alignment)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.rawBufferStore", overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_RAW_BUFFER_STORE);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1],
      value[0], value[1], value[2], value[3],
      write_mask,
      dxil_module_get_int32_const(&ctx->mod, alignment),
   };

   return dxil_emit_call_void(&ctx->mod, func,
                              args, ARRAY_SIZE(args));
}

static bool
emit_bufferstore_call(struct ntd_context *ctx,
                      const struct dxil_value *handle,
                      const struct dxil_value *coord[2],
                      const struct dxil_value *value[4],
                      const struct dxil_value *write_mask,
                      enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferStore", overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_BUFFER_STORE);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1],
      value[0], value[1], value[2], value[3],
      write_mask
   };

   return dxil_emit_call_void(&ctx->mod, func,
                              args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_textureload_call(struct ntd_context *ctx,
                      const struct dxil_value *handle,
                      const struct dxil_value *coord[3],
                      enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", overload);
   if (!func)
      return NULL;
   const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);
   const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_TEXTURE_LOAD);
   const struct dxil_value *args[] = { opcode, handle,
      /*lod_or_sample*/ int_undef,
      coord[0], coord[1], coord[2],
      /* offsets */ int_undef, int_undef, int_undef};

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_texturestore_call(struct ntd_context *ctx,
                       const struct dxil_value *handle,
                       const struct dxil_value *coord[3],
                       const struct dxil_value *value[4],
                       const struct dxil_value *write_mask,
                       enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureStore", overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_TEXTURE_STORE);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1], coord[2],
      value[0], value[1], value[2], value[3],
      write_mask
   };

   return dxil_emit_call_void(&ctx->mod, func,
                              args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_atomic_binop(struct ntd_context *ctx,
                  const struct dxil_value *handle,
                  enum dxil_atomic_op atomic_op,
                  const struct dxil_value *coord[3],
                  const struct dxil_value *value)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.atomicBinOp", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_BINOP);
   const struct dxil_value *atomic_op_value =
      dxil_module_get_int32_const(&ctx->mod, atomic_op);
   const struct dxil_value *args[] = {
      opcode, handle, atomic_op_value,
      coord[0], coord[1], coord[2], value
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_atomic_cmpxchg(struct ntd_context *ctx,
                    const struct dxil_value *handle,
                    const struct dxil_value *coord[3],
                    const struct dxil_value *cmpval,
                    const struct dxil_value *newval)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.atomicCompareExchange", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_CMPXCHG);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1], coord[2], cmpval, newval
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

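/* Pre-SM6.6 binding path: dx.op.createHandle identifies the resource by class
 * and by its range ID in the module metadata.
 */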
static const struct dxil_value *
emit_createhandle_call_pre_6_6(struct ntd_context *ctx,
                               enum dxil_resource_class resource_class,
                               unsigned lower_bound,
                               unsigned upper_bound,
                               unsigned space,
                               unsigned resource_range_id,
                               const struct dxil_value *resource_range_index,
                               bool non_uniform_resource_index)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE);
   const struct dxil_value *resource_class_value = dxil_module_get_int8_const(&ctx->mod, resource_class);
   const struct dxil_value *resource_range_id_value = dxil_module_get_int32_const(&ctx->mod, resource_range_id);
   const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
   if (!opcode || !resource_class_value || !resource_range_id_value ||
       !non_uniform_resource_index_value)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      resource_class_value,
      resource_range_id_value,
      resource_range_index,
      non_uniform_resource_index_value
   };

   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.createHandle", DXIL_NONE);

   if (!func)
      return NULL;

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_annotate_handle(struct ntd_context *ctx,
                     const struct dxil_value *unannotated_handle,
                     const struct dxil_value *res_props)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ANNOTATE_HANDLE);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      unannotated_handle,
      res_props
   };

   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.annotateHandle", DXIL_NONE);

   if (!func)
      return NULL;

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_annotate_handle_from_metadata(struct ntd_context *ctx,
                                   enum dxil_resource_class resource_class,
                                   unsigned resource_range_id,
                                   const struct dxil_value *unannotated_handle)
{
   const struct util_dynarray *mdnodes;
   switch (resource_class) {
   case DXIL_RESOURCE_CLASS_SRV:
      mdnodes = &ctx->srv_metadata_nodes;
      break;
   case DXIL_RESOURCE_CLASS_UAV:
      mdnodes = &ctx->uav_metadata_nodes;
      break;
   case DXIL_RESOURCE_CLASS_CBV:
      mdnodes = &ctx->cbv_metadata_nodes;
      break;
   case DXIL_RESOURCE_CLASS_SAMPLER:
      mdnodes = &ctx->sampler_metadata_nodes;
      break;
   default:
      unreachable("Invalid resource class");
   }

   const struct dxil_mdnode *mdnode = *util_dynarray_element(mdnodes, const struct dxil_mdnode *, resource_range_id);
   const struct dxil_value *res_props = dxil_module_get_res_props_const(&ctx->mod, resource_class, mdnode);
   if (!res_props)
      return NULL;

   return emit_annotate_handle(ctx, unannotated_handle, res_props);
}

static const struct dxil_value *
emit_createhandle_and_annotate(struct ntd_context *ctx,
                               enum dxil_resource_class resource_class,
                               unsigned lower_bound,
                               unsigned upper_bound,
                               unsigned space,
                               unsigned resource_range_id,
                               const struct dxil_value *resource_range_index,
                               bool non_uniform_resource_index)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE_FROM_BINDING);
   const struct dxil_value *res_bind = dxil_module_get_res_bind_const(&ctx->mod, lower_bound, upper_bound, space, resource_class);
   const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
   if (!opcode || !res_bind || !non_uniform_resource_index_value)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      res_bind,
      resource_range_index,
      non_uniform_resource_index_value
   };

   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.createHandleFromBinding", DXIL_NONE);

   if (!func)
      return NULL;

   const struct dxil_value *unannotated_handle = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!unannotated_handle)
      return NULL;

   return emit_annotate_handle_from_metadata(ctx, resource_class, resource_range_id, unannotated_handle);
}

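/* Dispatch between the pre-SM6.6 createHandle path and the SM6.6+
 * createHandleFromBinding + annotateHandle path.
 */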
static const struct dxil_value *
emit_createhandle_call(struct ntd_context *ctx,
                       enum dxil_resource_class resource_class,
                       unsigned lower_bound,
                       unsigned upper_bound,
                       unsigned space,
                       unsigned resource_range_id,
                       const struct dxil_value *resource_range_index,
                       bool non_uniform_resource_index)
{
   if (ctx->mod.minor_version < 6)
      return emit_createhandle_call_pre_6_6(ctx, resource_class, lower_bound, upper_bound, space, resource_range_id, resource_range_index, non_uniform_resource_index);
   else
      return emit_createhandle_and_annotate(ctx, resource_class, lower_bound, upper_bound, space, resource_range_id, resource_range_index, non_uniform_resource_index);
}

static const struct dxil_value *
emit_createhandle_call_const_index(struct ntd_context *ctx,
                                   enum dxil_resource_class resource_class,
                                   unsigned lower_bound,
                                   unsigned upper_bound,
                                   unsigned space,
                                   unsigned resource_range_id,
                                   unsigned resource_range_index,
                                   bool non_uniform_resource_index)
{
   const struct dxil_value *resource_range_index_value = dxil_module_get_int32_const(&ctx->mod, resource_range_index);
   if (!resource_range_index_value)
      return NULL;

   return emit_createhandle_call(ctx, resource_class, lower_bound, upper_bound, space,
                                 resource_range_id, resource_range_index_value,
                                 non_uniform_resource_index);
}

static const struct dxil_value *
emit_createhandle_heap(struct ntd_context *ctx,
                       const struct dxil_value *resource_range_index,
                       bool is_sampler,
                       bool non_uniform_resource_index)
{
   if (is_sampler)
      ctx->mod.feats.sampler_descriptor_heap_indexing = true;
   else
      ctx->mod.feats.resource_descriptor_heap_indexing = true;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE_FROM_HEAP);
   const struct dxil_value *sampler = dxil_module_get_int1_const(&ctx->mod, is_sampler);
   const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
   if (!opcode || !sampler || !non_uniform_resource_index_value)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      resource_range_index,
      sampler,
      non_uniform_resource_index_value
   };

   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.createHandleFromHeap", DXIL_NONE);

   if (!func)
      return NULL;

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

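/* Record a resource range in the container's resource table; validator 1.6+
 * uses the v1 record, which adds the resource kind and flags. Also tracks the
 * UAV count for the 64-UAV feature bit.
 */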
static void
add_resource(struct ntd_context *ctx, enum dxil_resource_type type,
             enum dxil_resource_kind kind,
             const resource_array_layout *layout)
{
   struct dxil_resource_v0 *resource_v0 = NULL;
   struct dxil_resource_v1 *resource_v1 = NULL;
   if (ctx->mod.minor_validator >= 6) {
      resource_v1 = util_dynarray_grow(&ctx->resources, struct dxil_resource_v1, 1);
      resource_v0 = &resource_v1->v0;
   } else {
      resource_v0 = util_dynarray_grow(&ctx->resources, struct dxil_resource_v0, 1);
   }
   resource_v0->resource_type = type;
   resource_v0->space = layout->space;
   resource_v0->lower_bound = layout->binding;
   if (layout->size == 0 || (uint64_t)layout->size + layout->binding >= UINT_MAX)
      resource_v0->upper_bound = UINT_MAX;
   else
      resource_v0->upper_bound = layout->binding + layout->size - 1;
   if (type == DXIL_RES_UAV_TYPED ||
       type == DXIL_RES_UAV_RAW ||
       type == DXIL_RES_UAV_STRUCTURED) {
      uint32_t new_uav_count = ctx->num_uavs + layout->size;
      if (layout->size == 0 || new_uav_count < ctx->num_uavs)
         ctx->num_uavs = UINT_MAX;
      else
         ctx->num_uavs = new_uav_count;
      if (ctx->mod.minor_validator >= 6 && ctx->num_uavs > 8)
         ctx->mod.feats.use_64uavs = 1;
   }

   if (resource_v1) {
      resource_v1->resource_kind = kind;
      /* No flags supported yet */
      resource_v1->resource_flags = 0;
   }
}

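/* Resolve a (space, binding) pair against the declared resource ranges and
 * create a handle for it. Ranges are stored in CBV, sampler, SRV, UAV order,
 * so the offset of the requested class is computed first.
 */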
static const struct dxil_value *
emit_createhandle_call_dynamic(struct ntd_context *ctx,
                               enum dxil_resource_class resource_class,
                               unsigned space,
                               unsigned binding,
                               const struct dxil_value *resource_range_index,
                               bool non_uniform_resource_index)
{
   unsigned offset = 0;
   unsigned count = 0;

   unsigned num_srvs = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_uavs = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_cbvs = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_samplers = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);

   switch (resource_class) {
   case DXIL_RESOURCE_CLASS_UAV:
      offset = num_srvs + num_samplers + num_cbvs;
      count = num_uavs;
      break;
   case DXIL_RESOURCE_CLASS_SRV:
      offset = num_samplers + num_cbvs;
      count = num_srvs;
      break;
   case DXIL_RESOURCE_CLASS_SAMPLER:
      offset = num_cbvs;
      count = num_samplers;
      break;
   case DXIL_RESOURCE_CLASS_CBV:
      offset = 0;
      count = num_cbvs;
      break;
   }

   unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
      sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
   assert(offset + count <= ctx->resources.size / resource_element_size);
   for (unsigned i = offset; i < offset + count; ++i) {
      const struct dxil_resource_v0 *resource = (const struct dxil_resource_v0 *)((const char *)ctx->resources.data + resource_element_size * i);
      if (resource->space == space &&
          resource->lower_bound <= binding &&
          resource->upper_bound >= binding) {
         return emit_createhandle_call(ctx, resource_class, resource->lower_bound,
                                       resource->upper_bound, space,
                                       i - offset,
                                       resource_range_index,
                                       non_uniform_resource_index);
      }
   }

   unreachable("Resource access for undeclared range");
}

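/* Declare an SRV for a NIR variable and emit its metadata. SSBOs bound as
 * SRVs become raw buffers; everything else is a typed resource.
 */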
static bool
emit_srv(struct ntd_context *ctx, nir_variable *var, unsigned count)
{
   unsigned id = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
   unsigned binding = var->data.binding;
   resource_array_layout layout = {id, binding, count, var->data.descriptor_set};

   enum dxil_component_type comp_type;
   enum dxil_resource_kind res_kind;
   enum dxil_resource_type res_type;
   if (var->data.mode == nir_var_mem_ssbo) {
      comp_type = DXIL_COMP_TYPE_INVALID;
      res_kind = DXIL_RESOURCE_KIND_RAW_BUFFER;
      res_type = DXIL_RES_SRV_RAW;
   } else {
      comp_type = dxil_get_comp_type(var->type);
      res_kind = dxil_get_resource_kind(var->type);
      res_type = DXIL_RES_SRV_TYPED;
   }
   const struct dxil_type *res_type_as_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, 4, false /* readwrite */);

   if (glsl_type_is_array(var->type))
      res_type_as_type = dxil_module_get_array_type(&ctx->mod, res_type_as_type, count);

   const struct dxil_mdnode *srv_meta = emit_srv_metadata(&ctx->mod, res_type_as_type, var->name,
                                                          &layout, comp_type, res_kind);

   if (!srv_meta)
      return false;

   util_dynarray_append(&ctx->srv_metadata_nodes, const struct dxil_mdnode *, srv_meta);
   add_resource(ctx, res_type, res_kind, &layout);
   if (res_type == DXIL_RES_SRV_RAW)
      ctx->mod.raw_and_structured_buffers = true;

   return true;
}

static bool
emit_uav(struct ntd_context *ctx, unsigned binding, unsigned space, unsigned count,
         enum dxil_component_type comp_type, unsigned num_comps, enum dxil_resource_kind res_kind,
         enum gl_access_qualifier access, const char *name)
{
   unsigned id = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
   resource_array_layout layout = { id, binding, count, space };

   const struct dxil_type *res_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, num_comps, true /* readwrite */);
   res_type = dxil_module_get_array_type(&ctx->mod, res_type, count);
   const struct dxil_mdnode *uav_meta = emit_uav_metadata(&ctx->mod, res_type, name,
                                                          &layout, comp_type, res_kind, access);

   if (!uav_meta)
      return false;

   util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta);
   if (ctx->mod.minor_validator < 6 &&
       util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8)
      ctx->mod.feats.use_64uavs = 1;

   add_resource(ctx, res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER ? DXIL_RES_UAV_RAW : DXIL_RES_UAV_TYPED, res_kind, &layout);
   if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      ctx->mod.raw_and_structured_buffers = true;
   if (ctx->mod.shader_kind != DXIL_PIXEL_SHADER &&
       ctx->mod.shader_kind != DXIL_COMPUTE_SHADER)
      ctx->mod.feats.uavs_at_every_stage = true;

   return true;
}

1356 static bool
1357 emit_globals(struct ntd_context *ctx, unsigned size)
1358 {
1359    nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo)
1360       size++;
1361 
1362    if (!size)
1363       return true;
1364 
1365    if (!emit_uav(ctx, 0, 0, size, DXIL_COMP_TYPE_INVALID, 1, DXIL_RESOURCE_KIND_RAW_BUFFER, 0, "globals"))
1366       return false;
1367 
1368    return true;
1369 }
1370 
1371 static bool
1372 emit_uav_var(struct ntd_context *ctx, nir_variable *var, unsigned count)
1373 {
1374    unsigned binding, space;
1375    if (ctx->opts->environment == DXIL_ENVIRONMENT_GL) {
1376       /* For GL, the image intrinsics are already lowered, using driver_location
1377        * as the 0-based image index. Use space 1 so that we can keep using these
1378        * NIR constants without having to remap them, and so they don't overlap
1379        * SSBOs, which are also 0-based UAV bindings.
1380        */
1381       binding = var->data.driver_location;
1382       space = 1;
1383    } else {
1384       binding = var->data.binding;
1385       space = var->data.descriptor_set;
1386    }
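   /* Example: in the GL path three SSBOs occupy u0..u2 in space 0, while an
    * image with driver_location 0 maps to u0 in space 1, so the two 0-based
    * UAV namespaces never collide.
    */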
1387    enum dxil_component_type comp_type = dxil_get_comp_type(var->type);
1388    enum dxil_resource_kind res_kind = dxil_get_resource_kind(var->type);
1389    const char *name = var->name;
1390 
1391    return emit_uav(ctx, binding, space, count, comp_type,
1392                    util_format_get_nr_components(var->data.image.format),
1393                    res_kind, var->data.access, name);
1394 }
1395 
1396 static const struct dxil_value *
1397 get_value_for_const(struct dxil_module *mod, nir_const_value *c, const struct dxil_type *type)
1398 {
1399    if (type == mod->int1_type) return dxil_module_get_int1_const(mod, c->b);
1400    if (type == mod->float32_type) return dxil_module_get_float_const(mod, c->f32);
1401    if (type == mod->int32_type) return dxil_module_get_int32_const(mod, c->i32);
1402    if (type == mod->int16_type) {
1403       mod->feats.min_precision = true;
1404       return dxil_module_get_int16_const(mod, c->i16);
1405    }
1406    if (type == mod->int64_type) {
1407       mod->feats.int64_ops = true;
1408       return dxil_module_get_int64_const(mod, c->i64);
1409    }
1410    if (type == mod->float16_type) {
1411       mod->feats.min_precision = true;
1412       return dxil_module_get_float16_const(mod, c->u16);
1413    }
1414    if (type == mod->float64_type) {
1415       mod->feats.doubles = true;
1416       return dxil_module_get_double_const(mod, c->f64);
1417    }
1418    unreachable("Invalid type");
1419 }
1420 
1421 static const struct dxil_type *
1422 get_type_for_glsl_base_type(struct dxil_module *mod, enum glsl_base_type type)
1423 {
1424    uint32_t bit_size = glsl_base_type_bit_size(type);
1425    if (nir_alu_type_get_base_type(nir_get_nir_type_for_glsl_base_type(type)) == nir_type_float)
1426       return dxil_module_get_float_type(mod, bit_size);
1427    return dxil_module_get_int_type(mod, bit_size);
1428 }
1429 
1430 static const struct dxil_type *
1431 get_type_for_glsl_type(struct dxil_module *mod, const struct glsl_type *type)
1432 {
1433    if (glsl_type_is_scalar(type))
1434       return get_type_for_glsl_base_type(mod, glsl_get_base_type(type));
1435 
1436    if (glsl_type_is_vector(type))
1437       return dxil_module_get_vector_type(mod, get_type_for_glsl_base_type(mod, glsl_get_base_type(type)),
1438                                          glsl_get_vector_elements(type));
1439 
1440    if (glsl_type_is_array(type))
1441       return dxil_module_get_array_type(mod, get_type_for_glsl_type(mod, glsl_get_array_element(type)),
1442                                         glsl_array_size(type));
1443 
1444    assert(glsl_type_is_struct(type));
1445    uint32_t size = glsl_get_length(type);
1446    const struct dxil_type **fields = calloc(size, sizeof(const struct dxil_type *));
1447    for (uint32_t i = 0; i < size; ++i)
1448       fields[i] = get_type_for_glsl_type(mod, glsl_get_struct_field(type, i));
1449    const struct dxil_type *ret = dxil_module_get_struct_type(mod, glsl_get_type_name(type), fields, size);
1450    free((void *)fields);
1451    return ret;
1452 }
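
/* For illustration, a GLSL "struct { float a; int b[2]; }" is translated by
 * the recursion above into the DXIL struct type { float, [2 x i32] }, going
 * through the array and scalar cases along the way.
 */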
1453 
1454 static const struct dxil_value *
1455 get_value_for_const_aggregate(struct dxil_module *mod, nir_constant *c, const struct glsl_type *type)
1456 {
1457    const struct dxil_type *dxil_type = get_type_for_glsl_type(mod, type);
1458    if (glsl_type_is_vector_or_scalar(type)) {
1459       const struct dxil_type *element_type = get_type_for_glsl_base_type(mod, glsl_get_base_type(type));
1460       const struct dxil_value *elements[NIR_MAX_VEC_COMPONENTS];
1461       for (uint32_t i = 0; i < glsl_get_vector_elements(type); ++i)
1462          elements[i] = get_value_for_const(mod, &c->values[i], element_type);
1463       if (glsl_type_is_scalar(type))
1464          return elements[0];
1465       return dxil_module_get_vector_const(mod, dxil_type, elements);
1466    }
1467 
1468    uint32_t num_values = glsl_get_length(type);
1469    assert(num_values == c->num_elements);
1470    const struct dxil_value **values = calloc(num_values, sizeof(const struct dxil_value *));
1471    const struct dxil_value *ret;
1472    if (glsl_type_is_array(type)) {
1473       const struct glsl_type *element_type = glsl_get_array_element(type);
1474       for (uint32_t i = 0; i < num_values; ++i)
1475          values[i] = get_value_for_const_aggregate(mod, c->elements[i], element_type);
1476       ret = dxil_module_get_array_const(mod, dxil_type, values);
1477    } else {
1478       for (uint32_t i = 0; i < num_values; ++i)
1479          values[i] = get_value_for_const_aggregate(mod, c->elements[i], glsl_get_struct_field(type, i));
1480       ret = dxil_module_get_struct_const(mod, dxil_type, values);
1481    }
1482    free((void *)values);
1483    return ret;
1484 }
1485 
1486 static bool
1487 emit_global_consts(struct ntd_context *ctx)
1488 {
1489    uint32_t index = 0;
1490    nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_constant) {
1491       assert(var->constant_initializer);
1492       var->data.driver_location = index++;
1493    }
1494 
1495    ctx->consts = ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, index);
1496 
1497    nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_constant) {
1498       if (!var->name)
1499          var->name = ralloc_asprintf(var, "const_%d", var->data.driver_location);
1500 
1501       const struct dxil_value *agg_vals =
1502          get_value_for_const_aggregate(&ctx->mod, var->constant_initializer, var->type);
1503       if (!agg_vals)
1504          return false;
1505 
1506       const struct dxil_value *gvar = dxil_add_global_ptr_var(&ctx->mod, var->name,
1507                                                               dxil_value_get_type(agg_vals),
1508                                                               DXIL_AS_DEFAULT, 16,
1509                                                               agg_vals);
1510       if (!gvar)
1511          return false;
1512 
1513       ctx->consts[var->data.driver_location] = gvar;
1514    }
1515 
1516    return true;
1517 }
1518 
1519 static bool
1520 emit_shared_vars(struct ntd_context *ctx)
1521 {
1522    uint32_t index = 0;
1523    nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_shared)
1524       var->data.driver_location = index++;
1525 
1526    ctx->sharedvars = ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, index);
1527 
1528    nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_shared) {
1529       if (!var->name)
1530          var->name = ralloc_asprintf(var, "shared_%d", var->data.driver_location);
1531       const struct dxil_value *gvar = dxil_add_global_ptr_var(&ctx->mod, var->name,
1532                                                               get_type_for_glsl_type(&ctx->mod, var->type),
1533                                                               DXIL_AS_GROUPSHARED, 16,
1534                                                               NULL);
1535       if (!gvar)
1536          return false;
1537 
1538       ctx->sharedvars[var->data.driver_location] = gvar;
1539    }
1540 
1541    return true;
1542 }
1543 
1544 static bool
1545 emit_cbv(struct ntd_context *ctx, unsigned binding, unsigned space,
1546          unsigned size, unsigned count, char *name)
1547 {
1548    assert(count != 0);
1549 
1550    unsigned idx = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);
1551 
1552    const struct dxil_type *float32 = dxil_module_get_float_type(&ctx->mod, 32);
1553    const struct dxil_type *array_type = dxil_module_get_array_type(&ctx->mod, float32, size);
1554    const struct dxil_type *buffer_type = dxil_module_get_struct_type(&ctx->mod, name,
1555                                                                      &array_type, 1);
1556    // All ubo[1]s should have been lowered to ubo with static indexing
1557    const struct dxil_type *final_type = count != 1 ? dxil_module_get_array_type(&ctx->mod, buffer_type, count) : buffer_type;
1558    resource_array_layout layout = {idx, binding, count, space};
1559    const struct dxil_mdnode *cbv_meta = emit_cbv_metadata(&ctx->mod, final_type,
1560                                                           name, &layout, 4 * size);
1561 
1562    if (!cbv_meta)
1563       return false;
1564 
1565    util_dynarray_append(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *, cbv_meta);
1566    add_resource(ctx, DXIL_RES_CBV, DXIL_RESOURCE_KIND_CBUFFER, &layout);
1567 
1568    return true;
1569 }
1570 
1571 static bool
1572 emit_ubo_var(struct ntd_context *ctx, nir_variable *var)
1573 {
1574    unsigned count = 1;
1575    if (glsl_type_is_array(var->type))
1576       count = glsl_get_length(var->type);
1577 
1578    char *name = var->name;
1579    char temp_name[30];
1580    if (name && strlen(name) == 0) {
1581       snprintf(temp_name, sizeof(temp_name), "__unnamed_ubo_%d",
1582                ctx->unnamed_ubo_count++);
1583       name = temp_name;
1584    }
1585 
1586    const struct glsl_type *type = glsl_without_array(var->type);
1587    assert(glsl_type_is_struct(type) || glsl_type_is_interface(type));
1588    unsigned dwords = ALIGN_POT(glsl_get_explicit_size(type, false), 16) / 4;
1589 
1590    return emit_cbv(ctx, var->data.binding, var->data.descriptor_set,
1591                    dwords, count, name);
1592 }
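
/* Worked example for the sizing above: a UBO block with an explicit size of
 * 20 bytes is padded to the next 16-byte boundary (32 bytes), i.e. 8 dwords,
 * so emit_cbv() declares a float[8] member and reports 4 * 8 = 32 bytes in
 * the metadata.
 */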
1593 
1594 static bool
1595 emit_sampler(struct ntd_context *ctx, nir_variable *var, unsigned count)
1596 {
1597    unsigned id = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);
1598    unsigned binding = var->data.binding;
1599    resource_array_layout layout = {id, binding, count, var->data.descriptor_set};
1600    const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
1601    const struct dxil_type *sampler_type = dxil_module_get_struct_type(&ctx->mod, "struct.SamplerState", &int32_type, 1);
1602 
1603    if (glsl_type_is_array(var->type))
1604       sampler_type = dxil_module_get_array_type(&ctx->mod, sampler_type, count);
1605 
1606    const struct dxil_mdnode *sampler_meta = emit_sampler_metadata(&ctx->mod, sampler_type, var, &layout);
1607 
1608    if (!sampler_meta)
1609       return false;
1610 
1611    util_dynarray_append(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *, sampler_meta);
1612    add_resource(ctx, DXIL_RES_SAMPLER, DXIL_RESOURCE_KIND_SAMPLER, &layout);
1613 
1614    return true;
1615 }
1616 
1617 static bool
1618 emit_static_indexing_handles(struct ntd_context *ctx)
1619 {
1620    /* Vulkan always uses dynamic handles, from instructions in the NIR */
1621    if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN)
1622       return true;
1623 
1624    unsigned last_res_class = -1;
1625    unsigned id = 0;
1626 
1627    unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
1628       sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
1629    for (struct dxil_resource_v0 *res = (struct dxil_resource_v0 *)ctx->resources.data;
1630         res < (struct dxil_resource_v0 *)((char *)ctx->resources.data + ctx->resources.size);
1631         res = (struct dxil_resource_v0 *)((char *)res + resource_element_size)) {
1632       enum dxil_resource_class res_class;
1633       const struct dxil_value **handle_array;
1634       switch (res->resource_type) {
1635       case DXIL_RES_SRV_TYPED:
1636       case DXIL_RES_SRV_RAW:
1637       case DXIL_RES_SRV_STRUCTURED:
1638          res_class = DXIL_RESOURCE_CLASS_SRV;
1639          handle_array = ctx->srv_handles;
1640          break;
1641       case DXIL_RES_CBV:
1642          res_class = DXIL_RESOURCE_CLASS_CBV;
1643          handle_array = ctx->cbv_handles;
1644          break;
1645       case DXIL_RES_SAMPLER:
1646          res_class = DXIL_RESOURCE_CLASS_SAMPLER;
1647          handle_array = ctx->sampler_handles;
1648          break;
1649       case DXIL_RES_UAV_RAW:
1650          res_class = DXIL_RESOURCE_CLASS_UAV;
1651          handle_array = ctx->ssbo_handles;
1652          break;
1653       case DXIL_RES_UAV_TYPED:
1654       case DXIL_RES_UAV_STRUCTURED:
1655       case DXIL_RES_UAV_STRUCTURED_WITH_COUNTER:
1656          res_class = DXIL_RESOURCE_CLASS_UAV;
1657          handle_array = ctx->image_handles;
1658          break;
1659       default:
1660          unreachable("Unexpected resource type");
1661       }
1662 
1663       if (last_res_class != res_class)
1664          id = 0;
1665       else
1666          id++;
1667       last_res_class = res_class;
1668 
1669       if (res->space > 1)
1670          continue;
1671       assert(res->space == 0 ||
1672          (res->space == 1 &&
1673             res->resource_type != DXIL_RES_UAV_RAW &&
1674             ctx->opts->environment == DXIL_ENVIRONMENT_GL));
1675 
1676       /* CL uses dynamic handles for the "globals" UAV array, but uses static
1677        * handles for UBOs, textures, and samplers.
1678        */
1679       if (ctx->opts->environment == DXIL_ENVIRONMENT_CL &&
1680           res->resource_type == DXIL_RES_UAV_RAW)
1681          continue;
1682 
1683       for (unsigned i = res->lower_bound; i <= res->upper_bound; ++i) {
1684          handle_array[i] = emit_createhandle_call_const_index(ctx,
1685                                                               res_class,
1686                                                               res->lower_bound,
1687                                                               res->upper_bound,
1688                                                               res->space,
1689                                                               id,
1690                                                               i,
1691                                                               false);
1692          if (!handle_array[i])
1693             return false;
1694       }
1695    }
1696    return true;
1697 }
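
/* Example of the id bookkeeping above: resources are assumed to arrive
 * grouped by class, so a table of two CBVs followed by one sampler visits
 * ids 0 and 1 for the CBV class, then resets to 0 when the class switches
 * to samplers.
 */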
1698 
1699 static const struct dxil_mdnode *
1700 emit_gs_state(struct ntd_context *ctx)
1701 {
1702    const struct dxil_mdnode *gs_state_nodes[5];
1703    const nir_shader *s = ctx->shader;
1704 
1705    gs_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, dxil_get_input_primitive(s->info.gs.input_primitive));
1706    gs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.vertices_out);
1707    gs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.gs.active_stream_mask, 1));
1708    gs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, dxil_get_primitive_topology(s->info.gs.output_primitive));
1709    gs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.invocations);
1710 
1711    for (unsigned i = 0; i < ARRAY_SIZE(gs_state_nodes); ++i) {
1712       if (!gs_state_nodes[i])
1713          return NULL;
1714    }
1715 
1716    return dxil_get_metadata_node(&ctx->mod, gs_state_nodes, ARRAY_SIZE(gs_state_nodes));
1717 }
1718 
1719 static enum dxil_tessellator_domain
1720 get_tessellator_domain(enum tess_primitive_mode primitive_mode)
1721 {
1722    switch (primitive_mode) {
1723    case TESS_PRIMITIVE_QUADS: return DXIL_TESSELLATOR_DOMAIN_QUAD;
1724    case TESS_PRIMITIVE_TRIANGLES: return DXIL_TESSELLATOR_DOMAIN_TRI;
1725    case TESS_PRIMITIVE_ISOLINES: return DXIL_TESSELLATOR_DOMAIN_ISOLINE;
1726    default:
1727       unreachable("Invalid tessellator primitive mode");
1728    }
1729 }
1730 
1731 static enum dxil_tessellator_partitioning
1732 get_tessellator_partitioning(enum gl_tess_spacing spacing)
1733 {
1734    switch (spacing) {
1735    default:
1736    case TESS_SPACING_EQUAL:
1737       return DXIL_TESSELLATOR_PARTITIONING_INTEGER;
1738    case TESS_SPACING_FRACTIONAL_EVEN:
1739       return DXIL_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN;
1740    case TESS_SPACING_FRACTIONAL_ODD:
1741       return DXIL_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD;
1742    }
1743 }
1744 
1745 static enum dxil_tessellator_output_primitive
1746 get_tessellator_output_primitive(const struct shader_info *info)
1747 {
1748    if (info->tess.point_mode)
1749       return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_POINT;
1750    if (info->tess._primitive_mode == TESS_PRIMITIVE_ISOLINES)
1751       return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_LINE;
1752    /* Note: GL tessellation domain is inverted from D3D, which means triangle
1753     * winding needs to be inverted.
1754     */
1755    if (info->tess.ccw)
1756       return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CW;
1757    return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CCW;
1758 }
1759 
1760 static const struct dxil_mdnode *
1761 emit_hs_state(struct ntd_context *ctx)
1762 {
1763    const struct dxil_mdnode *hs_state_nodes[7];
1764 
1765    hs_state_nodes[0] = dxil_get_metadata_func(&ctx->mod, ctx->tess_ctrl_patch_constant_func_def->func);
1766    hs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, ctx->tess_input_control_point_count);
1767    hs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, ctx->shader->info.tess.tcs_vertices_out);
1768    hs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_domain(ctx->shader->info.tess._primitive_mode));
1769    hs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_partitioning(ctx->shader->info.tess.spacing));
1770    hs_state_nodes[5] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_output_primitive(&ctx->shader->info));
1771    hs_state_nodes[6] = dxil_get_metadata_float32(&ctx->mod, 64.0f);
1772 
1773    return dxil_get_metadata_node(&ctx->mod, hs_state_nodes, ARRAY_SIZE(hs_state_nodes));
1774 }
1775 
1776 static const struct dxil_mdnode *
1777 emit_ds_state(struct ntd_context *ctx)
1778 {
1779    const struct dxil_mdnode *ds_state_nodes[2];
1780 
1781    ds_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_domain(ctx->shader->info.tess._primitive_mode));
1782    ds_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, ctx->shader->info.tess.tcs_vertices_out);
1783 
1784    return dxil_get_metadata_node(&ctx->mod, ds_state_nodes, ARRAY_SIZE(ds_state_nodes));
1785 }
1786 
1787 static const struct dxil_mdnode *
1788 emit_threads(struct ntd_context *ctx)
1789 {
1790    const nir_shader *s = ctx->shader;
1791    const struct dxil_mdnode *threads_x = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[0], 1));
1792    const struct dxil_mdnode *threads_y = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[1], 1));
1793    const struct dxil_mdnode *threads_z = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[2], 1));
1794    if (!threads_x || !threads_y || !threads_z)
1795       return NULL;
1796 
1797    const struct dxil_mdnode *threads_nodes[] = { threads_x, threads_y, threads_z };
1798    return dxil_get_metadata_node(&ctx->mod, threads_nodes, ARRAY_SIZE(threads_nodes));
1799 }
1800 
1801 static const struct dxil_mdnode *
emit_wave_size(struct ntd_context * ctx)1802 emit_wave_size(struct ntd_context *ctx)
1803 {
1804    const nir_shader *s = ctx->shader;
1805    const struct dxil_mdnode *wave_size_node = dxil_get_metadata_int32(&ctx->mod, s->info.subgroup_size);
1806    return dxil_get_metadata_node(&ctx->mod, &wave_size_node, 1);
1807 }
1808 
1809 static const struct dxil_mdnode *
1810 emit_wave_size_range(struct ntd_context *ctx)
1811 {
1812    const nir_shader *s = ctx->shader;
1813    const struct dxil_mdnode *wave_size_nodes[3];
1814    wave_size_nodes[0] = dxil_get_metadata_int32(&ctx->mod, s->info.subgroup_size);
1815    wave_size_nodes[1] = wave_size_nodes[0];
1816    wave_size_nodes[2] = wave_size_nodes[0];
1817    return dxil_get_metadata_node(&ctx->mod, wave_size_nodes, ARRAY_SIZE(wave_size_nodes));
1818 }
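
/* The three operands above are presumably (min, max, preferred) for the
 * SM 6.8 wave-size-range tag; a fixed subgroup size is encoded by repeating
 * the same value for all three.
 */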
1819 
1820 static uint64_t
1821 get_module_flags(struct ntd_context *ctx)
1822 {
1823    /* See the DXIL documentation for the definition of these flags:
1824     *
1825     * https://github.com/Microsoft/DirectXShaderCompiler/blob/master/docs/DXIL.rst#shader-flags
1826     */
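   /* For example, a compute shader that uses doubles (bit 2) and 64-bit
    * integer ops (bit 20) ends up with flags = (1 << 2) | (1 << 20)
    * = 0x100004.
    */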
1827 
1828    uint64_t flags = 0;
1829    if (ctx->mod.feats.doubles)
1830       flags |= (1 << 2);
1831    if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT &&
1832        ctx->shader->info.fs.early_fragment_tests)
1833       flags |= (1 << 3);
1834    if (ctx->mod.raw_and_structured_buffers)
1835       flags |= (1 << 4);
1836    if (ctx->mod.feats.min_precision)
1837       flags |= (1 << 5);
1838    if (ctx->mod.feats.dx11_1_double_extensions)
1839       flags |= (1 << 6);
1840    if (ctx->mod.feats.array_layer_from_vs_or_ds)
1841       flags |= (1 << 9);
1842    if (ctx->mod.feats.inner_coverage)
1843       flags |= (1 << 10);
1844    if (ctx->mod.feats.stencil_ref)
1845       flags |= (1 << 11);
1846    if (ctx->mod.feats.tiled_resources)
1847       flags |= (1 << 12);
1848    if (ctx->mod.feats.typed_uav_load_additional_formats)
1849       flags |= (1 << 13);
1850    if (ctx->mod.feats.use_64uavs)
1851       flags |= (1 << 15);
1852    if (ctx->mod.feats.uavs_at_every_stage)
1853       flags |= (1 << 16);
1854    if (ctx->mod.feats.cs_4x_raw_sb)
1855       flags |= (1 << 17);
1856    if (ctx->mod.feats.rovs)
1857       flags |= (1 << 18);
1858    if (ctx->mod.feats.wave_ops)
1859       flags |= (1 << 19);
1860    if (ctx->mod.feats.int64_ops)
1861       flags |= (1 << 20);
1862    if (ctx->mod.feats.view_id)
1863       flags |= (1 << 21);
1864    if (ctx->mod.feats.barycentrics)
1865       flags |= (1 << 22);
1866    if (ctx->mod.feats.native_low_precision)
1867       flags |= (1 << 23) | (1 << 5);
1868    if (ctx->mod.feats.shading_rate)
1869       flags |= (1 << 24);
1870    if (ctx->mod.feats.raytracing_tier_1_1)
1871       flags |= (1 << 25);
1872    if (ctx->mod.feats.sampler_feedback)
1873       flags |= (1 << 26);
1874    if (ctx->mod.feats.atomic_int64_typed)
1875       flags |= (1 << 27);
1876    if (ctx->mod.feats.atomic_int64_tgsm)
1877       flags |= (1 << 28);
1878    if (ctx->mod.feats.derivatives_in_mesh_or_amp)
1879       flags |= (1 << 29);
1880    if (ctx->mod.feats.resource_descriptor_heap_indexing)
1881       flags |= (1 << 30);
1882    if (ctx->mod.feats.sampler_descriptor_heap_indexing)
1883       flags |= (1ull << 31);
1884    if (ctx->mod.feats.atomic_int64_heap_resource)
1885       flags |= (1ull << 32);
1886    if (ctx->mod.feats.advanced_texture_ops)
1887       flags |= (1ull << 34);
1888    if (ctx->mod.feats.writable_msaa)
1889       flags |= (1ull << 35);
1890    // Bit 36 is wave MMA
1891    if (ctx->mod.feats.sample_cmp_bias_gradient)
1892       flags |= (1ull << 37);
1893    if (ctx->mod.feats.extended_command_info)
1894       flags |= (1ull << 38);
1895 
1896    if (ctx->opts->disable_math_refactoring)
1897       flags |= (1 << 1);
1898 
1899    /* Work around https://github.com/microsoft/DirectXShaderCompiler/issues/4616
1900     * When targeting SM6.7 and with at least one UAV, if no other flags are present,
1901     * set the resources-may-not-alias flag, or else the DXIL validator may end up
1902     * with uninitialized memory which will fail validation, due to missing that flag.
1903     */
1904    if (flags == 0 && ctx->mod.minor_version >= 7 && ctx->num_uavs > 0)
1905       flags |= (1ull << 33);
1906 
1907    return flags;
1908 }
1909 
1910 static const struct dxil_mdnode *
1911 emit_entrypoint(struct ntd_context *ctx,
1912                 const struct dxil_func *func, const char *name,
1913                 const struct dxil_mdnode *signatures,
1914                 const struct dxil_mdnode *resources,
1915                 const struct dxil_mdnode *shader_props)
1916 {
1917    char truncated_name[254] = { 0 };
1918    strncpy(truncated_name, name, ARRAY_SIZE(truncated_name) - 1);
1919 
1920    const struct dxil_mdnode *func_md = dxil_get_metadata_func(&ctx->mod, func);
1921    const struct dxil_mdnode *name_md = dxil_get_metadata_string(&ctx->mod, truncated_name);
1922    const struct dxil_mdnode *nodes[] = {
1923       func_md,
1924       name_md,
1925       signatures,
1926       resources,
1927       shader_props
1928    };
1929    return dxil_get_metadata_node(&ctx->mod, nodes,
1930                                  ARRAY_SIZE(nodes));
1931 }
1932 
1933 static const struct dxil_mdnode *
1934 emit_resources(struct ntd_context *ctx)
1935 {
1936    bool emit_resources = false;
1937    const struct dxil_mdnode *resources_nodes[] = {
1938       NULL, NULL, NULL, NULL
1939    };
1940 
1941 #define ARRAY_AND_SIZE(arr) arr.data, util_dynarray_num_elements(&arr, const struct dxil_mdnode *)
1942 
1943    if (ctx->srv_metadata_nodes.size) {
1944       resources_nodes[0] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->srv_metadata_nodes));
1945       emit_resources = true;
1946    }
1947 
1948    if (ctx->uav_metadata_nodes.size) {
1949       resources_nodes[1] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->uav_metadata_nodes));
1950       emit_resources = true;
1951    }
1952 
1953    if (ctx->cbv_metadata_nodes.size) {
1954       resources_nodes[2] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->cbv_metadata_nodes));
1955       emit_resources = true;
1956    }
1957 
1958    if (ctx->sampler_metadata_nodes.size) {
1959       resources_nodes[3] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->sampler_metadata_nodes));
1960       emit_resources = true;
1961    }
1962 
1963 #undef ARRAY_AND_SIZE
1964 
1965    return emit_resources ?
1966       dxil_get_metadata_node(&ctx->mod, resources_nodes, ARRAY_SIZE(resources_nodes)): NULL;
1967 }
1968 
1969 static bool
1970 emit_tag(struct ntd_context *ctx, enum dxil_shader_tag tag,
1971          const struct dxil_mdnode *value_node)
1972 {
1973    const struct dxil_mdnode *tag_node = dxil_get_metadata_int32(&ctx->mod, tag);
1974    if (!tag_node || !value_node)
1975       return false;
1976    assert(ctx->num_shader_property_nodes <= ARRAY_SIZE(ctx->shader_property_nodes) - 2);
1977    ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = tag_node;
1978    ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = value_node;
1979 
1980    return true;
1981 }
1982 
1983 static bool
1984 emit_metadata(struct ntd_context *ctx)
1985 {
1986    /* DXIL versions are 1.x for shader model 6.x */
1987    assert(ctx->mod.major_version == 6);
1988    unsigned dxilMajor = 1;
1989    unsigned dxilMinor = ctx->mod.minor_version;
1990    unsigned valMajor = ctx->mod.major_validator;
1991    unsigned valMinor = ctx->mod.minor_validator;
1992    if (!emit_llvm_ident(&ctx->mod) ||
1993        !emit_named_version(&ctx->mod, "dx.version", dxilMajor, dxilMinor) ||
1994        !emit_named_version(&ctx->mod, "dx.valver", valMajor, valMinor) ||
1995        !emit_dx_shader_model(&ctx->mod))
1996       return false;
1997 
1998    const struct dxil_func_def *main_func_def = ctx->main_func_def;
1999    if (!main_func_def)
2000       return false;
2001    const struct dxil_func *main_func = main_func_def->func;
2002 
2003    const struct dxil_mdnode *resources_node = emit_resources(ctx);
2004 
2005    const struct dxil_mdnode *main_entrypoint = dxil_get_metadata_func(&ctx->mod, main_func);
2006    const struct dxil_mdnode *node27 = dxil_get_metadata_node(&ctx->mod, NULL, 0);
2007 
2008    const struct dxil_mdnode *node4 = dxil_get_metadata_int32(&ctx->mod, 0);
2009    const struct dxil_mdnode *nodes_4_27_27[] = {
2010       node4, node27, node27
2011    };
2012    const struct dxil_mdnode *node28 = dxil_get_metadata_node(&ctx->mod, nodes_4_27_27,
2013                                                       ARRAY_SIZE(nodes_4_27_27));
2014 
2015    const struct dxil_mdnode *node29 = dxil_get_metadata_node(&ctx->mod, &node28, 1);
2016 
2017    const struct dxil_mdnode *node3 = dxil_get_metadata_int32(&ctx->mod, 1);
2018    const struct dxil_mdnode *main_type_annotation_nodes[] = {
2019       node3, main_entrypoint, node29
2020    };
2021    const struct dxil_mdnode *main_type_annotation = dxil_get_metadata_node(&ctx->mod, main_type_annotation_nodes,
2022                                                                            ARRAY_SIZE(main_type_annotation_nodes));
2023 
2024    if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) {
2025       if (!emit_tag(ctx, DXIL_SHADER_TAG_GS_STATE, emit_gs_state(ctx)))
2026          return false;
2027    } else if (ctx->mod.shader_kind == DXIL_HULL_SHADER) {
2028       ctx->tess_input_control_point_count = 32;
2029       nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
2030          if (nir_is_arrayed_io(var, MESA_SHADER_TESS_CTRL)) {
2031             ctx->tess_input_control_point_count = glsl_array_size(var->type);
2032             break;
2033          }
2034       }
2035 
2036       if (!emit_tag(ctx, DXIL_SHADER_TAG_HS_STATE, emit_hs_state(ctx)))
2037          return false;
2038    } else if (ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) {
2039       if (!emit_tag(ctx, DXIL_SHADER_TAG_DS_STATE, emit_ds_state(ctx)))
2040          return false;
2041    } else if (ctx->mod.shader_kind == DXIL_COMPUTE_SHADER) {
2042       if (!emit_tag(ctx, DXIL_SHADER_TAG_NUM_THREADS, emit_threads(ctx)))
2043          return false;
2044       if (ctx->mod.minor_version >= 6 &&
2045           ctx->shader->info.subgroup_size >= SUBGROUP_SIZE_REQUIRE_4) {
2046          if (ctx->mod.minor_version < 8) {
2047             if (!emit_tag(ctx, DXIL_SHADER_TAG_WAVE_SIZE, emit_wave_size(ctx)))
2048                return false;
2049          } else {
2050             if (!emit_tag(ctx, DXIL_SHADER_TAG_WAVE_SIZE_RANGE, emit_wave_size_range(ctx)))
2051                return false;
2052          }
2053       }
2054    }
2055 
2056    uint64_t flags = get_module_flags(ctx);
2057    if (flags != 0) {
2058       if (!emit_tag(ctx, DXIL_SHADER_TAG_FLAGS, dxil_get_metadata_int64(&ctx->mod, flags)))
2059          return false;
2060    }
2061    const struct dxil_mdnode *shader_properties = NULL;
2062    if (ctx->num_shader_property_nodes > 0) {
2063       shader_properties = dxil_get_metadata_node(&ctx->mod, ctx->shader_property_nodes,
2064                                                  ctx->num_shader_property_nodes);
2065       if (!shader_properties)
2066          return false;
2067    }
2068 
2069    nir_function_impl *entry_func_impl = nir_shader_get_entrypoint(ctx->shader);
2070    const struct dxil_mdnode *dx_entry_point = emit_entrypoint(ctx, main_func,
2071        entry_func_impl->function->name, get_signatures(&ctx->mod), resources_node, shader_properties);
2072    if (!dx_entry_point)
2073       return false;
2074 
2075    if (resources_node) {
2076       const struct dxil_mdnode *dx_resources = resources_node;
2077       dxil_add_metadata_named_node(&ctx->mod, "dx.resources",
2078                                        &dx_resources, 1);
2079    }
2080 
2081    if (ctx->mod.minor_version >= 2 &&
2082        dxil_nir_analyze_io_dependencies(&ctx->mod, ctx->shader)) {
2083       const struct dxil_type *i32_type = dxil_module_get_int_type(&ctx->mod, 32);
2084       if (!i32_type)
2085          return false;
2086 
2087       const struct dxil_type *array_type = dxil_module_get_array_type(&ctx->mod, i32_type, ctx->mod.serialized_dependency_table_size);
2088       if (!array_type)
2089          return false;
2090 
2091       const struct dxil_value **array_entries = malloc(sizeof(const struct dxil_value *) * ctx->mod.serialized_dependency_table_size);
2092       if (!array_entries)
2093          return false;
2094 
2095       for (uint32_t i = 0; i < ctx->mod.serialized_dependency_table_size; ++i)
2096          array_entries[i] = dxil_module_get_int32_const(&ctx->mod, ctx->mod.serialized_dependency_table[i]);
2097       const struct dxil_value *array_val = dxil_module_get_array_const(&ctx->mod, array_type, array_entries);
2098       free((void *)array_entries);
2099 
2100       const struct dxil_mdnode *view_id_state_val = dxil_get_metadata_value(&ctx->mod, array_type, array_val);
2101       if (!view_id_state_val)
2102          return false;
2103 
2104       const struct dxil_mdnode *view_id_state_node = dxil_get_metadata_node(&ctx->mod, &view_id_state_val, 1);
2105 
2106       dxil_add_metadata_named_node(&ctx->mod, "dx.viewIdState", &view_id_state_node, 1);
2107    }
2108 
2109    const struct dxil_mdnode *dx_type_annotations[] = { main_type_annotation };
2110    return dxil_add_metadata_named_node(&ctx->mod, "dx.typeAnnotations",
2111                                        dx_type_annotations,
2112                                        ARRAY_SIZE(dx_type_annotations)) &&
2113           dxil_add_metadata_named_node(&ctx->mod, "dx.entryPoints",
2114                                        &dx_entry_point, 1);
2115 }
2116 
2117 static const struct dxil_value *
2118 bitcast_to_int(struct ntd_context *ctx, unsigned bit_size,
2119                const struct dxil_value *value)
2120 {
2121    const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod, bit_size);
2122    if (!type)
2123       return NULL;
2124 
2125    return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
2126 }
2127 
2128 static const struct dxil_value *
2129 bitcast_to_float(struct ntd_context *ctx, unsigned bit_size,
2130                  const struct dxil_value *value)
2131 {
2132    const struct dxil_type *type = dxil_module_get_float_type(&ctx->mod, bit_size);
2133    if (!type)
2134       return NULL;
2135 
2136    return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
2137 }
2138 
2139 static bool
2140 is_phi_src(nir_def *ssa)
2141 {
2142    nir_foreach_use(src, ssa)
2143       if (nir_src_parent_instr(src)->type == nir_instr_type_phi)
2144          return true;
2145    return false;
2146 }
2147 
2148 static void
2149 store_ssa_def(struct ntd_context *ctx, nir_def *ssa, unsigned chan,
2150               const struct dxil_value *value)
2151 {
2152    assert(ssa->index < ctx->num_defs);
2153    assert(chan < ssa->num_components);
2154    /* Insert bitcasts for phi srcs in the parent block */
2155    if (is_phi_src(ssa)) {
2156       /* Prefer ints over floats if it could be both or if we have no type info */
2157       nir_alu_type expect_type =
2158          BITSET_TEST(ctx->int_types, ssa->index) ? nir_type_int :
2159          (BITSET_TEST(ctx->float_types, ssa->index) ? nir_type_float :
2160           nir_type_int);
2161       assert(ssa->bit_size != 1 || expect_type == nir_type_int);
2162       if (ssa->bit_size != 1 && expect_type != dxil_type_to_nir_type(dxil_value_get_type(value)))
2163          value = dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST,
2164                                 expect_type == nir_type_int ?
2165                                  dxil_module_get_int_type(&ctx->mod, ssa->bit_size) :
2166                                  dxil_module_get_float_type(&ctx->mod, ssa->bit_size), value);
2167       if (ssa->bit_size == 64) {
2168          if (expect_type == nir_type_int)
2169             ctx->mod.feats.int64_ops = true;
2170          if (expect_type == nir_type_float)
2171             ctx->mod.feats.doubles = true;
2172       }
2173    }
2174    ctx->defs[ssa->index].chans[chan] = value;
2175 }
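
/* Example of the phi-source normalization above: a 32-bit value defined as a
 * float but feeding a phi whose uses were analyzed as integer is bitcast to
 * i32 in its defining block, so every incoming edge of the phi carries the
 * same DXIL type.
 */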
2176 
2177 static void
2178 store_def(struct ntd_context *ctx, nir_def *def, unsigned chan,
2179            const struct dxil_value *value)
2180 {
2181    const struct dxil_type *type = dxil_value_get_type(value);
2182    if (type == ctx->mod.float64_type)
2183       ctx->mod.feats.doubles = true;
2184    if (type == ctx->mod.float16_type ||
2185        type == ctx->mod.int16_type)
2186       ctx->mod.feats.min_precision = true;
2187    if (type == ctx->mod.int64_type)
2188       ctx->mod.feats.int64_ops = true;
2189    store_ssa_def(ctx, def, chan, value);
2190 }
2191 
2192 static void
2193 store_alu_dest(struct ntd_context *ctx, nir_alu_instr *alu, unsigned chan,
2194                const struct dxil_value *value)
2195 {
2196    store_def(ctx, &alu->def, chan, value);
2197 }
2198 
2199 static const struct dxil_value *
2200 get_src_ssa(struct ntd_context *ctx, const nir_def *ssa, unsigned chan)
2201 {
2202    assert(ssa->index < ctx->num_defs);
2203    assert(chan < ssa->num_components);
2204    assert(ctx->defs[ssa->index].chans[chan]);
2205    return ctx->defs[ssa->index].chans[chan];
2206 }
2207 
2208 static const struct dxil_value *
2209 get_src(struct ntd_context *ctx, nir_src *src, unsigned chan,
2210         nir_alu_type type)
2211 {
2212    const struct dxil_value *value = get_src_ssa(ctx, src->ssa, chan);
2213 
2214    const int bit_size = nir_src_bit_size(*src);
2215 
2216    switch (nir_alu_type_get_base_type(type)) {
2217    case nir_type_int:
2218    case nir_type_uint: {
2219       const struct dxil_type *expect_type = dxil_module_get_int_type(&ctx->mod, bit_size);
2220       /* nothing to do */
2221       if (dxil_value_type_equal_to(value, expect_type)) {
2222          assert(bit_size != 64 || ctx->mod.feats.int64_ops);
2223          return value;
2224       }
2225       if (bit_size == 64) {
2226          assert(ctx->mod.feats.doubles);
2227          ctx->mod.feats.int64_ops = true;
2228       }
2229       if (bit_size == 16)
2230          ctx->mod.feats.native_low_precision = true;
2231       assert(dxil_value_type_bitsize_equal_to(value, bit_size));
2232       return bitcast_to_int(ctx, bit_size, value);
2233       }
2234 
2235    case nir_type_float:
2236       assert(nir_src_bit_size(*src) >= 16);
2237       if (dxil_value_type_equal_to(value, dxil_module_get_float_type(&ctx->mod, bit_size))) {
2238          assert(nir_src_bit_size(*src) != 64 || ctx->mod.feats.doubles);
2239          return value;
2240       }
2241       if (bit_size == 64) {
2242          assert(ctx->mod.feats.int64_ops);
2243          ctx->mod.feats.doubles = true;
2244       }
2245       if (bit_size == 16)
2246          ctx->mod.feats.native_low_precision = true;
2247       assert(dxil_value_type_bitsize_equal_to(value, bit_size));
2248       return bitcast_to_float(ctx, bit_size, value);
2249 
2250    case nir_type_bool:
2251       if (!dxil_value_type_bitsize_equal_to(value, 1)) {
2252          return dxil_emit_cast(&ctx->mod, DXIL_CAST_TRUNC,
2253                                dxil_module_get_int_type(&ctx->mod, 1), value);
2254       }
2255       return value;
2256 
2257    default:
2258       unreachable("unexpected nir_alu_type");
2259    }
2260 }
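
/* Example: requesting an i32-typed SSA value as nir_type_float returns a
 * bitcast_to_float() of the stored value, while a value whose DXIL type
 * already matches is returned unchanged; bool sources are truncated to i1
 * only when they are wider than 1 bit.
 */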
2261 
2262 static const struct dxil_value *
2263 get_alu_src(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src)
2264 {
2265    unsigned chan = alu->src[src].swizzle[0];
2266    return get_src(ctx, &alu->src[src].src, chan,
2267                   nir_op_infos[alu->op].input_types[src]);
2268 }
2269 
2270 static bool
2271 emit_binop(struct ntd_context *ctx, nir_alu_instr *alu,
2272            enum dxil_bin_opcode opcode,
2273            const struct dxil_value *op0, const struct dxil_value *op1)
2274 {
2275    bool is_float_op = nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) == nir_type_float;
2276 
2277    enum dxil_opt_flags flags = 0;
2278    if (is_float_op && !alu->exact)
2279       flags |= DXIL_UNSAFE_ALGEBRA;
2280 
2281    const struct dxil_value *v = dxil_emit_binop(&ctx->mod, opcode, op0, op1, flags);
2282    if (!v)
2283       return false;
2284    store_alu_dest(ctx, alu, 0, v);
2285    return true;
2286 }
2287 
2288 static bool
2289 emit_shift(struct ntd_context *ctx, nir_alu_instr *alu,
2290            enum dxil_bin_opcode opcode,
2291            const struct dxil_value *op0, const struct dxil_value *op1)
2292 {
2293    unsigned op0_bit_size = nir_src_bit_size(alu->src[0].src);
2294    unsigned op1_bit_size = nir_src_bit_size(alu->src[1].src);
2295 
2296    uint64_t shift_mask = op0_bit_size - 1;
2297    if (!nir_src_is_const(alu->src[1].src)) {
2298       if (op0_bit_size != op1_bit_size) {
2299          const struct dxil_type *type =
2300             dxil_module_get_int_type(&ctx->mod, op0_bit_size);
2301          enum dxil_cast_opcode cast_op =
2302             op1_bit_size < op0_bit_size ? DXIL_CAST_ZEXT : DXIL_CAST_TRUNC;
2303          op1 = dxil_emit_cast(&ctx->mod, cast_op, type, op1);
2304       }
2305       op1 = dxil_emit_binop(&ctx->mod, DXIL_BINOP_AND,
2306                             op1,
2307                             dxil_module_get_int_const(&ctx->mod, shift_mask, op0_bit_size),
2308                             0);
2309    } else {
2310       uint64_t val = nir_scalar_as_uint(
2311          nir_scalar_chase_alu_src(nir_get_scalar(&alu->def, 0), 1));
2312       op1 = dxil_module_get_int_const(&ctx->mod, val & shift_mask, op0_bit_size);
2313    }
2314 
2315    const struct dxil_value *v =
2316       dxil_emit_binop(&ctx->mod, opcode, op0, op1, 0);
2317    if (!v)
2318       return false;
2319    store_alu_dest(ctx, alu, 0, v);
2320    return true;
2321 }
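
/* The masking above mirrors NIR semantics, where shift counts are taken
 * modulo the bit width, while LLVM/DXIL shifts are undefined for counts >=
 * the width; e.g. a 32-bit shift by 33 is emitted as a shift by 33 & 31 = 1.
 */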
2322 
2323 static bool
2324 emit_cmp(struct ntd_context *ctx, nir_alu_instr *alu,
2325          enum dxil_cmp_pred pred,
2326          const struct dxil_value *op0, const struct dxil_value *op1)
2327 {
2328    const struct dxil_value *v = dxil_emit_cmp(&ctx->mod, pred, op0, op1);
2329    if (!v)
2330       return false;
2331    store_alu_dest(ctx, alu, 0, v);
2332    return true;
2333 }
2334 
2335 static enum dxil_cast_opcode
2336 get_cast_op(nir_alu_instr *alu)
2337 {
2338    unsigned dst_bits = alu->def.bit_size;
2339    unsigned src_bits = nir_src_bit_size(alu->src[0].src);
2340 
2341    switch (alu->op) {
2342    /* bool -> int */
2343    case nir_op_b2i16:
2344    case nir_op_b2i32:
2345    case nir_op_b2i64:
2346       return DXIL_CAST_ZEXT;
2347 
2348    /* float -> float */
2349    case nir_op_f2f16_rtz:
2350    case nir_op_f2f16:
2351    case nir_op_f2fmp:
2352    case nir_op_f2f32:
2353    case nir_op_f2f64:
2354       assert(dst_bits != src_bits);
2355       if (dst_bits < src_bits)
2356          return DXIL_CAST_FPTRUNC;
2357       else
2358          return DXIL_CAST_FPEXT;
2359 
2360    /* int -> int */
2361    case nir_op_i2i1:
2362    case nir_op_i2i16:
2363    case nir_op_i2imp:
2364    case nir_op_i2i32:
2365    case nir_op_i2i64:
2366       assert(dst_bits != src_bits);
2367       if (dst_bits < src_bits)
2368          return DXIL_CAST_TRUNC;
2369       else
2370          return DXIL_CAST_SEXT;
2371 
2372    /* uint -> uint */
2373    case nir_op_u2u1:
2374    case nir_op_u2u16:
2375    case nir_op_u2u32:
2376    case nir_op_u2u64:
2377       assert(dst_bits != src_bits);
2378       if (dst_bits < src_bits)
2379          return DXIL_CAST_TRUNC;
2380       else
2381          return DXIL_CAST_ZEXT;
2382 
2383    /* float -> int */
2384    case nir_op_f2i16:
2385    case nir_op_f2imp:
2386    case nir_op_f2i32:
2387    case nir_op_f2i64:
2388       return DXIL_CAST_FPTOSI;
2389 
2390    /* float -> uint */
2391    case nir_op_f2u16:
2392    case nir_op_f2ump:
2393    case nir_op_f2u32:
2394    case nir_op_f2u64:
2395       return DXIL_CAST_FPTOUI;
2396 
2397    /* int -> float */
2398    case nir_op_i2f16:
2399    case nir_op_i2fmp:
2400    case nir_op_i2f32:
2401    case nir_op_i2f64:
2402       return DXIL_CAST_SITOFP;
2403 
2404    /* uint -> float */
2405    case nir_op_u2f16:
2406    case nir_op_u2fmp:
2407    case nir_op_u2f32:
2408    case nir_op_u2f64:
2409       return DXIL_CAST_UITOFP;
2410 
2411    default:
2412       unreachable("unexpected cast op");
2413    }
2414 }
2415 
2416 static const struct dxil_type *
2417 get_cast_dest_type(struct ntd_context *ctx, nir_alu_instr *alu)
2418 {
2419    unsigned dst_bits = alu->def.bit_size;
2420    switch (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type)) {
2421    case nir_type_bool:
2422       assert(dst_bits == 1);
2423       FALLTHROUGH;
2424    case nir_type_int:
2425    case nir_type_uint:
2426       return dxil_module_get_int_type(&ctx->mod, dst_bits);
2427 
2428    case nir_type_float:
2429       return dxil_module_get_float_type(&ctx->mod, dst_bits);
2430 
2431    default:
2432       unreachable("unknown nir_alu_type");
2433    }
2434 }
2435 
2436 static bool
2437 is_double(nir_alu_type alu_type, unsigned bit_size)
2438 {
2439    return nir_alu_type_get_base_type(alu_type) == nir_type_float &&
2440           bit_size == 64;
2441 }
2442 
2443 static bool
2444 emit_cast(struct ntd_context *ctx, nir_alu_instr *alu,
2445           const struct dxil_value *value)
2446 {
2447    enum dxil_cast_opcode opcode = get_cast_op(alu);
2448    const struct dxil_type *type = get_cast_dest_type(ctx, alu);
2449    if (!type)
2450       return false;
2451 
2452    const nir_op_info *info = &nir_op_infos[alu->op];
2453    switch (opcode) {
2454    case DXIL_CAST_UITOFP:
2455    case DXIL_CAST_SITOFP:
2456       if (is_double(info->output_type, alu->def.bit_size))
2457          ctx->mod.feats.dx11_1_double_extensions = true;
2458       break;
2459    case DXIL_CAST_FPTOUI:
2460    case DXIL_CAST_FPTOSI:
2461       if (is_double(info->input_types[0], nir_src_bit_size(alu->src[0].src)))
2462          ctx->mod.feats.dx11_1_double_extensions = true;
2463       break;
2464    default:
2465       break;
2466    }
2467 
2468    if (alu->def.bit_size == 16) {
2469       switch (alu->op) {
2470       case nir_op_f2fmp:
2471       case nir_op_i2imp:
2472       case nir_op_f2imp:
2473       case nir_op_f2ump:
2474       case nir_op_i2fmp:
2475       case nir_op_u2fmp:
2476          break;
2477       default:
2478          ctx->mod.feats.native_low_precision = true;
2479       }
2480    }
2481 
2482    const struct dxil_value *v = dxil_emit_cast(&ctx->mod, opcode, type,
2483                                                value);
2484    if (!v)
2485       return false;
2486    store_alu_dest(ctx, alu, 0, v);
2487    return true;
2488 }
2489 
2490 static enum overload_type
2491 get_overload(nir_alu_type alu_type, unsigned bit_size)
2492 {
2493    switch (nir_alu_type_get_base_type(alu_type)) {
2494    case nir_type_int:
2495    case nir_type_uint:
2496    case nir_type_bool:
2497       switch (bit_size) {
2498       case 1: return DXIL_I1;
2499       case 16: return DXIL_I16;
2500       case 32: return DXIL_I32;
2501       case 64: return DXIL_I64;
2502       default:
2503          unreachable("unexpected bit_size");
2504       }
2505    case nir_type_float:
2506       switch (bit_size) {
2507       case 16: return DXIL_F16;
2508       case 32: return DXIL_F32;
2509       case 64: return DXIL_F64;
2510       default:
2511          unreachable("unexpected bit_size");
2512       }
2513    case nir_type_invalid:
2514       return DXIL_NONE;
2515    default:
2516       unreachable("unexpected output type");
2517    }
2518 }
2519 
2520 static enum overload_type
2521 get_ambiguous_overload(struct ntd_context *ctx, nir_intrinsic_instr *intr,
2522                        enum overload_type default_type)
2523 {
2524    if (BITSET_TEST(ctx->int_types, intr->def.index))
2525       return get_overload(nir_type_int, intr->def.bit_size);
2526    if (BITSET_TEST(ctx->float_types, intr->def.index))
2527       return get_overload(nir_type_float, intr->def.bit_size);
2528    return default_type;
2529 }
2530 
2531 static enum overload_type
2532 get_ambiguous_overload_alu_type(struct ntd_context *ctx, nir_intrinsic_instr *intr,
2533                                 nir_alu_type alu_type)
2534 {
2535    return get_ambiguous_overload(ctx, intr, get_overload(alu_type, intr->def.bit_size));
2536 }
2537 
2538 static bool
2539 emit_unary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
2540                  enum dxil_intr intr, const struct dxil_value *op)
2541 {
2542    const nir_op_info *info = &nir_op_infos[alu->op];
2543    unsigned src_bits = nir_src_bit_size(alu->src[0].src);
2544    enum overload_type overload = get_overload(info->input_types[0], src_bits);
2545 
2546    const struct dxil_value *v = emit_unary_call(ctx, overload, intr, op);
2547    if (!v)
2548       return false;
2549    store_alu_dest(ctx, alu, 0, v);
2550    return true;
2551 }
2552 
2553 static bool
2554 emit_binary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
2555                   enum dxil_intr intr,
2556                   const struct dxil_value *op0, const struct dxil_value *op1)
2557 {
2558    const nir_op_info *info = &nir_op_infos[alu->op];
2559    assert(info->output_type == info->input_types[0]);
2560    assert(info->output_type == info->input_types[1]);
2561    unsigned dst_bits = alu->def.bit_size;
2562    assert(nir_src_bit_size(alu->src[0].src) == dst_bits);
2563    assert(nir_src_bit_size(alu->src[1].src) == dst_bits);
2564    enum overload_type overload = get_overload(info->output_type, dst_bits);
2565 
2566    const struct dxil_value *v = emit_binary_call(ctx, overload, intr,
2567                                                  op0, op1);
2568    if (!v)
2569       return false;
2570    store_alu_dest(ctx, alu, 0, v);
2571    return true;
2572 }
2573 
2574 static bool
2575 emit_tertiary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
2576                     enum dxil_intr intr,
2577                     const struct dxil_value *op0,
2578                     const struct dxil_value *op1,
2579                     const struct dxil_value *op2)
2580 {
2581    const nir_op_info *info = &nir_op_infos[alu->op];
2582    unsigned dst_bits = alu->def.bit_size;
2583    assert(nir_src_bit_size(alu->src[0].src) == dst_bits);
2584    assert(nir_src_bit_size(alu->src[1].src) == dst_bits);
2585    assert(nir_src_bit_size(alu->src[2].src) == dst_bits);
2586 
2587    assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[0], dst_bits));
2588    assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[1], dst_bits));
2589    assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[2], dst_bits));
2590 
2591    enum overload_type overload = get_overload(info->output_type, dst_bits);
2592 
2593    const struct dxil_value *v = emit_tertiary_call(ctx, overload, intr,
2594                                                    op0, op1, op2);
2595    if (!v)
2596       return false;
2597    store_alu_dest(ctx, alu, 0, v);
2598    return true;
2599 }
2600 
2601 static bool
2602 emit_derivative(struct ntd_context *ctx, nir_intrinsic_instr *intr,
2603                  enum dxil_intr dxil_intr)
2604 {
2605    const struct dxil_value *src = get_src(ctx, &intr->src[0], 0, nir_type_float);
2606    enum overload_type overload = get_overload(nir_type_float, intr->src[0].ssa->bit_size);
2607    const struct dxil_value *v = emit_unary_call(ctx, overload, dxil_intr, src);
2608    if (!v)
2609       return false;
2610    store_def(ctx, &intr->def, 0, v);
2611    return true;
2612 }
2613 
2614 static bool
2615 emit_bitfield_insert(struct ntd_context *ctx, nir_alu_instr *alu,
2616                      const struct dxil_value *base,
2617                      const struct dxil_value *insert,
2618                      const struct dxil_value *offset,
2619                      const struct dxil_value *width)
2620 {
2621    /* DXIL is width, offset, insert, base, NIR is base, insert, offset, width */
2622    const struct dxil_value *v = emit_quaternary_call(ctx, DXIL_I32, DXIL_INTR_BFI,
2623                                                      width, offset, insert, base);
2624    if (!v)
2625       return false;
2626 
2627    /* DXIL only honors the 5 LSBs of width/offset. Special-case width >= 32 to copy insert unchanged. */
2628    const struct dxil_value *compare_width = dxil_emit_cmp(&ctx->mod, DXIL_ICMP_SGE,
2629       width, dxil_module_get_int32_const(&ctx->mod, 32));
2630    v = dxil_emit_select(&ctx->mod, compare_width, insert, v);
2631    store_alu_dest(ctx, alu, 0, v);
2632    return true;
2633 }
2634 
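/* Editor's note: a minimal reference sketch, not part of the original file,
 * showing the semantics the BFI call and select above implement together.
 * The helper name and its use of plain C unsigned arithmetic are
 * illustrative assumptions.
 */
static inline uint32_t ref_bitfield_insert(uint32_t base, uint32_t insert,
                                           uint32_t offset, uint32_t width)
{
   /* Mirrors the select above: width >= 32 copies insert wholesale, since
    * bfi itself only honors the 5 LSBs of width and offset. */
   if (width >= 32)
      return insert;
   uint32_t mask = ((1u << width) - 1u) << offset;
   return (base & ~mask) | ((insert << offset) & mask);
}
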
2635 static bool
2636 emit_dot4add_packed(struct ntd_context *ctx, nir_alu_instr *alu,
2637                     enum dxil_intr intr,
2638                     const struct dxil_value *src0,
2639                     const struct dxil_value *src1,
2640                     const struct dxil_value *accum)
2641 {
2642    const struct dxil_func *f = dxil_get_function(&ctx->mod, "dx.op.dot4AddPacked", DXIL_I32);
2643    if (!f)
2644       return false;
2645    const struct dxil_value *srcs[] = { dxil_module_get_int32_const(&ctx->mod, intr), accum, src0, src1 };
2646    const struct dxil_value *v = dxil_emit_call(&ctx->mod, f, srcs, ARRAY_SIZE(srcs));
2647    if (!v)
2648       return false;
2649 
2650    store_alu_dest(ctx, alu, 0, v);
2651    return true;
2652 }
2653 
2654 static bool emit_select(struct ntd_context *ctx, nir_alu_instr *alu,
2655                         const struct dxil_value *sel,
2656                         const struct dxil_value *val_true,
2657                         const struct dxil_value *val_false)
2658 {
2659    assert(sel);
2660    assert(val_true);
2661    assert(val_false);
2662 
2663    const struct dxil_value *v = dxil_emit_select(&ctx->mod, sel, val_true, val_false);
2664    if (!v)
2665       return false;
2666 
2667    store_alu_dest(ctx, alu, 0, v);
2668    return true;
2669 }
2670 
2671 static bool
2672 emit_b2f16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
2673 {
2674    assert(val);
2675 
2676    struct dxil_module *m = &ctx->mod;
2677 
2678    const struct dxil_value *c1 = dxil_module_get_float16_const(m, 0x3C00);
2679    const struct dxil_value *c0 = dxil_module_get_float16_const(m, 0);
2680 
2681    if (!c0 || !c1)
2682       return false;
2683 
2684    return emit_select(ctx, alu, val, c1, c0);
2685 }
2686 
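/* Editor's note: 0x3C00 above is the raw IEEE 754 binary16 encoding of 1.0
 * (sign 0, biased exponent 01111 = 15, mantissa 0, i.e. 1.0 * 2^(15-15)),
 * so the select yields 1.0h for true and 0.0h for false.
 */
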
2687 static bool
2688 emit_b2f32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
2689 {
2690    assert(val);
2691 
2692    struct dxil_module *m = &ctx->mod;
2693 
2694    const struct dxil_value *c1 = dxil_module_get_float_const(m, 1.0f);
2695    const struct dxil_value *c0 = dxil_module_get_float_const(m, 0.0f);
2696 
2697    if (!c0 || !c1)
2698       return false;
2699 
2700    return emit_select(ctx, alu, val, c1, c0);
2701 }
2702 
2703 static bool
2704 emit_b2f64(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
2705 {
2706    assert(val);
2707 
2708    struct dxil_module *m = &ctx->mod;
2709 
2710    const struct dxil_value *c1 = dxil_module_get_double_const(m, 1.0);
2711    const struct dxil_value *c0 = dxil_module_get_double_const(m, 0.0);
2712 
2713    if (!c0 || !c1)
2714       return false;
2715 
2716    ctx->mod.feats.doubles = 1;
2717    return emit_select(ctx, alu, val, c1, c0);
2718 }
2719 
2720 static bool
2721 emit_f16tof32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val, bool shift)
2722 {
2723    if (shift) {
2724       val = dxil_emit_binop(&ctx->mod, DXIL_BINOP_LSHR, val,
2725          dxil_module_get_int32_const(&ctx->mod, 16), 0);
2726       if (!val)
2727          return false;
2728    }
2729 
2730    const struct dxil_func *func = dxil_get_function(&ctx->mod,
2731                                                     "dx.op.legacyF16ToF32",
2732                                                     DXIL_NONE);
2733    if (!func)
2734       return false;
2735 
2736    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F16TOF32);
2737    if (!opcode)
2738       return false;
2739 
2740    const struct dxil_value *args[] = {
2741      opcode,
2742      val
2743    };
2744 
2745    const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2746    if (!v)
2747       return false;
2748    store_alu_dest(ctx, alu, 0, v);
2749    return true;
2750 }
2751 
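/* Editor's note, illustrative values only: for a packed source 0xAABBCCDD,
 * unpack_half_2x16_split_x (shift == false) converts the low half 0xCCDD
 * directly, while split_y (shift == true) first emits LSHR 16, yielding
 * 0x0000AABB, so dx.op.legacyF16ToF32 always reads the half-float from the
 * low 16 bits of its operand.
 */
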
2752 static bool
2753 emit_f32tof16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val0, const struct dxil_value *val1)
2754 {
2755    const struct dxil_func *func = dxil_get_function(&ctx->mod,
2756                                                     "dx.op.legacyF32ToF16",
2757                                                     DXIL_NONE);
2758    if (!func)
2759       return false;
2760 
2761    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F32TOF16);
2762    if (!opcode)
2763       return false;
2764 
2765    const struct dxil_value *args[] = {
2766      opcode,
2767      val0
2768    };
2769 
2770    const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2771    if (!v)
2772       return false;
2773 
2774    if (!nir_src_is_const(alu->src[1].src) || nir_src_as_int(alu->src[1].src) != 0) {
2775       args[1] = val1;
2776       const struct dxil_value *v_high = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2777       if (!v_high)
2778          return false;
2779 
2780       v_high = dxil_emit_binop(&ctx->mod, DXIL_BINOP_SHL, v_high,
2781          dxil_module_get_int32_const(&ctx->mod, 16), 0);
2782       if (!v_high)
2783          return false;
2784 
2785       v = dxil_emit_binop(&ctx->mod, DXIL_BINOP_OR, v, v_high, 0);
2786       if (!v)
2787          return false;
2788    }
2789 
2790    store_alu_dest(ctx, alu, 0, v);
2791    return true;
2792 }
2793 
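/* Editor's note: a reference sketch, not part of the original file, of the
 * pack_half_2x16_split lowering above. f32_to_f16() is an assumed stand-in
 * for dx.op.legacyF32ToF16, not a real API.
 */
static inline uint32_t ref_pack_half_2x16_split(float lo, float hi)
{
   extern uint16_t f32_to_f16(float); /* assumed conversion helper */
   /* The emitter skips the second call and the SHL/OR entirely when src1
    * is a compile-time zero; this reference always performs both halves. */
   return (uint32_t)f32_to_f16(lo) | ((uint32_t)f32_to_f16(hi) << 16);
}
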
2794 static bool
2795 emit_vec(struct ntd_context *ctx, nir_alu_instr *alu, unsigned num_inputs)
2796 {
2797    for (unsigned i = 0; i < num_inputs; i++) {
2798       const struct dxil_value *src =
2799          get_src_ssa(ctx, alu->src[i].src.ssa, alu->src[i].swizzle[0]);
2800       if (!src)
2801          return false;
2802 
2803       store_alu_dest(ctx, alu, i, src);
2804    }
2805    return true;
2806 }
2807 
2808 static bool
2809 emit_make_double(struct ntd_context *ctx, nir_alu_instr *alu)
2810 {
2811    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.makeDouble", DXIL_F64);
2812    if (!func)
2813       return false;
2814 
2815    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_MAKE_DOUBLE);
2816    if (!opcode)
2817       return false;
2818 
2819    const struct dxil_value *args[3] = {
2820       opcode,
2821       get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_uint32),
2822       get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[1], nir_type_uint32),
2823    };
2824    if (!args[1] || !args[2])
2825       return false;
2826 
2827    const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2828    if (!v)
2829       return false;
2830    store_def(ctx, &alu->def, 0, v);
2831    return true;
2832 }
2833 
2834 static bool
2835 emit_split_double(struct ntd_context *ctx, nir_alu_instr *alu)
2836 {
2837    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.splitDouble", DXIL_F64);
2838    if (!func)
2839       return false;
2840 
2841    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SPLIT_DOUBLE);
2842    if (!opcode)
2843       return false;
2844 
2845    const struct dxil_value *args[] = {
2846       opcode,
2847       get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_float64)
2848    };
2849    if (!args[1])
2850       return false;
2851 
2852    const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2853    if (!v)
2854       return false;
2855 
2856    const struct dxil_value *hi = dxil_emit_extractval(&ctx->mod, v, 0);
2857    const struct dxil_value *lo = dxil_emit_extractval(&ctx->mod, v, 1);
2858    if (!hi || !lo)
2859       return false;
2860 
2861    store_def(ctx, &alu->def, 0, hi);
2862    store_def(ctx, &alu->def, 1, lo);
2863    return true;
2864 }
2865 
2866 static bool
2867 emit_alu(struct ntd_context *ctx, nir_alu_instr *alu)
2868 {
2869    /* handle vec-instructions first; they are the only ones that produce
2870     * vector results.
2871     */
2872    switch (alu->op) {
2873    case nir_op_vec2:
2874    case nir_op_vec3:
2875    case nir_op_vec4:
2876    case nir_op_vec8:
2877    case nir_op_vec16:
2878       return emit_vec(ctx, alu, nir_op_infos[alu->op].num_inputs);
2879    case nir_op_mov: {
2880          assert(alu->def.num_components == 1);
2881          store_ssa_def(ctx, &alu->def, 0, get_src_ssa(ctx,
2882                         alu->src->src.ssa, alu->src->swizzle[0]));
2883          return true;
2884       }
2885    case nir_op_pack_double_2x32_dxil:
2886       return emit_make_double(ctx, alu);
2887    case nir_op_unpack_double_2x32_dxil:
2888       return emit_split_double(ctx, alu);
2889    case nir_op_bcsel: {
2890       /* Handled here to avoid a forced bitcast to int, since bcsel is used for both ints and floats.
2891        * Ideally the back-typing made both sources match; if it didn't, explicitly fetch src2 with src1's type. */
2892       const struct dxil_value *src1 = get_src_ssa(ctx, alu->src[1].src.ssa, alu->src[1].swizzle[0]);
2893       nir_alu_type src1_type = dxil_type_to_nir_type(dxil_value_get_type(src1));
2894       return emit_select(ctx, alu,
2895                          get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_bool),
2896                          src1,
2897                          get_src(ctx, &alu->src[2].src, alu->src[2].swizzle[0], src1_type));
2898    }
2899    default:
2900       /* silence warnings */
2901       ;
2902    }
2903 
2904    /* other ops should be scalar */
2905    const struct dxil_value *src[4];
2906    assert(nir_op_infos[alu->op].num_inputs <= 4);
2907    for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
2908       src[i] = get_alu_src(ctx, alu, i);
2909       if (!src[i])
2910          return false;
2911    }
2912 
2913    switch (alu->op) {
2914    case nir_op_iadd:
2915    case nir_op_fadd: return emit_binop(ctx, alu, DXIL_BINOP_ADD, src[0], src[1]);
2916 
2917    case nir_op_isub:
2918    case nir_op_fsub: return emit_binop(ctx, alu, DXIL_BINOP_SUB, src[0], src[1]);
2919 
2920    case nir_op_imul:
2921    case nir_op_fmul: return emit_binop(ctx, alu, DXIL_BINOP_MUL, src[0], src[1]);
2922 
2923    case nir_op_fdiv:
2924       if (alu->def.bit_size == 64)
2925          ctx->mod.feats.dx11_1_double_extensions = 1;
2926       return emit_binop(ctx, alu, DXIL_BINOP_SDIV, src[0], src[1]);
2927 
2928    case nir_op_idiv:
2929    case nir_op_udiv:
2930       if (nir_src_is_const(alu->src[1].src)) {
2931          /* It's illegal to emit a literal divide by 0 in DXIL */
2932          nir_scalar divisor = nir_scalar_chase_alu_src(nir_get_scalar(&alu->def, 0), 1);
2933          if (nir_scalar_as_int(divisor) == 0) {
2934             store_alu_dest(ctx, alu, 0,
2935                            dxil_module_get_int_const(&ctx->mod, 0, alu->def.bit_size));
2936             return true;
2937          }
2938       }
2939       return emit_binop(ctx, alu, alu->op == nir_op_idiv ? DXIL_BINOP_SDIV : DXIL_BINOP_UDIV, src[0], src[1]);
2940 
2941    case nir_op_irem: return emit_binop(ctx, alu, DXIL_BINOP_SREM, src[0], src[1]);
2942    case nir_op_imod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);
2943    case nir_op_umod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);
2944    case nir_op_ishl: return emit_shift(ctx, alu, DXIL_BINOP_SHL, src[0], src[1]);
2945    case nir_op_ishr: return emit_shift(ctx, alu, DXIL_BINOP_ASHR, src[0], src[1]);
2946    case nir_op_ushr: return emit_shift(ctx, alu, DXIL_BINOP_LSHR, src[0], src[1]);
2947    case nir_op_iand: return emit_binop(ctx, alu, DXIL_BINOP_AND, src[0], src[1]);
2948    case nir_op_ior:  return emit_binop(ctx, alu, DXIL_BINOP_OR, src[0], src[1]);
2949    case nir_op_ixor: return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], src[1]);
2950    case nir_op_inot: {
2951       unsigned bit_size = alu->def.bit_size;
2952       intmax_t val = bit_size == 1 ? 1 : -1;
2953       const struct dxil_value *negative_one = dxil_module_get_int_const(&ctx->mod, val, bit_size);
2954       return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], negative_one);
2955    }
2956    case nir_op_ieq:  return emit_cmp(ctx, alu, DXIL_ICMP_EQ, src[0], src[1]);
2957    case nir_op_ine:  return emit_cmp(ctx, alu, DXIL_ICMP_NE, src[0], src[1]);
2958    case nir_op_ige:  return emit_cmp(ctx, alu, DXIL_ICMP_SGE, src[0], src[1]);
2959    case nir_op_uge:  return emit_cmp(ctx, alu, DXIL_ICMP_UGE, src[0], src[1]);
2960    case nir_op_ilt:  return emit_cmp(ctx, alu, DXIL_ICMP_SLT, src[0], src[1]);
2961    case nir_op_ult:  return emit_cmp(ctx, alu, DXIL_ICMP_ULT, src[0], src[1]);
2962    case nir_op_feq:  return emit_cmp(ctx, alu, DXIL_FCMP_OEQ, src[0], src[1]);
2963    case nir_op_fneu: return emit_cmp(ctx, alu, DXIL_FCMP_UNE, src[0], src[1]);
2964    case nir_op_flt:  return emit_cmp(ctx, alu, DXIL_FCMP_OLT, src[0], src[1]);
2965    case nir_op_fge:  return emit_cmp(ctx, alu, DXIL_FCMP_OGE, src[0], src[1]);
2966    case nir_op_ftrunc: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_Z, src[0]);
2967    case nir_op_fabs: return emit_unary_intin(ctx, alu, DXIL_INTR_FABS, src[0]);
2968    case nir_op_fcos: return emit_unary_intin(ctx, alu, DXIL_INTR_FCOS, src[0]);
2969    case nir_op_fsin: return emit_unary_intin(ctx, alu, DXIL_INTR_FSIN, src[0]);
2970    case nir_op_fceil: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_PI, src[0]);
2971    case nir_op_fexp2: return emit_unary_intin(ctx, alu, DXIL_INTR_FEXP2, src[0]);
2972    case nir_op_flog2: return emit_unary_intin(ctx, alu, DXIL_INTR_FLOG2, src[0]);
2973    case nir_op_ffloor: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NI, src[0]);
2974    case nir_op_ffract: return emit_unary_intin(ctx, alu, DXIL_INTR_FRC, src[0]);
2975    case nir_op_fisnormal: return emit_unary_intin(ctx, alu, DXIL_INTR_ISNORMAL, src[0]);
2976    case nir_op_fisfinite: return emit_unary_intin(ctx, alu, DXIL_INTR_ISFINITE, src[0]);
2977 
2978    case nir_op_fround_even: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NE, src[0]);
2979    case nir_op_frcp: {
2980       const struct dxil_value *one;
2981       switch (alu->def.bit_size) {
2982       case 16:
2983          one = dxil_module_get_float16_const(&ctx->mod, 0x3C00);
2984          break;
2985       case 32:
2986          one = dxil_module_get_float_const(&ctx->mod, 1.0f);
2987          break;
2988       case 64:
2989          one = dxil_module_get_double_const(&ctx->mod, 1.0);
2990          break;
2991       default: unreachable("Invalid float size");
2992       }
2993       return emit_binop(ctx, alu, DXIL_BINOP_SDIV, one, src[0]);
2994    }
2995    case nir_op_fsat: return emit_unary_intin(ctx, alu, DXIL_INTR_SATURATE, src[0]);
2996    case nir_op_bit_count: return emit_unary_intin(ctx, alu, DXIL_INTR_COUNTBITS, src[0]);
2997    case nir_op_bitfield_reverse: return emit_unary_intin(ctx, alu, DXIL_INTR_BFREV, src[0]);
2998    case nir_op_ufind_msb_rev: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_HI, src[0]);
2999    case nir_op_ifind_msb_rev: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_SHI, src[0]);
3000    case nir_op_find_lsb: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_LO, src[0]);
3001    case nir_op_imax: return emit_binary_intin(ctx, alu, DXIL_INTR_IMAX, src[0], src[1]);
3002    case nir_op_imin: return emit_binary_intin(ctx, alu, DXIL_INTR_IMIN, src[0], src[1]);
3003    case nir_op_umax: return emit_binary_intin(ctx, alu, DXIL_INTR_UMAX, src[0], src[1]);
3004    case nir_op_umin: return emit_binary_intin(ctx, alu, DXIL_INTR_UMIN, src[0], src[1]);
3005    case nir_op_frsq: return emit_unary_intin(ctx, alu, DXIL_INTR_RSQRT, src[0]);
3006    case nir_op_fsqrt: return emit_unary_intin(ctx, alu, DXIL_INTR_SQRT, src[0]);
3007    case nir_op_fmax: return emit_binary_intin(ctx, alu, DXIL_INTR_FMAX, src[0], src[1]);
3008    case nir_op_fmin: return emit_binary_intin(ctx, alu, DXIL_INTR_FMIN, src[0], src[1]);
3009    case nir_op_ffma:
3010       if (alu->def.bit_size == 64)
3011          ctx->mod.feats.dx11_1_double_extensions = 1;
3012       return emit_tertiary_intin(ctx, alu, DXIL_INTR_FMA, src[0], src[1], src[2]);
3013 
3014    case nir_op_ibfe: return emit_tertiary_intin(ctx, alu, DXIL_INTR_IBFE, src[2], src[1], src[0]);
3015    case nir_op_ubfe: return emit_tertiary_intin(ctx, alu, DXIL_INTR_UBFE, src[2], src[1], src[0]);
3016    case nir_op_bitfield_insert: return emit_bitfield_insert(ctx, alu, src[0], src[1], src[2], src[3]);
3017 
3018    case nir_op_unpack_half_2x16_split_x: return emit_f16tof32(ctx, alu, src[0], false);
3019    case nir_op_unpack_half_2x16_split_y: return emit_f16tof32(ctx, alu, src[0], true);
3020    case nir_op_pack_half_2x16_split: return emit_f32tof16(ctx, alu, src[0], src[1]);
3021 
3022    case nir_op_sdot_4x8_iadd: return emit_dot4add_packed(ctx, alu, DXIL_INTR_DOT4_ADD_I8_PACKED, src[0], src[1], src[2]);
3023    case nir_op_udot_4x8_uadd: return emit_dot4add_packed(ctx, alu, DXIL_INTR_DOT4_ADD_U8_PACKED, src[0], src[1], src[2]);
3024 
3025    case nir_op_i2i1:
3026    case nir_op_u2u1:
3027    case nir_op_b2i16:
3028    case nir_op_i2i16:
3029    case nir_op_i2imp:
3030    case nir_op_f2i16:
3031    case nir_op_f2imp:
3032    case nir_op_f2u16:
3033    case nir_op_f2ump:
3034    case nir_op_u2u16:
3035    case nir_op_u2f16:
3036    case nir_op_u2fmp:
3037    case nir_op_i2f16:
3038    case nir_op_i2fmp:
3039    case nir_op_f2f16_rtz:
3040    case nir_op_f2f16:
3041    case nir_op_f2fmp:
3042    case nir_op_b2i32:
3043    case nir_op_f2f32:
3044    case nir_op_f2i32:
3045    case nir_op_f2u32:
3046    case nir_op_i2f32:
3047    case nir_op_i2i32:
3048    case nir_op_u2f32:
3049    case nir_op_u2u32:
3050    case nir_op_b2i64:
3051    case nir_op_f2f64:
3052    case nir_op_f2i64:
3053    case nir_op_f2u64:
3054    case nir_op_i2f64:
3055    case nir_op_i2i64:
3056    case nir_op_u2f64:
3057    case nir_op_u2u64:
3058       return emit_cast(ctx, alu, src[0]);
3059 
3060    case nir_op_b2f16: return emit_b2f16(ctx, alu, src[0]);
3061    case nir_op_b2f32: return emit_b2f32(ctx, alu, src[0]);
3062    case nir_op_b2f64: return emit_b2f64(ctx, alu, src[0]);
3063    default:
3064       log_nir_instr_unsupported(ctx->logger, "Unimplemented ALU instruction",
3065                                 &alu->instr);
3066       return false;
3067    }
3068 }
3069 
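/* Editor's note on two of the scalar lowerings above, with illustrative
 * values: inot is emitted as xor with all-ones of the operand width (for
 * 32-bit, ~x == x ^ 0xffffffff; for 1-bit booleans, !b == b ^ 1), and a
 * constant integer divide by zero, which DXIL rejects as a literal, is
 * folded to 0 instead of emitting the division.
 */
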
3070 static const struct dxil_value *
3071 load_ubo(struct ntd_context *ctx, const struct dxil_value *handle,
3072          const struct dxil_value *offset, enum overload_type overload)
3073 {
3074    assert(handle && offset);
3075 
3076    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CBUFFER_LOAD_LEGACY);
3077    if (!opcode)
3078       return NULL;
3079 
3080    const struct dxil_value *args[] = {
3081       opcode, handle, offset
3082    };
3083 
3084    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cbufferLoadLegacy", overload);
3085    if (!func)
3086       return NULL;
3087    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3088 }
3089 
3090 static bool
3091 emit_barrier_impl(struct ntd_context *ctx, nir_variable_mode modes, mesa_scope execution_scope, mesa_scope mem_scope)
3092 {
3093    const struct dxil_value *opcode, *mode;
3094    const struct dxil_func *func;
3095    uint32_t flags = 0;
3096 
3097    if (execution_scope == SCOPE_WORKGROUP)
3098       flags |= DXIL_BARRIER_MODE_SYNC_THREAD_GROUP;
3099 
3100    bool is_compute = ctx->mod.shader_kind == DXIL_COMPUTE_SHADER;
3101 
3102    if ((modes & (nir_var_mem_ssbo | nir_var_mem_global | nir_var_image)) &&
3103        (mem_scope > SCOPE_WORKGROUP || !is_compute)) {
3104       flags |= DXIL_BARRIER_MODE_UAV_FENCE_GLOBAL;
3105    } else {
3106       flags |= DXIL_BARRIER_MODE_UAV_FENCE_THREAD_GROUP;
3107    }
3108 
3109    if ((modes & nir_var_mem_shared) && is_compute)
3110       flags |= DXIL_BARRIER_MODE_GROUPSHARED_MEM_FENCE;
3111 
3112    func = dxil_get_function(&ctx->mod, "dx.op.barrier", DXIL_NONE);
3113    if (!func)
3114       return false;
3115 
3116    opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_BARRIER);
3117    if (!opcode)
3118       return false;
3119 
3120    mode = dxil_module_get_int32_const(&ctx->mod, flags);
3121    if (!mode)
3122       return false;
3123 
3124    const struct dxil_value *args[] = { opcode, mode };
3125 
3126    return dxil_emit_call_void(&ctx->mod, func,
3127                               args, ARRAY_SIZE(args));
3128 }
3129 
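/* Editor's note: worked example of the flag selection above. A GLSL
 * barrier() in a compute shader (shared-memory modes, execution and memory
 * scope both SCOPE_WORKGROUP) yields SYNC_THREAD_GROUP |
 * UAV_FENCE_THREAD_GROUP | GROUPSHARED_MEM_FENCE, whereas a device-scope
 * SSBO memory barrier with no execution scope selects UAV_FENCE_GLOBAL
 * instead of the thread-group UAV fence.
 */
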
3130 static bool
3131 emit_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3132 {
3133    return emit_barrier_impl(ctx,
3134       nir_intrinsic_memory_modes(intr),
3135       nir_intrinsic_execution_scope(intr),
3136       nir_intrinsic_memory_scope(intr));
3137 }
3138 
3139 static bool
3140 emit_load_global_invocation_id(struct ntd_context *ctx,
3141                                     nir_intrinsic_instr *intr)
3142 {
3143    nir_component_mask_t comps = nir_def_components_read(&intr->def);
3144 
3145    for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
3146       if (comps & (1 << i)) {
3147          const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
3148          if (!idx)
3149             return false;
3150          const struct dxil_value *globalid = emit_threadid_call(ctx, idx);
3151 
3152          if (!globalid)
3153             return false;
3154 
3155          store_def(ctx, &intr->def, i, globalid);
3156       }
3157    }
3158    return true;
3159 }
3160 
3161 static bool
3162 emit_load_local_invocation_id(struct ntd_context *ctx,
3163                               nir_intrinsic_instr *intr)
3164 {
3165    nir_component_mask_t comps = nir_def_components_read(&intr->def);
3166 
3167    for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
3168       if (comps & (1 << i)) {
3169          const struct dxil_value
3170             *idx = dxil_module_get_int32_const(&ctx->mod, i);
3171          if (!idx)
3172             return false;
3173          const struct dxil_value
3174             *threadidingroup = emit_threadidingroup_call(ctx, idx);
3175          if (!threadidingroup)
3176             return false;
3177          store_def(ctx, &intr->def, i, threadidingroup);
3178       }
3179    }
3180    return true;
3181 }
3182 
3183 static bool
3184 emit_load_local_invocation_index(struct ntd_context *ctx,
3185                                  nir_intrinsic_instr *intr)
3186 {
3187    const struct dxil_value
3188       *flattenedthreadidingroup = emit_flattenedthreadidingroup_call(ctx);
3189    if (!flattenedthreadidingroup)
3190       return false;
3191    store_def(ctx, &intr->def, 0, flattenedthreadidingroup);
3192 
3193    return true;
3194 }
3195 
3196 static bool
3197 emit_load_local_workgroup_id(struct ntd_context *ctx,
3198                               nir_intrinsic_instr *intr)
3199 {
3200    nir_component_mask_t comps = nir_def_components_read(&intr->def);
3201 
3202    for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
3203       if (comps & (1 << i)) {
3204          const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
3205          if (!idx)
3206             return false;
3207          const struct dxil_value *groupid = emit_groupid_call(ctx, idx);
3208          if (!groupid)
3209             return false;
3210          store_def(ctx, &intr->def, i, groupid);
3211       }
3212    }
3213    return true;
3214 }
3215 
3216 static const struct dxil_value *
3217 call_unary_external_function(struct ntd_context *ctx,
3218                              const char *name,
3219                              int32_t dxil_intr,
3220                              enum overload_type overload)
3221 {
3222    const struct dxil_func *func =
3223       dxil_get_function(&ctx->mod, name, overload);
3224    if (!func)
3225       return NULL;
3226 
3227    const struct dxil_value *opcode =
3228       dxil_module_get_int32_const(&ctx->mod, dxil_intr);
3229    if (!opcode)
3230       return NULL;
3231 
3232    const struct dxil_value *args[] = {opcode};
3233 
3234    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3235 }
3236 
3237 static bool
3238 emit_load_unary_external_function(struct ntd_context *ctx,
3239                                   nir_intrinsic_instr *intr, const char *name,
3240                                   int32_t dxil_intr,
3241                                   nir_alu_type type)
3242 {
3243    const struct dxil_value *value = call_unary_external_function(ctx, name, dxil_intr,
3244                                                                  get_overload(type, intr->def.bit_size));
3245    store_def(ctx, &intr->def, 0, value);
3246 
3247    return true;
3248 }
3249 
3250 static bool
3251 emit_load_sample_mask_in(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3252 {
3253    const struct dxil_value *value = call_unary_external_function(ctx,
3254       "dx.op.coverage", DXIL_INTR_COVERAGE, DXIL_I32);
3255 
3256    /* Mask coverage with (1 << sample index). Note, done as an AND to handle extrapolation cases. */
3257    if (ctx->mod.info.has_per_sample_input) {
3258       value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_AND, value,
3259          dxil_emit_binop(&ctx->mod, DXIL_BINOP_SHL,
3260             dxil_module_get_int32_const(&ctx->mod, 1),
3261             call_unary_external_function(ctx, "dx.op.sampleIndex", DXIL_INTR_SAMPLE_INDEX, DXIL_I32), 0), 0);
3262    }
3263 
3264    store_def(ctx, &intr->def, 0, value);
3265    return true;
3266 }
3267 
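/* Editor's note, illustrative values: with per-sample inputs the raw
 * coverage must expose only the current sample's bit, e.g. coverage 0b1011
 * at sample index 1 gives 0b1011 & (1 << 1) = 0b0010; the AND form
 * likewise yields 0 for extrapolated samples outside the coverage.
 */
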
3268 static bool
3269 emit_load_tess_coord(struct ntd_context *ctx,
3270                      nir_intrinsic_instr *intr)
3271 {
3272    const struct dxil_func *func =
3273       dxil_get_function(&ctx->mod, "dx.op.domainLocation", DXIL_F32);
3274    if (!func)
3275       return false;
3276 
3277    const struct dxil_value *opcode =
3278       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DOMAIN_LOCATION);
3279    if (!opcode)
3280       return false;
3281 
3282    unsigned num_coords = ctx->shader->info.tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES ? 3 : 2;
3283    for (unsigned i = 0; i < num_coords; ++i) {
3284       unsigned component_idx = i;
3285 
3286       const struct dxil_value *component = dxil_module_get_int8_const(&ctx->mod, component_idx);
3287       if (!component)
3288          return false;
3289 
3290       const struct dxil_value *args[] = { opcode, component };
3291 
3292       const struct dxil_value *value =
3293          dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3294       store_def(ctx, &intr->def, i, value);
3295    }
3296 
3297    for (unsigned i = num_coords; i < intr->def.num_components; ++i) {
3298       const struct dxil_value *value = dxil_module_get_float_const(&ctx->mod, 0.0f);
3299       store_def(ctx, &intr->def, i, value);
3300    }
3301 
3302    return true;
3303 }
3304 
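/* Editor's note: triangle domains read three domain-location components
 * (barycentric u, v, w) while quad and isoline domains read two (u, v);
 * any remaining NIR components of gl_TessCoord are padded with 0.0f, as
 * the second loop above does.
 */
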
3305 static const struct dxil_value *
3306 get_int32_undef(struct dxil_module *m)
3307 {
3308    const struct dxil_type *int32_type =
3309       dxil_module_get_int_type(m, 32);
3310    if (!int32_type)
3311       return NULL;
3312 
3313    return dxil_module_get_undef(m, int32_type);
3314 }
3315 
3316 static const struct dxil_value *
3317 get_resource_handle(struct ntd_context *ctx, nir_src *src, enum dxil_resource_class class,
3318                     enum dxil_resource_kind kind)
3319 {
3320    /* This source might be one of:
3321     * 1. Constant resource index - just look it up in precomputed handle arrays
3322     *    If it's null in that array, create a handle
3323     * 2. A handle from load_vulkan_descriptor - just get the stored SSA value
3324     * 3. Dynamic resource index - create a handle for it here
3325     */
3326    assert(src->ssa->num_components == 1 && src->ssa->bit_size == 32);
3327    nir_const_value *const_block_index = nir_src_as_const_value(*src);
3328    const struct dxil_value *handle_entry = NULL;
3329    if (const_block_index) {
3330       assert(ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN);
3331       switch (kind) {
3332       case DXIL_RESOURCE_KIND_CBUFFER:
3333          handle_entry = ctx->cbv_handles[const_block_index->u32];
3334          break;
3335       case DXIL_RESOURCE_KIND_RAW_BUFFER:
3336          if (class == DXIL_RESOURCE_CLASS_UAV)
3337             handle_entry = ctx->ssbo_handles[const_block_index->u32];
3338          else
3339             handle_entry = ctx->srv_handles[const_block_index->u32];
3340          break;
3341       case DXIL_RESOURCE_KIND_SAMPLER:
3342          handle_entry = ctx->sampler_handles[const_block_index->u32];
3343          break;
3344       default:
3345          if (class == DXIL_RESOURCE_CLASS_UAV)
3346             handle_entry = ctx->image_handles[const_block_index->u32];
3347          else
3348             handle_entry = ctx->srv_handles[const_block_index->u32];
3349          break;
3350       }
3351    }
3352 
3353    if (handle_entry)
3354       return handle_entry;
3355 
3356    if (nir_src_as_deref(*src) ||
3357        ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
3358       return get_src_ssa(ctx, src->ssa, 0);
3359    }
3360 
3361    unsigned space = 0;
3362    if (ctx->opts->environment == DXIL_ENVIRONMENT_GL &&
3363        class == DXIL_RESOURCE_CLASS_UAV) {
3364       if (kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
3365          space = 2;
3366       else
3367          space = 1;
3368    }
3369 
3370    /* The base binding here will almost always be zero. The only cases where we end
3371     * up in this type of dynamic indexing are:
3372     * 1. GL UBOs
3373     * 2. GL SSBOs
3374     * 3. CL SSBOs
3375     * In all cases except GL UBOs, the resources are a single zero-based array.
3376     * In that case, the base is 1, because uniforms use 0 and cannot be dynamically
3377     * indexed. All other cases should either fall into static indexing (first early return),
3378     * deref-based dynamic handle creation (images, or Vulkan textures/samplers), or
3379     * load_vulkan_descriptor handle creation.
3380     */
3381    unsigned base_binding = 0;
3382    if (ctx->shader->info.first_ubo_is_default_ubo &&
3383        class == DXIL_RESOURCE_CLASS_CBV)
3384       base_binding = 1;
3385 
3386    const struct dxil_value *value = get_src(ctx, src, 0, nir_type_uint);
3387    const struct dxil_value *handle = emit_createhandle_call_dynamic(ctx, class,
3388       space, base_binding, value, !const_block_index);
3389 
3390    return handle;
3391 }
3392 
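/* Editor's note: worked example of the dynamic-index path above for GL. A
 * dynamically indexed SSBO (UAV, raw buffer) lands in register space 2 and
 * other GL UAVs in space 1; with first_ubo_is_default_ubo set, a
 * dynamically indexed UBO handle starts at base binding 1 because binding 0
 * holds the non-indexable default uniform block.
 */
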
3393 static const struct dxil_value *
3394 create_image_handle(struct ntd_context *ctx, nir_intrinsic_instr *image_intr)
3395 {
3396    const struct dxil_value *unannotated_handle =
3397       emit_createhandle_heap(ctx, get_src(ctx, &image_intr->src[0], 0, nir_type_uint32), false, true /*TODO: divergence*/);
3398    const struct dxil_value *res_props =
3399       dxil_module_get_uav_res_props_const(&ctx->mod, image_intr);
3400 
3401    if (!unannotated_handle || !res_props)
3402       return NULL;
3403 
3404    return emit_annotate_handle(ctx, unannotated_handle, res_props);
3405 }
3406 
3407 static const struct dxil_value *
3408 create_srv_handle(struct ntd_context *ctx, nir_tex_instr *tex, nir_src *src)
3409 {
3410    const struct dxil_value *unannotated_handle =
3411       emit_createhandle_heap(ctx, get_src(ctx, src, 0, nir_type_uint32), false, true /*TODO: divergence*/);
3412    const struct dxil_value *res_props =
3413       dxil_module_get_srv_res_props_const(&ctx->mod, tex);
3414 
3415    if (!unannotated_handle || !res_props)
3416       return NULL;
3417 
3418    return emit_annotate_handle(ctx, unannotated_handle, res_props);
3419 }
3420 
3421 static const struct dxil_value *
3422 create_sampler_handle(struct ntd_context *ctx, bool is_shadow, nir_src *src)
3423 {
3424    const struct dxil_value *unannotated_handle =
3425       emit_createhandle_heap(ctx, get_src(ctx, src, 0, nir_type_uint32), true, true /*TODO: divergence*/);
3426    const struct dxil_value *res_props =
3427       dxil_module_get_sampler_res_props_const(&ctx->mod, is_shadow);
3428 
3429    if (!unannotated_handle || !res_props)
3430       return NULL;
3431 
3432    return emit_annotate_handle(ctx, unannotated_handle, res_props);
3433 }
3434 
3435 static bool
3436 emit_load_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3437 {
3438    const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
3439 
3440    enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV;
3441    if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
3442       nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
3443       if (var && var->data.access & ACCESS_NON_WRITEABLE)
3444          class = DXIL_RESOURCE_CLASS_SRV;
3445    }
3446 
3447    const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], class, DXIL_RESOURCE_KIND_RAW_BUFFER);
3448    const struct dxil_value *offset =
3449       get_src(ctx, &intr->src[1], 0, nir_type_uint);
3450    if (!int32_undef || !handle || !offset)
3451       return false;
3452 
3453    assert(nir_src_bit_size(intr->src[0]) == 32);
3454    assert(nir_intrinsic_dest_components(intr) <= 4);
3455 
3456    const struct dxil_value *coord[2] = {
3457       offset,
3458       int32_undef
3459    };
3460 
3461    enum overload_type overload = get_ambiguous_overload_alu_type(ctx, intr, nir_type_uint);
3462    const struct dxil_value *load = ctx->mod.minor_version >= 2 ?
3463       emit_raw_bufferload_call(ctx, handle, coord,
3464                                overload,
3465                                nir_intrinsic_dest_components(intr),
3466                                intr->def.bit_size / 8) :
3467       emit_bufferload_call(ctx, handle, coord, overload);
3468    if (!load)
3469       return false;
3470 
3471    for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
3472       const struct dxil_value *val =
3473          dxil_emit_extractval(&ctx->mod, load, i);
3474       if (!val)
3475          return false;
3476       store_def(ctx, &intr->def, i, val);
3477    }
3478    if (intr->def.bit_size == 16)
3479       ctx->mod.feats.native_low_precision = true;
3480    return true;
3481 }
3482 
3483 static bool
3484 emit_store_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3485 {
3486    const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[1], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
3487    const struct dxil_value *offset =
3488       get_src(ctx, &intr->src[2], 0, nir_type_uint);
3489    if (!handle || !offset)
3490       return false;
3491 
3492    unsigned num_components = nir_src_num_components(intr->src[0]);
3493    assert(num_components <= 4);
3494    if (nir_src_bit_size(intr->src[0]) == 16)
3495       ctx->mod.feats.native_low_precision = true;
3496 
3497    nir_alu_type type =
3498       dxil_type_to_nir_type(dxil_value_get_type(get_src_ssa(ctx, intr->src[0].ssa, 0)));
3499    const struct dxil_value *value[4] = { 0 };
3500    for (unsigned i = 0; i < num_components; ++i) {
3501       value[i] = get_src(ctx, &intr->src[0], i, type);
3502       if (!value[i])
3503          return false;
3504    }
3505 
3506    const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
3507    if (!int32_undef)
3508       return false;
3509 
3510    const struct dxil_value *coord[2] = {
3511       offset,
3512       int32_undef
3513    };
3514 
3515    enum overload_type overload = get_overload(type, intr->src[0].ssa->bit_size);
3516    if (num_components < 4) {
3517       const struct dxil_value *value_undef = dxil_module_get_undef(&ctx->mod, dxil_value_get_type(value[0]));
3518       if (!value_undef)
3519          return false;
3520 
3521       for (int i = num_components; i < 4; ++i)
3522          value[i] = value_undef;
3523    }
3524 
3525    const struct dxil_value *write_mask =
3526       dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
3527    if (!write_mask)
3528       return false;
3529 
3530    return ctx->mod.minor_version >= 2 ?
3531       emit_raw_bufferstore_call(ctx, handle, coord, value, write_mask, overload, intr->src[0].ssa->bit_size / 8) :
3532       emit_bufferstore_call(ctx, handle, coord, value, write_mask, overload);
3533 }
3534 
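/* Editor's note, illustrative arithmetic: the write mask above is a
 * contiguous LSB mask, e.g. a 3-component store yields (1u << 3) - 1 =
 * 0b0111, and the unused fourth lane is padded with undef so the store
 * call always carries four value operands.
 */
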
3535 static bool
3536 emit_load_ubo_vec4(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3537 {
3538    const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, DXIL_RESOURCE_KIND_CBUFFER);
3539    const struct dxil_value *offset =
3540       get_src(ctx, &intr->src[1], 0, nir_type_uint);
3541 
3542    if (!handle || !offset)
3543       return false;
3544 
3545    enum overload_type overload = get_ambiguous_overload_alu_type(ctx, intr, nir_type_uint);
3546    const struct dxil_value *agg = load_ubo(ctx, handle, offset, overload);
3547    if (!agg)
3548       return false;
3549 
3550    unsigned first_component = nir_intrinsic_has_component(intr) ?
3551       nir_intrinsic_component(intr) : 0;
3552    for (unsigned i = 0; i < intr->def.num_components; i++)
3553       store_def(ctx, &intr->def, i,
3554                  dxil_emit_extractval(&ctx->mod, agg, i + first_component));
3555 
3556    if (intr->def.bit_size == 16)
3557       ctx->mod.feats.native_low_precision = true;
3558    return true;
3559 }
3560 
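/* Editor's note: dx.op.cbufferLoadLegacy returns an entire 16-byte
 * constant-buffer row as an aggregate and the offset source is the row
 * index, so a load of N components at component base c extracts row
 * elements c..c+N-1 via extractval, as in the loop above.
 */
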
3561 /* Need to add patch-ness as a matching parameter, since driver_location is *not* unique
3562  * between control points and patch variables in HS/DS
3563  */
3564 static nir_variable *
3565 find_patch_matching_variable_by_driver_location(nir_shader *s, nir_variable_mode mode, unsigned driver_location, bool patch)
3566 {
3567    nir_foreach_variable_with_modes(var, s, mode) {
3568       if (var->data.driver_location == driver_location &&
3569           var->data.patch == patch)
3570          return var;
3571    }
3572    return NULL;
3573 }
3574 
3575 static bool
3576 emit_store_output_via_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3577 {
3578    assert(intr->intrinsic == nir_intrinsic_store_output ||
3579           ctx->mod.shader_kind == DXIL_HULL_SHADER);
3580    bool is_patch_constant = intr->intrinsic == nir_intrinsic_store_output &&
3581       ctx->mod.shader_kind == DXIL_HULL_SHADER;
3582    nir_alu_type out_type = nir_intrinsic_src_type(intr);
3583    enum overload_type overload = get_overload(out_type, intr->src[0].ssa->bit_size);
3584    const struct dxil_func *func = dxil_get_function(&ctx->mod, is_patch_constant ?
3585       "dx.op.storePatchConstant" : "dx.op.storeOutput",
3586       overload);
3587 
3588    if (!func)
3589       return false;
3590 
3591    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, is_patch_constant ?
3592       DXIL_INTR_STORE_PATCH_CONSTANT : DXIL_INTR_STORE_OUTPUT);
3593    const struct dxil_value *output_id = dxil_module_get_int32_const(&ctx->mod, nir_intrinsic_base(intr));
3594    unsigned row_index = intr->intrinsic == nir_intrinsic_store_output ? 1 : 2;
3595 
3596    /* NIR has these as 1 row, N cols, but DXIL wants them as N rows, 1 col. We muck with these in the signature
3597     * generation, so muck with them here too.
3598     */
3599    nir_io_semantics semantics = nir_intrinsic_io_semantics(intr);
3600    bool is_tess_level = is_patch_constant &&
3601                         (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
3602                          semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER);
3603 
3604    const struct dxil_value *row = NULL;
3605    const struct dxil_value *col = NULL;
3606    if (is_tess_level)
3607       col = dxil_module_get_int8_const(&ctx->mod, 0);
3608    else
3609       row = get_src(ctx, &intr->src[row_index], 0, nir_type_int);
3610 
3611    bool success = true;
3612    uint32_t writemask = nir_intrinsic_write_mask(intr);
3613 
3614    nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_out, nir_intrinsic_base(intr), is_patch_constant);
3615    unsigned var_base_component = var->data.location_frac;
3616    unsigned base_component = nir_intrinsic_component(intr) - var_base_component;
3617 
3618    if (ctx->mod.minor_validator >= 5) {
3619       struct dxil_signature_record *sig_rec = is_patch_constant ?
3620          &ctx->mod.patch_consts[nir_intrinsic_base(intr)] :
3621          &ctx->mod.outputs[nir_intrinsic_base(intr)];
3622       unsigned comp_size = intr->src[0].ssa->bit_size == 64 ? 2 : 1;
3623       unsigned comp_mask = 0;
3624       if (is_tess_level)
3625          comp_mask = 1;
3626       else if (comp_size == 1)
3627          comp_mask = writemask << var_base_component;
3628       else {
3629          for (unsigned i = 0; i < intr->num_components; ++i)
3630             if ((writemask & (1 << i)))
3631                comp_mask |= 3 << ((i + var_base_component) * comp_size);
3632       }
3633       for (unsigned r = 0; r < sig_rec->num_elements; ++r)
3634          sig_rec->elements[r].never_writes_mask &= ~comp_mask;
3635 
3636       if (!nir_src_is_const(intr->src[row_index])) {
3637          struct dxil_psv_signature_element *psv_rec = is_patch_constant ?
3638             &ctx->mod.psv_patch_consts[nir_intrinsic_base(intr)] :
3639             &ctx->mod.psv_outputs[nir_intrinsic_base(intr)];
3640          psv_rec->dynamic_mask_and_stream |= comp_mask;
3641       }
3642    }
3643 
3644    for (unsigned i = 0; i < intr->num_components && success; ++i) {
3645       if (writemask & (1 << i)) {
3646          if (is_tess_level)
3647             row = dxil_module_get_int32_const(&ctx->mod, i + base_component);
3648          else
3649             col = dxil_module_get_int8_const(&ctx->mod, i + base_component);
3650          const struct dxil_value *value = get_src(ctx, &intr->src[0], i, out_type);
3651          if (!col || !row || !value)
3652             return false;
3653 
3654          const struct dxil_value *args[] = {
3655             opcode, output_id, row, col, value
3656          };
3657          success &= dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
3658       }
3659    }
3660 
3661    return success;
3662 }
3663 
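/* Editor's note: worked example of the row/column swap above. A NIR store
 * to gl_TessLevelOuter[2] is component 2 of a single row, but the DXIL
 * patch-constant signature declares tess levels as N rows by 1 column, so
 * the loop emits row = i + base_component (here 2) with the column pinned
 * to 0.
 */
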
3664 static bool
3665 emit_load_input_via_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3666 {
3667    bool attr_at_vertex = false;
3668    if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER &&
3669       ctx->opts->interpolate_at_vertex &&
3670       ctx->opts->provoking_vertex != 0 &&
3671       (nir_intrinsic_dest_type(intr) & nir_type_float)) {
3672       nir_variable *var = nir_find_variable_with_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr));
3673 
3674       attr_at_vertex = var && var->data.interpolation == INTERP_MODE_FLAT;
3675    }
3676 
3677    bool is_patch_constant = (ctx->mod.shader_kind == DXIL_DOMAIN_SHADER &&
3678                              intr->intrinsic == nir_intrinsic_load_input) ||
3679                             (ctx->mod.shader_kind == DXIL_HULL_SHADER &&
3680                              intr->intrinsic == nir_intrinsic_load_output);
3681    bool is_output_control_point = intr->intrinsic == nir_intrinsic_load_per_vertex_output;
3682 
3683    unsigned opcode_val;
3684    const char *func_name;
3685    if (attr_at_vertex) {
3686       opcode_val = DXIL_INTR_ATTRIBUTE_AT_VERTEX;
3687       func_name = "dx.op.attributeAtVertex";
3688       if (ctx->mod.minor_validator >= 6)
3689          ctx->mod.feats.barycentrics = 1;
3690    } else if (is_patch_constant) {
3691       opcode_val = DXIL_INTR_LOAD_PATCH_CONSTANT;
3692       func_name = "dx.op.loadPatchConstant";
3693    } else if (is_output_control_point) {
3694       opcode_val = DXIL_INTR_LOAD_OUTPUT_CONTROL_POINT;
3695       func_name = "dx.op.loadOutputControlPoint";
3696    } else {
3697       opcode_val = DXIL_INTR_LOAD_INPUT;
3698       func_name = "dx.op.loadInput";
3699    }
3700 
3701    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, opcode_val);
3702    if (!opcode)
3703       return false;
3704 
3705    const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod,
3706       is_patch_constant || is_output_control_point ?
3707          nir_intrinsic_base(intr) :
3708          ctx->mod.input_mappings[nir_intrinsic_base(intr)]);
3709    if (!input_id)
3710       return false;
3711 
3712    bool is_per_vertex =
3713       intr->intrinsic == nir_intrinsic_load_per_vertex_input ||
3714       intr->intrinsic == nir_intrinsic_load_per_vertex_output;
3715    int row_index = is_per_vertex ? 1 : 0;
3716    const struct dxil_value *vertex_id = NULL;
3717    if (!is_patch_constant) {
3718       if (is_per_vertex) {
3719          vertex_id = get_src(ctx, &intr->src[0], 0, nir_type_int);
3720       } else if (attr_at_vertex) {
3721          vertex_id = dxil_module_get_int8_const(&ctx->mod, ctx->opts->provoking_vertex);
3722       } else {
3723          const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
3724          if (!int32_type)
3725             return false;
3726 
3727          vertex_id = dxil_module_get_undef(&ctx->mod, int32_type);
3728       }
3729       if (!vertex_id)
3730          return false;
3731    }
3732 
3733    /* NIR has these as 1 row, N cols, but DXIL wants them as N rows, 1 col. We muck with these in the signature
3734     * generation, so muck with them here too.
3735     */
3736    nir_io_semantics semantics = nir_intrinsic_io_semantics(intr);
3737    bool is_tess_level = is_patch_constant &&
3738                         (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
3739                          semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER);
3740 
3741    const struct dxil_value *row = NULL;
3742    const struct dxil_value *comp = NULL;
3743    if (is_tess_level)
3744       comp = dxil_module_get_int8_const(&ctx->mod, 0);
3745    else
3746       row = get_src(ctx, &intr->src[row_index], 0, nir_type_int);
3747 
3748    nir_alu_type out_type = nir_intrinsic_dest_type(intr);
3749    enum overload_type overload = get_overload(out_type, intr->def.bit_size);
3750 
3751    const struct dxil_func *func = dxil_get_function(&ctx->mod, func_name, overload);
3752 
3753    if (!func)
3754       return false;
3755 
3756    nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr), is_patch_constant);
3757    unsigned var_base_component = var ? var->data.location_frac : 0;
3758    unsigned base_component = nir_intrinsic_component(intr) - var_base_component;
3759 
3760    if (ctx->mod.minor_validator >= 5 &&
3761        !is_output_control_point &&
3762        intr->intrinsic != nir_intrinsic_load_output) {
3763       struct dxil_signature_record *sig_rec = is_patch_constant ?
3764          &ctx->mod.patch_consts[nir_intrinsic_base(intr)] :
3765          &ctx->mod.inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]];
3766       unsigned comp_size = intr->def.bit_size == 64 ? 2 : 1;
3767       unsigned comp_mask = (1 << (intr->num_components * comp_size)) - 1;
3768       comp_mask <<= (var_base_component * comp_size);
3769       if (is_tess_level)
3770          comp_mask = 1;
3771       for (unsigned r = 0; r < sig_rec->num_elements; ++r)
3772          sig_rec->elements[r].always_reads_mask |= (comp_mask & sig_rec->elements[r].mask);
3773 
3774       if (!nir_src_is_const(intr->src[row_index])) {
3775          struct dxil_psv_signature_element *psv_rec = is_patch_constant ?
3776             &ctx->mod.psv_patch_consts[nir_intrinsic_base(intr)] :
3777             &ctx->mod.psv_inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]];
3778          psv_rec->dynamic_mask_and_stream |= comp_mask;
3779       }
3780    }
3781 
3782    for (unsigned i = 0; i < intr->num_components; ++i) {
3783       if (is_tess_level)
3784          row = dxil_module_get_int32_const(&ctx->mod, i + base_component);
3785       else
3786          comp = dxil_module_get_int8_const(&ctx->mod, i + base_component);
3787 
3788       if (!row || !comp)
3789          return false;
3790 
3791       const struct dxil_value *args[] = {
3792          opcode, input_id, row, comp, vertex_id
3793       };
3794 
3795       unsigned num_args = ARRAY_SIZE(args) - (is_patch_constant ? 1 : 0);
3796       const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, num_args);
3797       if (!retval)
3798          return false;
3799       store_def(ctx, &intr->def, i, retval);
3800    }
3801    return true;
3802 }
3803 
3804 static bool
3805 emit_load_interpolated_input(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3806 {
3807    nir_intrinsic_instr *barycentric = nir_src_as_intrinsic(intr->src[0]);
3808 
3809    const struct dxil_value *args[6] = { 0 };
3810 
3811    unsigned opcode_val;
3812    const char *func_name;
3813    unsigned num_args;
3814    switch (barycentric->intrinsic) {
3815    case nir_intrinsic_load_barycentric_at_offset:
3816       opcode_val = DXIL_INTR_EVAL_SNAPPED;
3817       func_name = "dx.op.evalSnapped";
3818       num_args = 6;
3819       for (unsigned i = 0; i < 2; ++i) {
3820          const struct dxil_value *float_offset = get_src(ctx, &barycentric->src[0], i, nir_type_float);
3821          /* GLSL uses [-0.5f, 0.5f), DXIL uses (-8, 7) */
3822          const struct dxil_value *offset_16 = dxil_emit_binop(&ctx->mod,
3823             DXIL_BINOP_MUL, float_offset, dxil_module_get_float_const(&ctx->mod, 16.0f), 0);
3824          args[i + 4] = dxil_emit_cast(&ctx->mod, DXIL_CAST_FPTOSI,
3825             dxil_module_get_int_type(&ctx->mod, 32), offset_16);
3826       }
3827       break;
3828    case nir_intrinsic_load_barycentric_pixel:
3829       opcode_val = DXIL_INTR_EVAL_SNAPPED;
3830       func_name = "dx.op.evalSnapped";
3831       num_args = 6;
3832       args[4] = args[5] = dxil_module_get_int32_const(&ctx->mod, 0);
3833       break;
3834    case nir_intrinsic_load_barycentric_at_sample:
3835       opcode_val = DXIL_INTR_EVAL_SAMPLE_INDEX;
3836       func_name = "dx.op.evalSampleIndex";
3837       num_args = 5;
3838       args[4] = get_src(ctx, &barycentric->src[0], 0, nir_type_int);
3839       break;
3840    case nir_intrinsic_load_barycentric_centroid:
3841       opcode_val = DXIL_INTR_EVAL_CENTROID;
3842       func_name = "dx.op.evalCentroid";
3843       num_args = 4;
3844       break;
3845    default:
3846       unreachable("Unsupported interpolation barycentric intrinsic");
3847    }
3848    args[0] = dxil_module_get_int32_const(&ctx->mod, opcode_val);
3849    args[1] = dxil_module_get_int32_const(&ctx->mod, nir_intrinsic_base(intr));
3850    args[2] = get_src(ctx, &intr->src[1], 0, nir_type_int);
3851 
3852    const struct dxil_func *func = dxil_get_function(&ctx->mod, func_name, DXIL_F32);
3853 
3854    if (!func)
3855       return false;
3856 
3857    nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr), false);
3858    unsigned var_base_component = var ? var->data.location_frac : 0;
3859    unsigned base_component = nir_intrinsic_component(intr) - var_base_component;
3860 
3861    if (ctx->mod.minor_validator >= 5) {
3862       struct dxil_signature_record *sig_rec =
3863          &ctx->mod.inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]];
3864       unsigned comp_size = intr->def.bit_size == 64 ? 2 : 1;
3865       unsigned comp_mask = (1 << (intr->num_components * comp_size)) - 1;
3866       comp_mask <<= (var_base_component * comp_size);
3867       for (unsigned r = 0; r < sig_rec->num_elements; ++r)
3868          sig_rec->elements[r].always_reads_mask |= (comp_mask & sig_rec->elements[r].mask);
3869 
3870       if (!nir_src_is_const(intr->src[1])) {
3871          struct dxil_psv_signature_element *psv_rec =
3872             &ctx->mod.psv_inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]];
3873          psv_rec->dynamic_mask_and_stream |= comp_mask;
3874       }
3875    }
3876 
3877    for (unsigned i = 0; i < intr->num_components; ++i) {
3878       args[3] = dxil_module_get_int8_const(&ctx->mod, i + base_component);
3879 
3880       const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, num_args);
3881       if (!retval)
3882          return false;
3883       store_def(ctx, &intr->def, i, retval);
3884    }
3885    return true;
3886 }
3887 
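/* Editor's note, illustrative arithmetic: dx.op.evalSnapped takes pixel
 * offsets in 1/16ths, so a GLSL offset of 0.25f becomes
 * (int)(0.25f * 16.0f) = 4, mapping the GLSL [-0.5, 0.5) range onto the
 * [-8, 7] grid noted in the comment above.
 */
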
3888 static const struct dxil_value *
3889 deref_to_gep(struct ntd_context *ctx, nir_deref_instr *deref)
3890 {
3891    nir_deref_path path;
3892    nir_deref_path_init(&path, deref, ctx->ralloc_ctx);
3893    assert(path.path[0]->deref_type == nir_deref_type_var);
3894    uint32_t count = 0;
3895    while (path.path[count])
3896       ++count;
3897 
3898    const struct dxil_value **gep_indices = ralloc_array(ctx->ralloc_ctx,
3899                                                        const struct dxil_value *,
3900                                                        count + 1);
3901    nir_variable *var = path.path[0]->var;
3902    const struct dxil_value **var_array;
3903    switch (deref->modes) {
3904    case nir_var_mem_constant: var_array = ctx->consts; break;
3905    case nir_var_mem_shared: var_array = ctx->sharedvars; break;
3906    case nir_var_function_temp: var_array = ctx->scratchvars; break;
3907    default: unreachable("Invalid deref mode");
3908    }
3909    gep_indices[0] = var_array[var->data.driver_location];
3910 
3911    for (uint32_t i = 0; i < count; ++i)
3912       gep_indices[i + 1] = get_src_ssa(ctx, &path.path[i]->def, 0);
3913 
3914    return dxil_emit_gep_inbounds(&ctx->mod, gep_indices, count + 1);
3915 }
3916 
3917 static bool
3918 emit_load_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3919 {
3920    const struct dxil_value *ptr = deref_to_gep(ctx, nir_src_as_deref(intr->src[0]));
3921    if (!ptr)
3922       return false;
3923 
3924    const struct dxil_value *retval =
3925       dxil_emit_load(&ctx->mod, ptr, intr->def.bit_size / 8, false);
3926    if (!retval)
3927       return false;
3928 
3929    store_def(ctx, &intr->def, 0, retval);
3930    return true;
3931 }
3932 
3933 static bool
3934 emit_store_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3935 {
3936    nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
3937    const struct dxil_value *ptr = deref_to_gep(ctx, deref);
3938    if (!ptr)
3939       return false;
3940 
3941    const struct dxil_value *value = get_src(ctx, &intr->src[1], 0, nir_get_nir_type_for_glsl_type(deref->type));
3942    return dxil_emit_store(&ctx->mod, value, ptr, nir_src_bit_size(intr->src[1]) / 8, false);
3943 }
3944 
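/* Lower a NIR deref atomic on shared/scratch memory to an LLVM atomicrmw.
 * The ordering/scope pair (acq_rel, crossthread) is applied unconditionally,
 * which is a conservative choice for these memory spaces.
 */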
3945 static bool
3946 emit_atomic_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3947 {
3948    const struct dxil_value *ptr = deref_to_gep(ctx, nir_src_as_deref(intr->src[0]));
3949    if (!ptr)
3950       return false;
3951 
3952    const struct dxil_value *value = get_src(ctx, &intr->src[1], 0, nir_type_uint);
3953    if (!value)
3954       return false;
3955 
3956    enum dxil_rmw_op dxil_op = nir_atomic_to_dxil_rmw(nir_intrinsic_atomic_op(intr));
3957    const struct dxil_value *retval = dxil_emit_atomicrmw(&ctx->mod, value, ptr, dxil_op, false,
3958                                                          DXIL_ATOMIC_ORDERING_ACQREL,
3959                                                          DXIL_SYNC_SCOPE_CROSSTHREAD);
3960    if (!retval)
3961       return false;
3962 
3963    store_def(ctx, &intr->def, 0, retval);
3964    return true;
3965 }
3966 
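/* Compare-and-swap variant of the above, lowered to an LLVM cmpxchg with
 * the same ordering and scope.
 */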
3967 static bool
3968 emit_atomic_deref_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3969 {
3970    const struct dxil_value *ptr = deref_to_gep(ctx, nir_src_as_deref(intr->src[0]));
3971    if (!ptr)
3972       return false;
3973 
3974    const struct dxil_value *cmp = get_src(ctx, &intr->src[1], 0, nir_type_uint);
3975    const struct dxil_value *value = get_src(ctx, &intr->src[2], 0, nir_type_uint);
3976    if (!cmp || !value)
3977       return false;
3978 
3979    const struct dxil_value *retval = dxil_emit_cmpxchg(&ctx->mod, cmp, value, ptr, false,
3980                                                        DXIL_ATOMIC_ORDERING_ACQREL,
3981                                                        DXIL_SYNC_SCOPE_CROSSTHREAD);
3982    if (!retval)
3983       return false;
3984 
3985    store_def(ctx, &intr->def, 0, retval);
3986    return true;
3987 }
3988 
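/* dx.op.discard takes an i1 condition, so both the conditional
 * (terminate_if/demote_if) and unconditional (terminate/demote) NIR forms
 * funnel through this helper; the unconditional form passes constant true.
 */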
3989 static bool
3990 emit_discard_if_with_value(struct ntd_context *ctx, const struct dxil_value *value)
3991 {
3992    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DISCARD);
3993    if (!opcode)
3994       return false;
3995 
3996    const struct dxil_value *args[] = {
3997      opcode,
3998      value
3999    };
4000 
4001    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.discard", DXIL_NONE);
4002    if (!func)
4003       return false;
4004 
4005    return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
4006 }
4007 
4008 static bool
4009 emit_discard_if(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4010 {
4011    const struct dxil_value *value = get_src(ctx, &intr->src[0], 0, nir_type_bool);
4012    if (!value)
4013       return false;
4014 
4015    return emit_discard_if_with_value(ctx, value);
4016 }
4017 
4018 static bool
4019 emit_discard(struct ntd_context *ctx)
4020 {
4021    const struct dxil_value *value = dxil_module_get_int1_const(&ctx->mod, true);
4022    return emit_discard_if_with_value(ctx, value);
4023 }
4024 
4025 static bool
4026 emit_emit_vertex(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4027 {
4028    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_EMIT_STREAM);
4029    const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));
4030    if (!opcode || !stream_id)
4031       return false;
4032 
4033    const struct dxil_value *args[] = {
4034      opcode,
4035      stream_id
4036    };
4037 
4038    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.emitStream", DXIL_NONE);
4039    if (!func)
4040       return false;
4041 
4042    return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
4043 }
4044 
4045 static bool
4046 emit_end_primitive(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4047 {
4048    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CUT_STREAM);
4049    const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));
4050    if (!opcode || !stream_id)
4051       return false;
4052 
4053    const struct dxil_value *args[] = {
4054      opcode,
4055      stream_id
4056    };
4057 
4058    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cutStream", DXIL_NONE);
4059    if (!func)
4060       return false;
4061 
4062    return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
4063 }
4064 
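/* Image stores: coordinate slots the resource dimension does not use are
 * filled with i32 undef, missing value components are padded with undef,
 * and the write mask covers exactly the components NIR provides. Buffer
 * images take the dx.op.bufferStore path, all other dimensions
 * dx.op.textureStore.
 */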
4065 static bool
4066 emit_image_store(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4067 {
4068    const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_store ?
4069       create_image_handle(ctx, intr) :
4070       get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
4071    if (!handle)
4072       return false;
4073 
4074    bool is_array = false;
4075    if (intr->intrinsic == nir_intrinsic_image_deref_store)
4076       is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
4077    else
4078       is_array = nir_intrinsic_image_array(intr);
4079 
4080    const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4081    if (!int32_undef)
4082       return false;
4083 
4084    const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
4085    enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_store ?
4086       glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
4087       nir_intrinsic_image_dim(intr);
4088    unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
4089    if (is_array)
4090       ++num_coords;
4091 
4092    assert(num_coords <= nir_src_num_components(intr->src[1]));
4093    for (unsigned i = 0; i < num_coords; ++i) {
4094       coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
4095       if (!coord[i])
4096          return false;
4097    }
4098 
4099    nir_alu_type in_type = nir_intrinsic_src_type(intr);
4100    enum overload_type overload = get_overload(in_type, 32);
4101 
4102    assert(nir_src_bit_size(intr->src[3]) == 32);
4103    unsigned num_components = nir_src_num_components(intr->src[3]);
4104    assert(num_components <= 4);
4105    const struct dxil_value *value[4];
4106    for (unsigned i = 0; i < num_components; ++i) {
4107       value[i] = get_src(ctx, &intr->src[3], i, in_type);
4108       if (!value[i])
4109          return false;
4110    }
4111 
4112    for (int i = num_components; i < 4; ++i)
4113       value[i] = dxil_module_get_undef(&ctx->mod, dxil_value_get_type(value[0]));
4114 
4115    const struct dxil_value *write_mask =
4116       dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
4117    if (!write_mask)
4118       return false;
4119 
4120    if (image_dim == GLSL_SAMPLER_DIM_BUF) {
4121       coord[1] = int32_undef;
4122       return emit_bufferstore_call(ctx, handle, coord, value, write_mask, overload);
4123    } else
4124       return emit_texturestore_call(ctx, handle, coord, value, write_mask, overload);
4125 }
4126 
4127 static bool
4128 emit_image_load(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4129 {
4130    const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_load ?
4131       create_image_handle(ctx, intr) :
4132       get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
4133    if (!handle)
4134       return false;
4135 
4136    bool is_array = false;
4137    if (intr->intrinsic == nir_intrinsic_image_deref_load)
4138       is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
4139    else
4140       is_array = nir_intrinsic_image_array(intr);
4141 
4142    const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4143    if (!int32_undef)
4144       return false;
4145 
4146    const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
4147    enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_load ?
4148       glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
4149       nir_intrinsic_image_dim(intr);
4150    unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
4151    if (is_array)
4152       ++num_coords;
4153 
4154    assert(num_coords <= nir_src_num_components(intr->src[1]));
4155    for (unsigned i = 0; i < num_coords; ++i) {
4156       coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
4157       if (!coord[i])
4158          return false;
4159    }
4160 
4161    nir_alu_type out_type = nir_intrinsic_dest_type(intr);
4162    enum overload_type overload = get_overload(out_type, 32);
4163 
4164    const struct dxil_value *load_result;
4165    if (image_dim == GLSL_SAMPLER_DIM_BUF) {
4166       coord[1] = int32_undef;
4167       load_result = emit_bufferload_call(ctx, handle, coord, overload);
4168    } else
4169       load_result = emit_textureload_call(ctx, handle, coord, overload);
4170 
4171    if (!load_result)
4172       return false;
4173 
4174    assert(intr->def.bit_size == 32);
4175    unsigned num_components = intr->def.num_components;
4176    assert(num_components <= 4);
4177    for (unsigned i = 0; i < num_components; ++i) {
4178       const struct dxil_value *component = dxil_emit_extractval(&ctx->mod, load_result, i);
4179       if (!component)
4180          return false;
4181       store_def(ctx, &intr->def, i, component);
4182    }
4183 
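   /* Loading more than one component from a typed UAV requires the
    * TypedUAVLoadAdditionalFormats capability, so flag it in the module's
    * feature info when the format is multi-component.
    */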
4184    if (util_format_get_nr_components(nir_intrinsic_format(intr)) > 1)
4185       ctx->mod.feats.typed_uav_load_additional_formats = true;
4186 
4187    return true;
4188 }
4189 
4190 static bool
4191 emit_image_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4192 {
4193    const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_atomic ?
4194       create_image_handle(ctx, intr) :
4195       get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
4196    if (!handle)
4197       return false;
4198 
4199    bool is_array = false;
4200    if (intr->intrinsic == nir_intrinsic_image_deref_atomic)
4201       is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
4202    else
4203       is_array = nir_intrinsic_image_array(intr);
4204 
4205    const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4206    if (!int32_undef)
4207       return false;
4208 
4209    const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
4210    enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_atomic ?
4211       glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
4212       nir_intrinsic_image_dim(intr);
4213    unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
4214    if (is_array)
4215       ++num_coords;
4216 
4217    assert(num_coords <= nir_src_num_components(intr->src[1]));
4218    for (unsigned i = 0; i < num_coords; ++i) {
4219       coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
4220       if (!coord[i])
4221          return false;
4222    }
4223 
4224    nir_atomic_op nir_op = nir_intrinsic_atomic_op(intr);
4225    enum dxil_atomic_op dxil_op = nir_atomic_to_dxil_atomic(nir_op);
4226    nir_alu_type type = nir_atomic_op_type(nir_op);
4227    const struct dxil_value *value = get_src(ctx, &intr->src[3], 0, type);
4228    if (!value)
4229       return false;
4230 
4231    const struct dxil_value *retval =
4232       emit_atomic_binop(ctx, handle, dxil_op, coord, value);
4233 
4234    if (!retval)
4235       return false;
4236 
4237    store_def(ctx, &intr->def, 0, retval);
4238    return true;
4239 }
4240 
4241 static bool
4242 emit_image_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4243 {
4244    const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_atomic_swap ?
4245       create_image_handle(ctx, intr) :
4246       get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
4247    if (!handle)
4248       return false;
4249 
4250    bool is_array = false;
4251    if (intr->intrinsic == nir_intrinsic_image_deref_atomic_swap)
4252       is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
4253    else
4254       is_array = nir_intrinsic_image_array(intr);
4255 
4256    const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4257    if (!int32_undef)
4258       return false;
4259 
4260    const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
4261    enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ?
4262       glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
4263       nir_intrinsic_image_dim(intr);
4264    unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
4265    if (is_array)
4266       ++num_coords;
4267 
4268    assert(num_coords <= nir_src_num_components(intr->src[1]));
4269    for (unsigned i = 0; i < num_coords; ++i) {
4270       coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
4271       if (!coord[i])
4272          return false;
4273    }
4274 
4275    const struct dxil_value *cmpval = get_src(ctx, &intr->src[3], 0, nir_type_uint);
4276    const struct dxil_value *newval = get_src(ctx, &intr->src[4], 0, nir_type_uint);
4277    if (!cmpval || !newval)
4278       return false;
4279 
4280    const struct dxil_value *retval =
4281       emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval);
4282 
4283    if (!retval)
4284       return false;
4285 
4286    store_def(ctx, &intr->def, 0, retval);
4287    return true;
4288 }
4289 
4290 struct texop_parameters {
4291    const struct dxil_value *tex;
4292    const struct dxil_value *sampler;
4293    const struct dxil_value *bias, *lod_or_sample, *min_lod;
4294    const struct dxil_value *coord[4], *offset[3], *dx[3], *dy[3];
4295    const struct dxil_value *cmp;
4296    enum overload_type overload;
4297 };
4298 
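/* dx.op.getDimensions returns a dimensions struct; lod_or_sample selects
 * the mip level to query (undef for buffers). Callers pull out the
 * components they need with extractval.
 */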
4299 static const struct dxil_value *
4300 emit_texture_size(struct ntd_context *ctx, struct texop_parameters *params)
4301 {
4302    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.getDimensions", DXIL_NONE);
4303    if (!func)
4304       return false;
4305 
4306    const struct dxil_value *args[] = {
4307       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_SIZE),
4308       params->tex,
4309       params->lod_or_sample
4310    };
4311 
4312    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4313 }
4314 
4315 static bool
4316 emit_image_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4317 {
4318    const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_size ?
4319       create_image_handle(ctx, intr) :
4320       get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
4321    if (!handle)
4322       return false;
4323 
4324    enum glsl_sampler_dim sampler_dim = intr->intrinsic == nir_intrinsic_image_deref_size ?
4325       glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
4326       nir_intrinsic_image_dim(intr);
4327    const struct dxil_value *lod = sampler_dim == GLSL_SAMPLER_DIM_BUF ?
4328       dxil_module_get_undef(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32)) :
4329       get_src(ctx, &intr->src[1], 0, nir_type_uint);
4330    if (!lod)
4331       return false;
4332 
4333    struct texop_parameters params = {
4334       .tex = handle,
4335       .lod_or_sample = lod
4336    };
4337    const struct dxil_value *dimensions = emit_texture_size(ctx, &params);
4338    if (!dimensions)
4339       return false;
4340 
4341    for (unsigned i = 0; i < intr->def.num_components; ++i) {
4342       const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, i);
4343       store_def(ctx, &intr->def, i, retval);
4344    }
4345 
4346    return true;
4347 }
4348 
4349 static bool
4350 emit_get_ssbo_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4351 {
4352    enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV;
4353    if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
4354       nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
4355       if (var && var->data.access & ACCESS_NON_WRITEABLE)
4356          class = DXIL_RESOURCE_CLASS_SRV;
4357    }
4358 
4359    const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], class, DXIL_RESOURCE_KIND_RAW_BUFFER);
4360    if (!handle)
4361       return false;
4362 
4363    struct texop_parameters params = {
4364       .tex = handle,
4365       .lod_or_sample = dxil_module_get_undef(
4366                         &ctx->mod, dxil_module_get_int_type(&ctx->mod, 32))
4367    };
4368 
4369    const struct dxil_value *dimensions = emit_texture_size(ctx, &params);
4370    if (!dimensions)
4371       return false;
4372 
4373    const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, 0);
4374    store_def(ctx, &intr->def, 0, retval);
4375 
4376    return true;
4377 }
4378 
4379 static bool
4380 emit_ssbo_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4381 {
4382    nir_atomic_op nir_op = nir_intrinsic_atomic_op(intr);
4383    enum dxil_atomic_op dxil_op = nir_atomic_to_dxil_atomic(nir_op);
4384    nir_alu_type type = nir_atomic_op_type(nir_op);
4385    const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
4386    const struct dxil_value *offset =
4387       get_src(ctx, &intr->src[1], 0, nir_type_uint);
4388    const struct dxil_value *value =
4389       get_src(ctx, &intr->src[2], 0, type);
4390 
4391    if (!value || !handle || !offset)
4392       return false;
4393 
4394    const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4395    if (!int32_undef)
4396       return false;
4397 
4398    const struct dxil_value *coord[3] = {
4399       offset, int32_undef, int32_undef
4400    };
4401 
4402    const struct dxil_value *retval =
4403       emit_atomic_binop(ctx, handle, dxil_op, coord, value);
4404 
4405    if (!retval)
4406       return false;
4407 
4408    store_def(ctx, &intr->def, 0, retval);
4409    return true;
4410 }
4411 
4412 static bool
4413 emit_ssbo_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4414 {
4415    const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
4416    const struct dxil_value *offset =
4417       get_src(ctx, &intr->src[1], 0, nir_type_uint);
4418    const struct dxil_value *cmpval =
4419       get_src(ctx, &intr->src[2], 0, nir_type_int);
4420    const struct dxil_value *newval =
4421       get_src(ctx, &intr->src[3], 0, nir_type_int);
4422 
4423    if (!cmpval || !newval || !handle || !offset)
4424       return false;
4425 
4426    const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4427    if (!int32_undef)
4428       return false;
4429 
4430    const struct dxil_value *coord[3] = {
4431       offset, int32_undef, int32_undef
4432    };
4433 
4434    const struct dxil_value *retval =
4435       emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval);
4436 
4437    if (!retval)
4438       return false;
4439 
4440    store_def(ctx, &intr->def, 0, retval);
4441    return true;
4442 }
4443 
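/* vulkan_resource_index produces an (index, 0) pair. A constant index
 * source is folded into the base binding at compile time; a dynamic one is
 * added to it with an i32 add.
 */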
4444 static bool
4445 emit_vulkan_resource_index(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4446 {
4447    unsigned int binding = nir_intrinsic_binding(intr);
4448 
4449    bool const_index = nir_src_is_const(intr->src[0]);
4450    if (const_index) {
4451       binding += nir_src_as_const_value(intr->src[0])->u32;
4452    }
4453 
4454    const struct dxil_value *index_value = dxil_module_get_int32_const(&ctx->mod, binding);
4455    if (!index_value)
4456       return false;
4457 
4458    if (!const_index) {
4459       const struct dxil_value *offset = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
4460       if (!offset)
4461          return false;
4462 
4463       index_value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, index_value, offset, 0);
4464       if (!index_value)
4465          return false;
4466    }
4467 
4468    store_def(ctx, &intr->def, 0, index_value);
4469    store_def(ctx, &intr->def, 1, dxil_module_get_int32_const(&ctx->mod, 0));
4470    return true;
4471 }
4472 
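/* Turn a descriptor reference into a resource handle. Uniform buffers map
 * to CBVs and storage buffers to UAVs, demoted to SRVs when the variable is
 * marked non-writeable. Indices that don't come from vulkan_resource_index
 * are treated as descriptor-heap indices: they get an unannotated handle
 * followed by an annotateHandle call carrying the resource properties.
 */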
4473 static bool
4474 emit_load_vulkan_descriptor(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4475 {
4476    nir_intrinsic_instr* index = nir_src_as_intrinsic(intr->src[0]);
4477    const struct dxil_value *handle = NULL;
4478 
4479    enum dxil_resource_class resource_class;
4480    enum dxil_resource_kind resource_kind;
4481    switch (nir_intrinsic_desc_type(intr)) {
4482    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
4483       resource_class = DXIL_RESOURCE_CLASS_CBV;
4484       resource_kind = DXIL_RESOURCE_KIND_CBUFFER;
4485       break;
4486    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
4487       resource_class = DXIL_RESOURCE_CLASS_UAV;
4488       resource_kind = DXIL_RESOURCE_KIND_RAW_BUFFER;
4489       break;
4490    default:
4491       unreachable("unknown descriptor type");
4492       return false;
4493    }
4494 
4495    if (index && index->intrinsic == nir_intrinsic_vulkan_resource_index) {
4496       unsigned binding = nir_intrinsic_binding(index);
4497       unsigned space = nir_intrinsic_desc_set(index);
4498 
4499       /* The descriptor_set field for variables is only 5 bits. We shouldn't have intrinsics trying to go beyond that. */
4500       assert(space < 32);
4501 
4502       nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
4503       if (resource_class == DXIL_RESOURCE_CLASS_UAV &&
4504           (var->data.access & ACCESS_NON_WRITEABLE))
4505          resource_class = DXIL_RESOURCE_CLASS_SRV;
4506 
4507       const struct dxil_value *index_value = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
4508       if (!index_value)
4509          return false;
4510 
4511       handle = emit_createhandle_call_dynamic(ctx, resource_class, space, binding, index_value, false);
4512    } else {
4513       const struct dxil_value *heap_index_value = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
4514       if (!heap_index_value)
4515          return false;
4516       const struct dxil_value *unannotated_handle = emit_createhandle_heap(ctx, heap_index_value, false, true);
4517       const struct dxil_value *res_props = dxil_module_get_buffer_res_props_const(&ctx->mod, resource_class, resource_kind);
4518       if (!unannotated_handle || !res_props)
4519          return false;
4520       handle = emit_annotate_handle(ctx, unannotated_handle, res_props);
4521    }
4522 
4523    store_ssa_def(ctx, &intr->def, 0, handle);
4524    store_def(ctx, &intr->def, 1, get_src(ctx, &intr->src[0], 1, nir_type_uint32));
4525 
4526    return true;
4527 }
4528 
4529 static bool
4530 emit_load_sample_pos_from_id(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4531 {
4532    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.renderTargetGetSamplePosition", DXIL_NONE);
4533    if (!func)
4534       return false;
4535 
4536    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_RENDER_TARGET_GET_SAMPLE_POSITION);
4537    if (!opcode)
4538       return false;
4539 
4540    const struct dxil_value *args[] = {
4541       opcode,
4542       get_src(ctx, &intr->src[0], 0, nir_type_uint32),
4543    };
4544    if (!args[1])
4545       return false;
4546 
4547    const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4548    if (!v)
4549       return false;
4550 
4551    for (unsigned i = 0; i < 2; ++i) {
4552       /* GL coords go from 0 -> 1, D3D from -0.5 -> 0.5 */
4553       const struct dxil_value *coord = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
4554          dxil_emit_extractval(&ctx->mod, v, i),
4555          dxil_module_get_float_const(&ctx->mod, 0.5f), 0);
4556       store_def(ctx, &intr->def, i, coord);
4557    }
4558    return true;
4559 }
4560 
4561 static bool
4562 emit_load_sample_id(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4563 {
4564    assert(ctx->mod.info.has_per_sample_input ||
4565           intr->intrinsic == nir_intrinsic_load_sample_id_no_per_sample);
4566 
4567    if (ctx->mod.info.has_per_sample_input)
4568       return emit_load_unary_external_function(ctx, intr, "dx.op.sampleIndex",
4569                                                DXIL_INTR_SAMPLE_INDEX, nir_type_int);
4570 
4571    store_def(ctx, &intr->def, 0, dxil_module_get_int32_const(&ctx->mod, 0));
4572    return true;
4573 }
4574 
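/* The subgroup/wave helpers below all set feats.wave_ops, which requires
 * Shader Model 6.0 or newer to validate.
 */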
4575 static bool
4576 emit_read_first_invocation(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4577 {
4578    ctx->mod.feats.wave_ops = 1;
4579    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveReadLaneFirst",
4580                                                     get_overload(nir_type_uint, intr->def.bit_size));
4581    const struct dxil_value *args[] = {
4582       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_READ_LANE_FIRST),
4583       get_src(ctx, intr->src, 0, nir_type_uint),
4584    };
4585    if (!func || !args[0] || !args[1])
4586       return false;
4587 
4588    const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4589    if (!ret)
4590       return false;
4591    store_def(ctx, &intr->def, 0, ret);
4592    return true;
4593 }
4594 
4595 static bool
4596 emit_read_invocation(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4597 {
4598    ctx->mod.feats.wave_ops = 1;
4599    bool quad = intr->intrinsic == nir_intrinsic_quad_broadcast;
4600    const struct dxil_func *func = dxil_get_function(&ctx->mod, quad ? "dx.op.quadReadLaneAt" : "dx.op.waveReadLaneAt",
4601                                                     get_overload(nir_type_uint, intr->def.bit_size));
4602    const struct dxil_value *args[] = {
4603       dxil_module_get_int32_const(&ctx->mod, quad ? DXIL_INTR_QUAD_READ_LANE_AT : DXIL_INTR_WAVE_READ_LANE_AT),
4604       get_src(ctx, &intr->src[0], 0, nir_type_uint),
4605       get_src(ctx, &intr->src[1], 0, nir_type_uint),
4606    };
4607    if (!func || !args[0] || !args[1] || !args[2])
4608       return false;
4609 
4610    const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4611    if (!ret)
4612       return false;
4613    store_def(ctx, &intr->def, 0, ret);
4614    return true;
4615 }
4616 
4617 static bool
4618 emit_vote_eq(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4619 {
4620    ctx->mod.feats.wave_ops = 1;
4621    nir_alu_type alu_type = intr->intrinsic == nir_intrinsic_vote_ieq ? nir_type_int : nir_type_float;
4622    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveActiveAllEqual",
4623                                                     get_overload(alu_type, intr->src[0].ssa->bit_size));
4624    const struct dxil_value *args[] = {
4625       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_ACTIVE_ALL_EQUAL),
4626       get_src(ctx, intr->src, 0, alu_type),
4627    };
4628    if (!func || !args[0] || !args[1])
4629       return false;
4630 
4631    const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4632    if (!ret)
4633       return false;
4634    store_def(ctx, &intr->def, 0, ret);
4635    return true;
4636 }
4637 
4638 static bool
4639 emit_vote(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4640 {
4641    ctx->mod.feats.wave_ops = 1;
4642    bool any = intr->intrinsic == nir_intrinsic_vote_any;
4643    const struct dxil_func *func = dxil_get_function(&ctx->mod,
4644                                                     any ? "dx.op.waveAnyTrue" : "dx.op.waveAllTrue",
4645                                                     DXIL_NONE);
4646    const struct dxil_value *args[] = {
4647       dxil_module_get_int32_const(&ctx->mod, any ? DXIL_INTR_WAVE_ANY_TRUE : DXIL_INTR_WAVE_ALL_TRUE),
4648       get_src(ctx, intr->src, 0, nir_type_bool),
4649    };
4650    if (!func || !args[0] || !args[1])
4651       return false;
4652 
4653    const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4654    if (!ret)
4655       return false;
4656    store_def(ctx, &intr->def, 0, ret);
4657    return true;
4658 }
4659 
4660 static bool
4661 emit_ballot(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4662 {
4663    ctx->mod.feats.wave_ops = 1;
4664    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveActiveBallot", DXIL_NONE);
4665    const struct dxil_value *args[] = {
4666       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_ACTIVE_BALLOT),
4667       get_src(ctx, intr->src, 0, nir_type_bool),
4668    };
4669    if (!func || !args[0] || !args[1])
4670       return false;
4671 
4672    const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4673    if (!ret)
4674       return false;
4675    for (uint32_t i = 0; i < 4; ++i)
4676       store_def(ctx, &intr->def, i, dxil_emit_extractval(&ctx->mod, ret, i));
4677    return true;
4678 }
4679 
4680 static bool
4681 emit_quad_op(struct ntd_context *ctx, nir_intrinsic_instr *intr, enum dxil_quad_op_kind op)
4682 {
4683    ctx->mod.feats.wave_ops = 1;
4684    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.quadOp",
4685                                                     get_overload(nir_type_uint, intr->def.bit_size));
4686    const struct dxil_value *args[] = {
4687       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_QUAD_OP),
4688       get_src(ctx, intr->src, 0, nir_type_uint),
4689       dxil_module_get_int8_const(&ctx->mod, op),
4690    };
4691    if (!func || !args[0] || !args[1] || !args[2])
4692       return false;
4693 
4694    const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4695    if (!ret)
4696       return false;
4697    store_def(ctx, &intr->def, 0, ret);
4698    return true;
4699 }
4700 
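/* Bitwise subgroup reductions have a dedicated DXIL opcode,
 * dx.op.waveActiveBit, parameterized by a bit-op kind rather than the
 * generic wave-op kind used for arithmetic reductions.
 */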
4701 static enum dxil_wave_bit_op_kind
4702 get_reduce_bit_op(nir_op op)
4703 {
4704    switch (op) {
4705    case nir_op_ior: return DXIL_WAVE_BIT_OP_OR;
4706    case nir_op_ixor: return DXIL_WAVE_BIT_OP_XOR;
4707    case nir_op_iand: return DXIL_WAVE_BIT_OP_AND;
4708    default:
4709       unreachable("Invalid bit op");
4710    }
4711 }
4712 
4713 static bool
4714 emit_reduce_bitwise(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4715 {
4716    enum dxil_wave_bit_op_kind wave_bit_op = get_reduce_bit_op(nir_intrinsic_reduction_op(intr));
4717    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveActiveBit",
4718                                                     get_overload(nir_type_uint, intr->def.bit_size));
4719    const struct dxil_value *args[] = {
4720       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_ACTIVE_BIT),
4721       get_src(ctx, intr->src, 0, nir_type_uint),
4722       dxil_module_get_int8_const(&ctx->mod, wave_bit_op),
4723    };
4724    if (!func || !args[0] || !args[1] || !args[2])
4725       return false;
4726 
4727    const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4728    if (!ret)
4729       return false;
4730    store_def(ctx, &intr->def, 0, ret);
4731    return true;
4732 }
4733 
4734 static enum dxil_wave_op_kind
4735 get_reduce_op(nir_op op)
4736 {
4737    switch (op) {
4738    case nir_op_iadd:
4739    case nir_op_fadd:
4740       return DXIL_WAVE_OP_SUM;
4741    case nir_op_imul:
4742    case nir_op_fmul:
4743       return DXIL_WAVE_OP_PRODUCT;
4744    case nir_op_imax:
4745    case nir_op_umax:
4746    case nir_op_fmax:
4747       return DXIL_WAVE_OP_MAX;
4748    case nir_op_imin:
4749    case nir_op_umin:
4750    case nir_op_fmin:
4751       return DXIL_WAVE_OP_MIN;
4752    default:
4753       unreachable("Unexpected reduction op");
4754    }
4755 }
4756 
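/* Arithmetic reductions and exclusive scans share one entry point:
 * dx.op.waveActiveOp for reductions, dx.op.wavePrefixOp for scans.
 * Bitwise ops are routed to emit_reduce_bitwise() and have no prefix
 * form, hence the assert below.
 */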
4757 static bool
4758 emit_reduce(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4759 {
4760    ctx->mod.feats.wave_ops = 1;
4761    bool is_prefix = intr->intrinsic == nir_intrinsic_exclusive_scan;
4762    nir_op reduction_op = (nir_op)nir_intrinsic_reduction_op(intr);
4763    switch (reduction_op) {
4764    case nir_op_ior:
4765    case nir_op_ixor:
4766    case nir_op_iand:
4767       assert(!is_prefix);
4768       return emit_reduce_bitwise(ctx, intr);
4769    default:
4770       break;
4771    }
4772    nir_alu_type alu_type = nir_op_infos[reduction_op].input_types[0];
4773    enum dxil_wave_op_kind wave_op = get_reduce_op(reduction_op);
4774    const struct dxil_func *func = dxil_get_function(&ctx->mod, is_prefix ? "dx.op.wavePrefixOp" : "dx.op.waveActiveOp",
4775                                                     get_overload(alu_type, intr->def.bit_size));
4776    bool is_unsigned = alu_type == nir_type_uint;
4777    const struct dxil_value *args[] = {
4778       dxil_module_get_int32_const(&ctx->mod, is_prefix ? DXIL_INTR_WAVE_PREFIX_OP : DXIL_INTR_WAVE_ACTIVE_OP),
4779       get_src(ctx, intr->src, 0, alu_type),
4780       dxil_module_get_int8_const(&ctx->mod, wave_op),
4781       dxil_module_get_int8_const(&ctx->mod, is_unsigned),
4782    };
4783    if (!func || !args[0] || !args[1] || !args[2] || !args[3])
4784       return false;
4785 
4786    const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4787    if (!ret)
4788       return false;
4789    store_def(ctx, &intr->def, 0, ret);
4790    return true;
4791 }
4792 
4793 static bool
4794 emit_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4795 {
4796    switch (intr->intrinsic) {
4797    case nir_intrinsic_load_global_invocation_id:
4798       return emit_load_global_invocation_id(ctx, intr);
4799    case nir_intrinsic_load_local_invocation_id:
4800       return emit_load_local_invocation_id(ctx, intr);
4801    case nir_intrinsic_load_local_invocation_index:
4802       return emit_load_local_invocation_index(ctx, intr);
4803    case nir_intrinsic_load_workgroup_id:
4804       return emit_load_local_workgroup_id(ctx, intr);
4805    case nir_intrinsic_load_ssbo:
4806       return emit_load_ssbo(ctx, intr);
4807    case nir_intrinsic_store_ssbo:
4808       return emit_store_ssbo(ctx, intr);
4809    case nir_intrinsic_load_deref:
4810       return emit_load_deref(ctx, intr);
4811    case nir_intrinsic_store_deref:
4812       return emit_store_deref(ctx, intr);
4813    case nir_intrinsic_deref_atomic:
4814       return emit_atomic_deref(ctx, intr);
4815    case nir_intrinsic_deref_atomic_swap:
4816       return emit_atomic_deref_swap(ctx, intr);
4817    case nir_intrinsic_load_ubo_vec4:
4818       return emit_load_ubo_vec4(ctx, intr);
4819    case nir_intrinsic_load_primitive_id:
4820       return emit_load_unary_external_function(ctx, intr, "dx.op.primitiveID",
4821                                                DXIL_INTR_PRIMITIVE_ID, nir_type_int);
4822    case nir_intrinsic_load_sample_id:
4823    case nir_intrinsic_load_sample_id_no_per_sample:
4824       return emit_load_sample_id(ctx, intr);
4825    case nir_intrinsic_load_invocation_id:
4826       switch (ctx->mod.shader_kind) {
4827       case DXIL_HULL_SHADER:
4828          return emit_load_unary_external_function(ctx, intr, "dx.op.outputControlPointID",
4829                                                   DXIL_INTR_OUTPUT_CONTROL_POINT_ID, nir_type_int);
4830       case DXIL_GEOMETRY_SHADER:
4831          return emit_load_unary_external_function(ctx, intr, "dx.op.gsInstanceID",
4832                                                   DXIL_INTR_GS_INSTANCE_ID, nir_type_int);
4833       default:
4834          unreachable("Unexpected shader kind for invocation ID");
4835       }
4836    case nir_intrinsic_load_view_index:
4837       ctx->mod.feats.view_id = true;
4838       return emit_load_unary_external_function(ctx, intr, "dx.op.viewID",
4839                                                DXIL_INTR_VIEW_ID, nir_type_int);
4840    case nir_intrinsic_load_sample_mask_in:
4841       return emit_load_sample_mask_in(ctx, intr);
4842    case nir_intrinsic_load_tess_coord:
4843       return emit_load_tess_coord(ctx, intr);
4844    case nir_intrinsic_terminate_if:
4845    case nir_intrinsic_demote_if:
4846       return emit_discard_if(ctx, intr);
4847    case nir_intrinsic_terminate:
4848    case nir_intrinsic_demote:
4849       return emit_discard(ctx);
4850    case nir_intrinsic_emit_vertex:
4851       return emit_emit_vertex(ctx, intr);
4852    case nir_intrinsic_end_primitive:
4853       return emit_end_primitive(ctx, intr);
4854    case nir_intrinsic_barrier:
4855       return emit_barrier(ctx, intr);
4856    case nir_intrinsic_ssbo_atomic:
4857       return emit_ssbo_atomic(ctx, intr);
4858    case nir_intrinsic_ssbo_atomic_swap:
4859       return emit_ssbo_atomic_comp_swap(ctx, intr);
4860    case nir_intrinsic_image_deref_atomic:
4861    case nir_intrinsic_image_atomic:
4862    case nir_intrinsic_bindless_image_atomic:
4863       return emit_image_atomic(ctx, intr);
4864    case nir_intrinsic_image_deref_atomic_swap:
4865    case nir_intrinsic_image_atomic_swap:
4866    case nir_intrinsic_bindless_image_atomic_swap:
4867       return emit_image_atomic_comp_swap(ctx, intr);
4868    case nir_intrinsic_image_store:
4869    case nir_intrinsic_image_deref_store:
4870    case nir_intrinsic_bindless_image_store:
4871       return emit_image_store(ctx, intr);
4872    case nir_intrinsic_image_load:
4873    case nir_intrinsic_image_deref_load:
4874    case nir_intrinsic_bindless_image_load:
4875       return emit_image_load(ctx, intr);
4876    case nir_intrinsic_image_size:
4877    case nir_intrinsic_image_deref_size:
4878    case nir_intrinsic_bindless_image_size:
4879       return emit_image_size(ctx, intr);
4880    case nir_intrinsic_get_ssbo_size:
4881       return emit_get_ssbo_size(ctx, intr);
4882    case nir_intrinsic_load_input:
4883    case nir_intrinsic_load_per_vertex_input:
4884    case nir_intrinsic_load_output:
4885    case nir_intrinsic_load_per_vertex_output:
4886       return emit_load_input_via_intrinsic(ctx, intr);
4887    case nir_intrinsic_store_output:
4888    case nir_intrinsic_store_per_vertex_output:
4889       return emit_store_output_via_intrinsic(ctx, intr);
4890 
4891    case nir_intrinsic_load_barycentric_at_offset:
4892    case nir_intrinsic_load_barycentric_at_sample:
4893    case nir_intrinsic_load_barycentric_centroid:
4894    case nir_intrinsic_load_barycentric_pixel:
4895       /* Emit nothing, we only support these as inputs to load_interpolated_input */
4896       return true;
4897    case nir_intrinsic_load_interpolated_input:
4898       return emit_load_interpolated_input(ctx, intr);
4900 
4901    case nir_intrinsic_vulkan_resource_index:
4902       return emit_vulkan_resource_index(ctx, intr);
4903    case nir_intrinsic_load_vulkan_descriptor:
4904       return emit_load_vulkan_descriptor(ctx, intr);
4905 
4906    case nir_intrinsic_load_sample_pos_from_id:
4907       return emit_load_sample_pos_from_id(ctx, intr);
4908 
4909    case nir_intrinsic_is_helper_invocation:
4910       return emit_load_unary_external_function(
4911          ctx, intr, "dx.op.isHelperLane", DXIL_INTR_IS_HELPER_LANE, nir_type_int);
4912    case nir_intrinsic_elect:
4913       ctx->mod.feats.wave_ops = 1;
4914       return emit_load_unary_external_function(
4915          ctx, intr, "dx.op.waveIsFirstLane", DXIL_INTR_WAVE_IS_FIRST_LANE, nir_type_invalid);
4916    case nir_intrinsic_load_subgroup_size:
4917       ctx->mod.feats.wave_ops = 1;
4918       return emit_load_unary_external_function(
4919          ctx, intr, "dx.op.waveGetLaneCount", DXIL_INTR_WAVE_GET_LANE_COUNT, nir_type_invalid);
4920    case nir_intrinsic_load_subgroup_invocation:
4921       ctx->mod.feats.wave_ops = 1;
4922       return emit_load_unary_external_function(
4923          ctx, intr, "dx.op.waveGetLaneIndex", DXIL_INTR_WAVE_GET_LANE_INDEX, nir_type_invalid);
4924 
4925    case nir_intrinsic_vote_feq:
4926    case nir_intrinsic_vote_ieq:
4927       return emit_vote_eq(ctx, intr);
4928    case nir_intrinsic_vote_any:
4929    case nir_intrinsic_vote_all:
4930       return emit_vote(ctx, intr);
4931 
4932    case nir_intrinsic_ballot:
4933       return emit_ballot(ctx, intr);
4934 
4935    case nir_intrinsic_read_first_invocation:
4936       return emit_read_first_invocation(ctx, intr);
4937    case nir_intrinsic_read_invocation:
4938    case nir_intrinsic_shuffle:
4939    case nir_intrinsic_quad_broadcast:
4940       return emit_read_invocation(ctx, intr);
4941 
4942    case nir_intrinsic_quad_swap_horizontal:
4943       return emit_quad_op(ctx, intr, QUAD_READ_ACROSS_X);
4944    case nir_intrinsic_quad_swap_vertical:
4945       return emit_quad_op(ctx, intr, QUAD_READ_ACROSS_Y);
4946    case nir_intrinsic_quad_swap_diagonal:
4947       return emit_quad_op(ctx, intr, QUAD_READ_ACROSS_DIAGONAL);
4948 
4949    case nir_intrinsic_reduce:
4950    case nir_intrinsic_exclusive_scan:
4951       return emit_reduce(ctx, intr);
4952 
4953    case nir_intrinsic_ddx:
4954    case nir_intrinsic_ddx_coarse: return emit_derivative(ctx, intr, DXIL_INTR_DDX_COARSE);
4955    case nir_intrinsic_ddx_fine: return emit_derivative(ctx, intr, DXIL_INTR_DDX_FINE);
4956    case nir_intrinsic_ddy:
4957    case nir_intrinsic_ddy_coarse: return emit_derivative(ctx, intr, DXIL_INTR_DDY_COARSE);
4958    case nir_intrinsic_ddy_fine: return emit_derivative(ctx, intr, DXIL_INTR_DDY_FINE);
4959 
4960    case nir_intrinsic_load_first_vertex:
4961       ctx->mod.feats.extended_command_info = true;
4962       return emit_load_unary_external_function(ctx, intr, "dx.op.startVertexLocation",
4963                                                DXIL_INTR_START_VERTEX_LOCATION, nir_type_int);
4964    case nir_intrinsic_load_base_instance:
4965       ctx->mod.feats.extended_command_info = true;
4966       return emit_load_unary_external_function(ctx, intr, "dx.op.startInstanceLocation",
4967                                                DXIL_INTR_START_INSTANCE_LOCATION, nir_type_int);
4968 
4969    case nir_intrinsic_load_num_workgroups:
4970    case nir_intrinsic_load_workgroup_size:
4971    default:
4972       log_nir_instr_unsupported(
4973          ctx->logger, "Unimplemented intrinsic instruction", &intr->instr);
4974       return false;
4975    }
4976 }
4977 
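/* NIR constants are typeless bit patterns; the int_types/float_types
 * bitsets, filled in ahead of time from how each SSA def is used, decide
 * whether a DXIL int or float constant is materialized. Defs with neither
 * bit set default to int.
 */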
4978 static const struct dxil_type *
4979 dxil_type_for_const(struct ntd_context *ctx, nir_def *def)
4980 {
4981    if (BITSET_TEST(ctx->int_types, def->index) ||
4982        !BITSET_TEST(ctx->float_types, def->index))
4983       return dxil_module_get_int_type(&ctx->mod, def->bit_size);
4984    return dxil_module_get_float_type(&ctx->mod, def->bit_size);
4985 }
4986 
4987 static bool
4988 emit_load_const(struct ntd_context *ctx, nir_load_const_instr *load_const)
4989 {
4990    for (uint32_t i = 0; i < load_const->def.num_components; ++i) {
4991       const struct dxil_type *type = dxil_type_for_const(ctx, &load_const->def);
4992       store_ssa_def(ctx, &load_const->def, i, get_value_for_const(&ctx->mod, &load_const->value[i], type));
4993    }
4994    return true;
4995 }
4996 
4997 static bool
4998 emit_deref(struct ntd_context* ctx, nir_deref_instr* instr)
4999 {
5000    /* There are two possible reasons we might be walking through derefs:
5001     * 1. Computing an index to be used for a texture/sampler/image binding, which
5002     *    can only do array indexing and should compute the indices along the way with
5003     *    array-of-array sizes.
5004     * 2. Storing an index to be used in a GEP for access to a variable.
5005     */
5006    nir_variable *var = nir_deref_instr_get_variable(instr);
5007    assert(var);
5008 
5009    bool is_aoa_size =
5010       glsl_type_is_sampler(glsl_without_array(var->type)) ||
5011       glsl_type_is_image(glsl_without_array(var->type)) ||
5012       glsl_type_is_texture(glsl_without_array(var->type));
5013 
5014    if (!is_aoa_size) {
5015       /* Just store the values, we'll use these to build a GEP in the load or store */
5016       switch (instr->deref_type) {
5017       case nir_deref_type_var:
5018          store_def(ctx, &instr->def, 0, dxil_module_get_int_const(&ctx->mod, 0, instr->def.bit_size));
5019          return true;
5020       case nir_deref_type_array:
5021          store_def(ctx, &instr->def, 0, get_src(ctx, &instr->arr.index, 0, nir_type_int));
5022          return true;
5023       case nir_deref_type_struct:
5024          store_def(ctx, &instr->def, 0, dxil_module_get_int_const(&ctx->mod, instr->strct.index, 32));
5025          return true;
5026       default:
5027          unreachable("Other deref types not supported");
5028       }
5029    }
5030 
5031    /* In the CL environment, there's nothing to emit. Any references to
5032     * derefs will emit the necessary logic to handle scratch/shared GEP addressing
5033     */
5034    if (ctx->opts->environment == DXIL_ENVIRONMENT_CL)
5035       return true;
5036 
5037    const struct glsl_type *type = instr->type;
5038    const struct dxil_value *binding;
5039    unsigned binding_val = ctx->opts->environment == DXIL_ENVIRONMENT_GL ?
5040       var->data.driver_location : var->data.binding;
5041 
5042    if (instr->deref_type == nir_deref_type_var) {
5043       binding = dxil_module_get_int32_const(&ctx->mod, binding_val);
5044    } else {
5045       const struct dxil_value *base = get_src(ctx, &instr->parent, 0, nir_type_uint32);
5046       const struct dxil_value *offset = get_src(ctx, &instr->arr.index, 0, nir_type_uint32);
5047       if (!base || !offset)
5048          return false;
5049 
5050       if (glsl_type_is_array(instr->type)) {
5051          offset = dxil_emit_binop(&ctx->mod, DXIL_BINOP_MUL, offset,
5052             dxil_module_get_int32_const(&ctx->mod, glsl_get_aoa_size(instr->type)), 0);
5053          if (!offset)
5054             return false;
5055       }
5056       binding = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, base, offset, 0);
5057    }
5058 
5059    if (!binding)
5060       return false;
5061 
5062    /* Haven't finished chasing the deref chain yet, just store the value */
5063    if (glsl_type_is_array(type)) {
5064       store_def(ctx, &instr->def, 0, binding);
5065       return true;
5066    }
5067 
5068    assert(glsl_type_is_sampler(type) || glsl_type_is_image(type) || glsl_type_is_texture(type));
5069    enum dxil_resource_class res_class;
5070    if (glsl_type_is_image(type))
5071       res_class = DXIL_RESOURCE_CLASS_UAV;
5072    else if (glsl_type_is_sampler(type))
5073       res_class = DXIL_RESOURCE_CLASS_SAMPLER;
5074    else
5075       res_class = DXIL_RESOURCE_CLASS_SRV;
5076 
5077    unsigned descriptor_set = ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN ?
5078       var->data.descriptor_set : (glsl_type_is_image(type) ? 1 : 0);
5079    const struct dxil_value *handle = emit_createhandle_call_dynamic(ctx, res_class,
5080       descriptor_set, binding_val, binding, false);
5081    if (!handle)
5082       return false;
5083 
5084    store_ssa_def(ctx, &instr->def, 0, handle);
5085    return true;
5086 }
5087 
5088 static bool
5089 emit_cond_branch(struct ntd_context *ctx, const struct dxil_value *cond,
5090                  int true_block, int false_block)
5091 {
5092    assert(cond);
5093    assert(true_block >= 0);
5094    assert(false_block >= 0);
5095    return dxil_emit_branch(&ctx->mod, cond, true_block, false_block);
5096 }
5097 
5098 static bool
5099 emit_branch(struct ntd_context *ctx, int block)
5100 {
5101    assert(block >= 0);
5102    return dxil_emit_branch(&ctx->mod, NULL, block, -1);
5103 }
5104 
5105 static bool
5106 emit_jump(struct ntd_context *ctx, nir_jump_instr *instr)
5107 {
5108    switch (instr->type) {
5109    case nir_jump_break:
5110    case nir_jump_continue:
5111       assert(instr->instr.block->successors[0]);
5112       assert(!instr->instr.block->successors[1]);
5113       return emit_branch(ctx, instr->instr.block->successors[0]->index);
5114 
5115    default:
5116       unreachable("Unsupported jump type\n");
5117    }
5118 }
5119 
5120 struct phi_block {
5121    unsigned num_components;
5122    struct dxil_instr *comp[NIR_MAX_VEC_COMPONENTS];
5123 };
5124 
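/* Phis are handled in two passes: this pass creates empty phi
 * instructions (one per vector component) so their results can already be
 * referenced, and fixup_phi() fills in the incoming value/block pairs once
 * every predecessor block has been emitted.
 */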
5125 static bool
5126 emit_phi(struct ntd_context *ctx, nir_phi_instr *instr)
5127 {
5128    const struct dxil_type *type = NULL;
5129    nir_foreach_phi_src(src, instr) {
5130       /* All sources have the same type, just use the first one */
5131       type = dxil_value_get_type(ctx->defs[src->src.ssa->index].chans[0]);
5132       break;
5133    }
5134 
5135    struct phi_block *vphi = ralloc(ctx->phis, struct phi_block);
5136    vphi->num_components = instr->def.num_components;
5137 
5138    for (unsigned i = 0; i < vphi->num_components; ++i) {
5139       struct dxil_instr *phi = vphi->comp[i] = dxil_emit_phi(&ctx->mod, type);
5140       if (!phi)
5141          return false;
5142       store_ssa_def(ctx, &instr->def, i, dxil_instr_get_return_value(phi));
5143    }
5144    _mesa_hash_table_insert(ctx->phis, instr, vphi);
5145    return true;
5146 }
5147 
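/* Second phi pass: incoming edges are flushed to dxil_phi_add_incoming()
 * in batches of ARRAY_SIZE(values) (16), so phis with more predecessors
 * than the scratch arrays hold are still handled correctly.
 */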
5148 static bool
5149 fixup_phi(struct ntd_context *ctx, nir_phi_instr *instr,
5150           struct phi_block *vphi)
5151 {
5152    const struct dxil_value *values[16];
5153    unsigned blocks[16];
5154    for (unsigned i = 0; i < vphi->num_components; ++i) {
5155       size_t num_incoming = 0;
5156       nir_foreach_phi_src(src, instr) {
5157          const struct dxil_value *val = get_src_ssa(ctx, src->src.ssa, i);
5158          values[num_incoming] = val;
5159          blocks[num_incoming] = src->pred->index;
5160          ++num_incoming;
5161          if (num_incoming == ARRAY_SIZE(values)) {
5162             if (!dxil_phi_add_incoming(vphi->comp[i], values, blocks,
5163                                        num_incoming))
5164                return false;
5165             num_incoming = 0;
5166          }
5167       }
5168       if (num_incoming > 0 && !dxil_phi_add_incoming(vphi->comp[i], values,
5169                                                      blocks, num_incoming))
5170          return false;
5171    }
5172    return true;
5173 }
5174 
5175 static unsigned
5176 get_n_src(struct ntd_context *ctx, const struct dxil_value **values,
5177           unsigned max_components, nir_tex_src *src, nir_alu_type type)
5178 {
5179    unsigned num_components = nir_src_num_components(src->src);
5180    unsigned i = 0;
5181 
5182    assert(num_components <= max_components);
5183 
5184    for (i = 0; i < num_components; ++i) {
5185       values[i] = get_src(ctx, &src->src, i, type);
5186       if (!values[i])
5187          return 0;
5188    }
5189 
5190    return num_components;
5191 }
5192 
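/* PAD_SRC fills the unused tail of a coord/offset/derivative array with
 * undef, so the dx.op.sample* emitters below can always pass a full
 * argument list regardless of the resource dimension.
 */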
5193 #define PAD_SRC(ctx, array, components, undef) \
5194    for (unsigned i = components; i < ARRAY_SIZE(array); ++i) { \
5195       array[i] = undef; \
5196    }
5197 
5198 static const struct dxil_value *
5199 emit_sample(struct ntd_context *ctx, struct texop_parameters *params)
5200 {
5201    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sample", params->overload);
5202    if (!func)
5203       return NULL;
5204 
5205    const struct dxil_value *args[11] = {
5206       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE),
5207       params->tex, params->sampler,
5208       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5209       params->offset[0], params->offset[1], params->offset[2],
5210       params->min_lod
5211    };
5212 
5213    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5214 }
5215 
5216 static const struct dxil_value *
5217 emit_sample_bias(struct ntd_context *ctx, struct texop_parameters *params)
5218 {
5219    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleBias", params->overload);
5220    if (!func)
5221       return NULL;
5222 
5223    assert(params->bias != NULL);
5224 
5225    const struct dxil_value *args[12] = {
5226       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_BIAS),
5227       params->tex, params->sampler,
5228       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5229       params->offset[0], params->offset[1], params->offset[2],
5230       params->bias, params->min_lod
5231    };
5232 
5233    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5234 }
5235 
5236 static const struct dxil_value *
5237 emit_sample_level(struct ntd_context *ctx, struct texop_parameters *params)
5238 {
5239    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleLevel", params->overload);
5240    if (!func)
5241       return NULL;
5242 
5243    assert(params->lod_or_sample != NULL);
5244 
5245    const struct dxil_value *args[11] = {
5246       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_LEVEL),
5247       params->tex, params->sampler,
5248       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5249       params->offset[0], params->offset[1], params->offset[2],
5250       params->lod_or_sample
5251    };
5252 
5253    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5254 }
5255 
5256 static const struct dxil_value *
5257 emit_sample_cmp(struct ntd_context *ctx, struct texop_parameters *params)
5258 {
5259    const struct dxil_func *func;
5260    enum dxil_intr opcode;
5261 
5262    func = dxil_get_function(&ctx->mod, "dx.op.sampleCmp", DXIL_F32);
5263    opcode = DXIL_INTR_SAMPLE_CMP;
5264 
5265    if (!func)
5266       return NULL;
5267 
5268    const struct dxil_value *args[12] = {
5269       dxil_module_get_int32_const(&ctx->mod, opcode),
5270       params->tex, params->sampler,
5271       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5272       params->offset[0], params->offset[1], params->offset[2],
5273       params->cmp, params->min_lod
5274    };
5275 
5276    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5277 }
5278 
5279 static const struct dxil_value *
5280 emit_sample_cmp_level_zero(struct ntd_context *ctx, struct texop_parameters *params)
5281 {
5282    const struct dxil_func *func;
5283    enum dxil_intr opcode;
5284 
5285    func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpLevelZero", DXIL_F32);
5286    opcode = DXIL_INTR_SAMPLE_CMP_LVL_ZERO;
5287 
5288    if (!func)
5289       return NULL;
5290 
5291    const struct dxil_value *args[11] = {
5292       dxil_module_get_int32_const(&ctx->mod, opcode),
5293       params->tex, params->sampler,
5294       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5295       params->offset[0], params->offset[1], params->offset[2],
5296       params->cmp
5297    };
5298 
5299    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5300 }
5301 
5302 static const struct dxil_value *
5303 emit_sample_cmp_level(struct ntd_context *ctx, struct texop_parameters *params)
5304 {
5305    ctx->mod.feats.advanced_texture_ops = true;
5306    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpLevel", params->overload);
5307    if (!func)
5308       return NULL;
5309 
5310    assert(params->lod_or_sample != NULL);
5311 
5312    const struct dxil_value *args[12] = {
5313       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_CMP_LEVEL),
5314       params->tex, params->sampler,
5315       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5316       params->offset[0], params->offset[1], params->offset[2],
5317       params->cmp, params->lod_or_sample
5318    };
5319 
5320    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5321 }
5322 
5323 static const struct dxil_value *
5324 emit_sample_cmp_bias(struct ntd_context *ctx, struct texop_parameters *params)
5325 {
5326    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpBias", params->overload);
5327    if (!func)
5328       return NULL;
5329 
5330    assert(params->bias != NULL);
5331    ctx->mod.feats.sample_cmp_bias_gradient = 1;
5332 
5333    const struct dxil_value *args[13] = {
5334       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_CMP_BIAS),
5335       params->tex, params->sampler,
5336       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5337       params->offset[0], params->offset[1], params->offset[2],
5338       params->cmp, params->bias, params->min_lod
5339    };
5340 
5341    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5342 }
5343 
5344 static const struct dxil_value *
5345 emit_sample_grad(struct ntd_context *ctx, struct texop_parameters *params)
5346 {
5347    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleGrad", params->overload);
5348    if (!func)
5349       return NULL;
5350 
5351    const struct dxil_value *args[17] = {
5352       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_GRAD),
5353       params->tex, params->sampler,
5354       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5355       params->offset[0], params->offset[1], params->offset[2],
5356       params->dx[0], params->dx[1], params->dx[2],
5357       params->dy[0], params->dy[1], params->dy[2],
5358       params->min_lod
5359    };
5360 
5361    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5362 }
5363 
5364 static const struct dxil_value *
5365 emit_sample_cmp_grad(struct ntd_context *ctx, struct texop_parameters *params)
5366 {
5367    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpGrad", params->overload);
5368    if (!func)
5369       return NULL;
5370 
5371    ctx->mod.feats.sample_cmp_bias_gradient = 1;
5372 
5373    const struct dxil_value *args[18] = {
5374       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_CMP_GRAD),
5375       params->tex, params->sampler,
5376       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5377       params->offset[0], params->offset[1], params->offset[2],
5378       params->cmp,
5379       params->dx[0], params->dx[1], params->dx[2],
5380       params->dy[0], params->dy[1], params->dy[2],
5381       params->min_lod
5382    };
5383 
5384    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5385 }
5386 
5387 static const struct dxil_value *
5388 emit_texel_fetch(struct ntd_context *ctx, struct texop_parameters *params)
5389 {
5390    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", params->overload);
5391    if (!func)
5392       return NULL;
5393 
5394    if (!params->lod_or_sample)
5395       params->lod_or_sample = dxil_module_get_undef(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32));
5396 
5397    const struct dxil_value *args[] = {
5398       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOAD),
5399       params->tex,
5400       params->lod_or_sample, params->coord[0], params->coord[1], params->coord[2],
5401       params->offset[0], params->offset[1], params->offset[2]
5402    };
5403 
5404    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5405 }
5406 
5407 static const struct dxil_value *
5408 emit_texture_lod(struct ntd_context *ctx, struct texop_parameters *params, bool clamped)
5409 {
5410    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.calculateLOD", DXIL_F32);
5411    if (!func)
5412       return NULL;
5413 
5414    const struct dxil_value *args[] = {
5415       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOD),
5416       params->tex,
5417       params->sampler,
5418       params->coord[0],
5419       params->coord[1],
5420       params->coord[2],
5421       dxil_module_get_int1_const(&ctx->mod, clamped ? 1 : 0)
5422    };
5423 
5424    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5425 }
5426 
5427 static const struct dxil_value *
5428 emit_texture_gather(struct ntd_context *ctx, struct texop_parameters *params, unsigned component)
5429 {
5430    const struct dxil_func *func = dxil_get_function(&ctx->mod,
5431       params->cmp ? "dx.op.textureGatherCmp" : "dx.op.textureGather", params->overload);
5432    if (!func)
5433       return NULL;
5434 
5435    const struct dxil_value *args[] = {
5436       dxil_module_get_int32_const(&ctx->mod, params->cmp ?
5437          DXIL_INTR_TEXTURE_GATHER_CMP : DXIL_INTR_TEXTURE_GATHER),
5438       params->tex,
5439       params->sampler,
5440       params->coord[0],
5441       params->coord[1],
5442       params->coord[2],
5443       params->coord[3],
5444       params->offset[0],
5445       params->offset[1],
5446       dxil_module_get_int32_const(&ctx->mod, component),
5447       params->cmp
5448    };
5449 
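   /* textureGather takes one operand fewer than textureGatherCmp: drop the
    * trailing comparison value when this is not a compare gather. */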
5450    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args) - (params->cmp ? 0 : 1));
5451 }
5452 
5453 static bool
5454 emit_tex(struct ntd_context *ctx, nir_tex_instr *instr)
5455 {
5456    struct texop_parameters params;
5457    memset(&params, 0, sizeof(struct texop_parameters));
5458    if (ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN) {
5459       params.tex = ctx->srv_handles[instr->texture_index];
5460       params.sampler = ctx->sampler_handles[instr->sampler_index];
5461    }
5462 
5463    const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);
5464    const struct dxil_type *float_type = dxil_module_get_float_type(&ctx->mod, 32);
5465    const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);
5466    const struct dxil_value *float_undef = dxil_module_get_undef(&ctx->mod, float_type);
5467 
5468    unsigned coord_components = 0, offset_components = 0, dx_components = 0, dy_components = 0;
5469    params.overload = get_overload(instr->dest_type, 32);
5470 
5471    bool lod_is_zero = false;
5472    for (unsigned i = 0; i < instr->num_srcs; i++) {
5473       nir_alu_type type = nir_tex_instr_src_type(instr, i);
5474 
5475       switch (instr->src[i].src_type) {
5476       case nir_tex_src_coord:
5477          coord_components = get_n_src(ctx, params.coord, ARRAY_SIZE(params.coord),
5478                                       &instr->src[i], type);
5479          if (!coord_components)
5480             return false;
5481          break;
5482 
5483       case nir_tex_src_offset:
5484          offset_components = get_n_src(ctx, params.offset, ARRAY_SIZE(params.offset),
5485                                        &instr->src[i], nir_type_int);
5486          if (!offset_components)
5487             return false;
5488 
5489          /* Dynamic offsets were only allowed with gather, until "advanced texture ops" in SM 6.7 */
5490          if (!nir_src_is_const(instr->src[i].src) && instr->op != nir_texop_tg4)
5491             ctx->mod.feats.advanced_texture_ops = true;
5492          break;
5493 
5494       case nir_tex_src_bias:
5495          assert(instr->op == nir_texop_txb);
5496          assert(nir_src_num_components(instr->src[i].src) == 1);
5497          params.bias = get_src(ctx, &instr->src[i].src, 0, nir_type_float);
5498          if (!params.bias)
5499             return false;
5500          break;
5501 
5502       case nir_tex_src_lod:
5503          assert(nir_src_num_components(instr->src[i].src) == 1);
5504          if (instr->op == nir_texop_txf_ms) {
5505             assert(nir_src_as_int(instr->src[i].src) == 0);
5506             break;
5507          }
5508 
5509          /* Buffers don't have a LOD */
5510          if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF)
5511             params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, type);
5512          else
5513             params.lod_or_sample = int_undef;
5514          if (!params.lod_or_sample)
5515             return false;
5516 
5517          if (nir_src_is_const(instr->src[i].src) && nir_src_as_float(instr->src[i].src) == 0.0f)
5518             lod_is_zero = true;
5519          break;
5520 
5521       case nir_tex_src_min_lod:
5522          assert(nir_src_num_components(instr->src[i].src) == 1);
5523          params.min_lod = get_src(ctx, &instr->src[i].src, 0, type);
5524          if (!params.min_lod)
5525             return false;
5526          break;
5527 
5528       case nir_tex_src_comparator:
5529          assert(nir_src_num_components(instr->src[i].src) == 1);
5530          params.cmp = get_src(ctx, &instr->src[i].src, 0, nir_type_float);
5531          if (!params.cmp)
5532             return false;
5533          break;
5534 
5535       case nir_tex_src_ddx:
5536          dx_components = get_n_src(ctx, params.dx, ARRAY_SIZE(params.dx),
5537                                    &instr->src[i], nir_type_float);
5538          if (!dx_components)
5539             return false;
5540          break;
5541 
5542       case nir_tex_src_ddy:
5543          dy_components = get_n_src(ctx, params.dy, ARRAY_SIZE(params.dy),
5544                                    &instr->src[i], nir_type_float);
5545          if (!dy_components)
5546             return false;
5547          break;
5548 
5549       case nir_tex_src_ms_index:
5550          params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, nir_type_int);
5551          if (!params.lod_or_sample)
5552             return false;
5553          break;
5554 
5555       case nir_tex_src_texture_deref:
5556          assert(ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN);
5557          params.tex = get_src_ssa(ctx, instr->src[i].src.ssa, 0);
5558          break;
5559 
5560       case nir_tex_src_sampler_deref:
5561          assert(ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN);
5562          params.sampler = get_src_ssa(ctx, instr->src[i].src.ssa, 0);
5563          break;
5564 
5565       case nir_tex_src_texture_offset:
5566          params.tex = emit_createhandle_call_dynamic(ctx, DXIL_RESOURCE_CLASS_SRV,
5567             0, instr->texture_index,
5568             dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
5569                get_src(ctx, &instr->src[i].src, 0, nir_type_uint),
5570                dxil_module_get_int32_const(&ctx->mod, instr->texture_index), 0),
5571             instr->texture_non_uniform);
5572          break;
5573 
5574       case nir_tex_src_sampler_offset:
5575          if (nir_tex_instr_need_sampler(instr)) {
5576             params.sampler = emit_createhandle_call_dynamic(ctx, DXIL_RESOURCE_CLASS_SAMPLER,
5577                0, instr->sampler_index,
5578                dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
5579                   get_src(ctx, &instr->src[i].src, 0, nir_type_uint),
5580                   dxil_module_get_int32_const(&ctx->mod, instr->sampler_index), 0),
5581                instr->sampler_non_uniform);
5582          }
5583          break;
5584 
5585       case nir_tex_src_texture_handle:
5586          params.tex = create_srv_handle(ctx, instr, &instr->src[i].src);
5587          break;
5588 
5589       case nir_tex_src_sampler_handle:
5590          if (nir_tex_instr_need_sampler(instr))
5591             params.sampler = create_sampler_handle(ctx, instr->is_shadow, &instr->src[i].src);
5592          break;
5593 
5594       case nir_tex_src_projector:
5595          unreachable("Texture projector should have been lowered");
5596 
5597       default:
5598          fprintf(stderr, "texture source: %d\n", instr->src[i].src_type);
5599          unreachable("unknown texture source");
5600       }
5601    }
5602 
5603    assert(params.tex != NULL);
5604    assert(instr->op == nir_texop_txf ||
5605           instr->op == nir_texop_txf_ms ||
5606           nir_tex_instr_is_query(instr) ||
5607           params.sampler != NULL);
5608 
5609    PAD_SRC(ctx, params.coord, coord_components, float_undef);
5610    PAD_SRC(ctx, params.offset, offset_components, int_undef);
5611    if (!params.min_lod) params.min_lod = float_undef;
5612 
5613    const struct dxil_value *sample = NULL;
5614    switch (instr->op) {
5615    case nir_texop_txb:
5616       if (params.cmp != NULL && ctx->mod.minor_version >= 8)
5617          sample = emit_sample_cmp_bias(ctx, &params);
5618       else
5619          sample = emit_sample_bias(ctx, &params);
5620       break;
5621 
5622    case nir_texop_tex:
5623       if (params.cmp != NULL) {
5624          sample = emit_sample_cmp(ctx, &params);
5625          break;
5626       } else if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER) {
5627          sample = emit_sample(ctx, &params);
5628          break;
5629       }
5630       params.lod_or_sample = dxil_module_get_float_const(&ctx->mod, 0);
5631       lod_is_zero = true;
5632       FALLTHROUGH;
5633    case nir_texop_txl:
5634       if (lod_is_zero && params.cmp != NULL && ctx->mod.minor_version < 7) {
5635          /* Prior to SM 6.7, dx.op.sampleCmpLevel is not available, so when the
5636           * level is a constant 0.0 we ignore the LOD argument and emit the
5637           * level-less sampleCmpLevelZero instruction instead.
5638           */
5639          sample = emit_sample_cmp_level_zero(ctx, &params);
5640       } else {
5641          if (params.cmp != NULL)
5642             sample = emit_sample_cmp_level(ctx, &params);
5643          else
5644             sample = emit_sample_level(ctx, &params);
5645       }
5646       break;
5647 
5648    case nir_texop_txd:
5649       PAD_SRC(ctx, params.dx, dx_components, float_undef);
5650       PAD_SRC(ctx, params.dy, dy_components, float_undef);
5651       if (params.cmp != NULL && ctx->mod.minor_version >= 8)
5652          sample = emit_sample_cmp_grad(ctx, &params);
5653       else
5654          sample = emit_sample_grad(ctx, &params);
5655       break;
5656 
5657    case nir_texop_txf:
5658    case nir_texop_txf_ms:
5659       if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
5660          params.coord[1] = int_undef;
5661          sample = emit_bufferload_call(ctx, params.tex, params.coord, params.overload);
5662       } else {
5663          PAD_SRC(ctx, params.coord, coord_components, int_undef);
5664          sample = emit_texel_fetch(ctx, &params);
5665       }
5666       break;
5667 
5668    case nir_texop_txs:
5669       sample = emit_texture_size(ctx, &params);
5670       break;
5671 
5672    case nir_texop_tg4:
5673       sample = emit_texture_gather(ctx, &params, instr->component);
5674       break;
5675 
5676    case nir_texop_lod:
5677       sample = emit_texture_lod(ctx, &params, true);
5678       store_def(ctx, &instr->def, 0, sample);
5679       sample = emit_texture_lod(ctx, &params, false);
5680       store_def(ctx, &instr->def, 1, sample);
5681       return true;
5682 
5683    case nir_texop_query_levels: {
5684       params.lod_or_sample = dxil_module_get_int_const(&ctx->mod, 0, 32);
5685       sample = emit_texture_size(ctx, &params);
5686       const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, 3);
5687       store_def(ctx, &instr->def, 0, retval);
5688       return true;
5689    }
5690 
5691    case nir_texop_texture_samples: {
5692       params.lod_or_sample = int_undef;
5693       sample = emit_texture_size(ctx, &params);
5694       const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, 3);
5695       store_def(ctx, &instr->def, 0, retval);
5696       return true;
5697    }
5698 
5699    default:
5700       fprintf(stderr, "texture op: %d\n", instr->op);
5701       unreachable("unknown texture op");
5702    }
5703 
5704    if (!sample)
5705       return false;
5706 
5707    for (unsigned i = 0; i < instr->def.num_components; ++i) {
5708       const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, i);
5709       store_def(ctx, &instr->def, i, retval);
5710    }
5711 
5712    return true;
5713 }
5714 
5715 static bool
5716 emit_undefined(struct ntd_context *ctx, nir_undef_instr *undef)
5717 {
5718    for (unsigned i = 0; i < undef->def.num_components; ++i)
5719       store_ssa_def(ctx, &undef->def, i, dxil_module_get_int32_const(&ctx->mod, 0));
5720    return true;
5721 }
5722 
5723 static bool emit_instr(struct ntd_context *ctx, struct nir_instr* instr)
5724 {
5725    switch (instr->type) {
5726    case nir_instr_type_alu:
5727       return emit_alu(ctx, nir_instr_as_alu(instr));
5728    case nir_instr_type_intrinsic:
5729       return emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
5730    case nir_instr_type_load_const:
5731       return emit_load_const(ctx, nir_instr_as_load_const(instr));
5732    case nir_instr_type_deref:
5733       return emit_deref(ctx, nir_instr_as_deref(instr));
5734    case nir_instr_type_jump:
5735       return emit_jump(ctx, nir_instr_as_jump(instr));
5736    case nir_instr_type_phi:
5737       return emit_phi(ctx, nir_instr_as_phi(instr));
5738    case nir_instr_type_tex:
5739       return emit_tex(ctx, nir_instr_as_tex(instr));
5740    case nir_instr_type_undef:
5741       return emit_undefined(ctx, nir_instr_as_undef(instr));
5742    default:
5743       log_nir_instr_unsupported(ctx->logger, "Unimplemented instruction type",
5744                                 instr);
5745       return false;
5746    }
5747 }
5748 
5749 
5750 static bool
5751 emit_block(struct ntd_context *ctx, struct nir_block *block)
5752 {
5753    assert(block->index < ctx->mod.cur_emitting_func->num_basic_block_ids);
5754    ctx->mod.cur_emitting_func->basic_block_ids[block->index] = ctx->mod.cur_emitting_func->curr_block;
5755 
5756    nir_foreach_instr(instr, block) {
5757       TRACE_CONVERSION(instr);
5758 
5759       if (!emit_instr(ctx, instr))  {
5760          return false;
5761       }
5762    }
5763    return true;
5764 }
5765 
5766 static bool
5767 emit_cf_list(struct ntd_context *ctx, struct exec_list *list);
5768 
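/* Lower a NIR if-statement to DXIL branches: emit a conditional branch on
 * the condition, then the then/else lists, adding a fallthrough branch to
 * the successor block for each leg that does not already end in a jump.
 */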
5769 static bool
5770 emit_if(struct ntd_context *ctx, struct nir_if *if_stmt)
5771 {
5772    assert(nir_src_num_components(if_stmt->condition) == 1);
5773    const struct dxil_value *cond = get_src(ctx, &if_stmt->condition, 0,
5774                                            nir_type_bool);
5775    if (!cond)
5776       return false;
5777 
5778    /* prepare blocks */
5779    nir_block *then_block = nir_if_first_then_block(if_stmt);
5780    assert(nir_if_last_then_block(if_stmt)->successors[0]);
5781    assert(!nir_if_last_then_block(if_stmt)->successors[1]);
5782    int then_succ = nir_if_last_then_block(if_stmt)->successors[0]->index;
5783 
5784    nir_block *else_block = NULL;
5785    int else_succ = -1;
5786    if (!exec_list_is_empty(&if_stmt->else_list)) {
5787       else_block = nir_if_first_else_block(if_stmt);
5788       assert(nir_if_last_else_block(if_stmt)->successors[0]);
5789       assert(!nir_if_last_else_block(if_stmt)->successors[1]);
5790       else_succ = nir_if_last_else_block(if_stmt)->successors[0]->index;
5791    }
5792 
5793    if (!emit_cond_branch(ctx, cond, then_block->index,
5794                          else_block ? else_block->index : then_succ))
5795       return false;
5796 
5797    /* handle then-block */
5798    if (!emit_cf_list(ctx, &if_stmt->then_list) ||
5799        (!nir_block_ends_in_jump(nir_if_last_then_block(if_stmt)) &&
5800         !emit_branch(ctx, then_succ)))
5801       return false;
5802 
5803    if (else_block) {
5804       /* handle else-block */
5805       if (!emit_cf_list(ctx, &if_stmt->else_list) ||
5806           (!nir_block_ends_in_jump(nir_if_last_else_block(if_stmt)) &&
5807            !emit_branch(ctx, else_succ)))
5808          return false;
5809    }
5810 
5811    return true;
5812 }
5813 
5814 static bool
5815 emit_loop(struct ntd_context *ctx, nir_loop *loop)
5816 {
5817    assert(!nir_loop_has_continue_construct(loop));
5818    nir_block *first_block = nir_loop_first_block(loop);
5819    nir_block *last_block = nir_loop_last_block(loop);
5820 
5821    assert(last_block->successors[0]);
5822    assert(!last_block->successors[1]);
5823 
5824    if (!emit_branch(ctx, first_block->index))
5825       return false;
5826 
5827    if (!emit_cf_list(ctx, &loop->body))
5828       return false;
5829 
5830    /* If the loop's last block doesn't explicitly jump somewhere, then there's
5831     * an implicit continue that should take it back to the first loop block
5832     */
5833    nir_instr *last_instr = nir_block_last_instr(last_block);
5834    if ((!last_instr || last_instr->type != nir_instr_type_jump) &&
5835        !emit_branch(ctx, first_block->index))
5836       return false;
5837 
5838    return true;
5839 }
5840 
5841 static bool
5842 emit_cf_list(struct ntd_context *ctx, struct exec_list *list)
5843 {
5844    foreach_list_typed(nir_cf_node, node, node, list) {
5845       switch (node->type) {
5846       case nir_cf_node_block:
5847          if (!emit_block(ctx, nir_cf_node_as_block(node)))
5848             return false;
5849          break;
5850 
5851       case nir_cf_node_if:
5852          if (!emit_if(ctx, nir_cf_node_as_if(node)))
5853             return false;
5854          break;
5855 
5856       case nir_cf_node_loop:
5857          if (!emit_loop(ctx, nir_cf_node_as_loop(node)))
5858             return false;
5859          break;
5860 
5861       default:
5862          unreachable("unsupported cf-list node");
5863          break;
5864       }
5865    }
5866    return true;
5867 }
5868 
5869 static void
5870 insert_sorted_by_binding(struct exec_list *var_list, nir_variable *new_var)
5871 {
5872    nir_foreach_variable_in_list(var, var_list) {
5873       if (var->data.binding > new_var->data.binding) {
5874          exec_node_insert_node_before(&var->node, &new_var->node);
5875          return;
5876       }
5877    }
5878    exec_list_push_tail(var_list, &new_var->node);
5879 }
5880 
5881 
5882 static void
5883 sort_uniforms_by_binding_and_remove_structs(nir_shader *s)
5884 {
5885    struct exec_list new_list;
5886    exec_list_make_empty(&new_list);
5887 
5888    nir_foreach_variable_with_modes_safe(var, s, nir_var_uniform) {
5889       exec_node_remove(&var->node);
5890       const struct glsl_type *type = glsl_without_array(var->type);
5891       if (!glsl_type_is_struct(type))
5892          insert_sorted_by_binding(&new_list, var);
5893    }
5894    exec_list_append(&s->variables, &new_list);
5895 }
5896 
5897 static bool
5898 emit_cbvs(struct ntd_context *ctx)
5899 {
5900    if (ctx->opts->environment != DXIL_ENVIRONMENT_GL) {
5901       nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ubo) {
5902          if (!emit_ubo_var(ctx, var))
5903             return false;
5904       }
5905    } else {
5906       if (ctx->shader->info.num_ubos) {
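         /* GL binding layout: binding 0 holds the default uniform block when
          * present, the user UBOs are emitted as a single array starting at
          * array_base, and the last binding holds the driver's state vars
          * when those must stay un-arrayed. */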
5907          const unsigned ubo_size = 16384 /* 4096 vec4s */;
5908          uint array_base = ctx->shader->info.first_ubo_is_default_ubo ? 1 : 0;
5909          bool has_ubo0 = ctx->shader->num_uniforms > 0 && ctx->shader->info.first_ubo_is_default_ubo;
5910          bool has_state_vars = ctx->opts->last_ubo_is_not_arrayed;
5911          unsigned ubo1_array_size = ctx->shader->info.num_ubos - array_base -
5912             (has_state_vars ? 1 : 0);
5913 
5914          if (has_ubo0 &&
5915              !emit_cbv(ctx, 0, 0, ubo_size, 1, "__ubo_uniforms"))
5916             return false;
5917          if (ubo1_array_size &&
5918              !emit_cbv(ctx, array_base, 0, ubo_size, ubo1_array_size, "__ubos"))
5919             return false;
5920          if (has_state_vars &&
5921              !emit_cbv(ctx, ctx->shader->info.num_ubos - 1, 0, ubo_size, 1, "__ubo_state_vars"))
5922             return false;
5923       }
5924    }
5925 
5926    return true;
5927 }
5928 
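/* Give each function_temp variable its own 16-byte-aligned alloca and record
 * the pointers by driver_location, so scratch accesses can index straight
 * into ctx->scratchvars.
 */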
5929 static bool
5930 emit_scratch(struct ntd_context *ctx, nir_function_impl *impl)
5931 {
5932    uint32_t index = 0;
5933    nir_foreach_function_temp_variable(var, impl)
5934       var->data.driver_location = index++;
5935 
5936    if (ctx->scratchvars)
5937       ralloc_free((void *)ctx->scratchvars);
5938 
5939    ctx->scratchvars = ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, index);
5940 
5941    nir_foreach_function_temp_variable(var, impl) {
5942       const struct dxil_type *type = get_type_for_glsl_type(&ctx->mod, var->type);
5943       const struct dxil_value *length = dxil_module_get_int32_const(&ctx->mod, 1);
5944       const struct dxil_value *ptr = dxil_emit_alloca(&ctx->mod, type, length, 16);
5945       if (!ptr)
5946          return false;
5947 
5948       ctx->scratchvars[var->data.driver_location] = ptr;
5949    }
5950 
5951    return true;
5952 }
5953 
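/* Emit one DXIL function: translate the fp32 denorm float-controls mode into
 * function attributes, allocate the per-SSA-def bookkeeping, emit the CF
 * list, then resolve the deferred phi incomings and terminate with ret void.
 */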
5954 static bool
5955 emit_function(struct ntd_context *ctx, nir_function *func, nir_function_impl *impl)
5956 {
5957    assert(func->num_params == 0);
5958    nir_metadata_require(impl, nir_metadata_block_index);
5959 
5960    const char *attr_keys[2] = { NULL };
5961    const char *attr_values[2] = { NULL };
5962    if (ctx->shader->info.float_controls_execution_mode &
5963        (FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32 | FLOAT_CONTROLS_DENORM_PRESERVE_FP32))
5964       attr_keys[0] = "fp32-denorm-mode";
5965    if (ctx->shader->info.float_controls_execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32)
5966       attr_values[0] = "ftz";
5967    else if (ctx->shader->info.float_controls_execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP32)
5968       attr_values[0] = "preserve";
5969 
5970    const struct dxil_type *void_type = dxil_module_get_void_type(&ctx->mod);
5971    const struct dxil_type *func_type = dxil_module_add_function_type(&ctx->mod, void_type, NULL, 0);
5972    struct dxil_func_def *func_def = dxil_add_function_def(&ctx->mod, func->name, func_type, impl->num_blocks, attr_keys, attr_values);
5973    if (!func_def)
5974       return false;
5975 
5976    if (func->is_entrypoint)
5977       ctx->main_func_def = func_def;
5978    else if (func == ctx->tess_ctrl_patch_constant_func)
5979       ctx->tess_ctrl_patch_constant_func_def = func_def;
5980 
5981    ctx->defs = rzalloc_array(ctx->ralloc_ctx, struct dxil_def, impl->ssa_alloc);
5982    ctx->float_types = rzalloc_array(ctx->ralloc_ctx, BITSET_WORD, BITSET_WORDS(impl->ssa_alloc));
5983    ctx->int_types = rzalloc_array(ctx->ralloc_ctx, BITSET_WORD, BITSET_WORDS(impl->ssa_alloc));
5984    if (!ctx->defs || !ctx->float_types || !ctx->int_types)
5985       return false;
5986    ctx->num_defs = impl->ssa_alloc;
5987 
5988    ctx->phis = _mesa_pointer_hash_table_create(ctx->ralloc_ctx);
5989    if (!ctx->phis)
5990       return false;
5991 
5992    nir_gather_types(impl, ctx->float_types, ctx->int_types);
5993 
5994    if (!emit_scratch(ctx, impl))
5995       return false;
5996 
5997    if (!emit_static_indexing_handles(ctx))
5998       return false;
5999 
6000    if (!emit_cf_list(ctx, &impl->body))
6001       return false;
6002 
6003    hash_table_foreach(ctx->phis, entry) {
6004       if (!fixup_phi(ctx, (nir_phi_instr *)entry->key,
6005                      (struct phi_block *)entry->data))
6006          return false;
6007    }
6008 
6009    if (!dxil_emit_ret_void(&ctx->mod))
6010       return false;
6011 
6012    ralloc_free(ctx->defs);
6013    ctx->defs = NULL;
6014    _mesa_hash_table_destroy(ctx->phis, NULL);
6015    return true;
6016 }
6017 
6018 static bool
6019 emit_module(struct ntd_context *ctx, const struct nir_to_dxil_options *opts)
6020 {
6021    /* The validator forces us to emit resources in a specific order:
6022     * CBVs, Samplers, SRVs, UAVs. While we are at it, also remove
6023     * stale struct uniforms; they are lowered but might not have been removed. */
6024    sort_uniforms_by_binding_and_remove_structs(ctx->shader);
6025 
6026    /* CBVs */
6027    if (!emit_cbvs(ctx))
6028       return false;
6029 
6030    /* Samplers */
6031    nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
6032       unsigned count = glsl_type_get_sampler_count(var->type);
6033       assert(count == 0 || glsl_type_is_bare_sampler(glsl_without_array(var->type)));
6034       if (count > 0 && !emit_sampler(ctx, var, count))
6035          return false;
6036    }
6037 
6038    /* SRVs */
6039    nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
6040       unsigned count = glsl_type_get_texture_count(var->type);
6041       assert(count == 0 || glsl_type_is_texture(glsl_without_array(var->type)));
6042       if (count > 0 && !emit_srv(ctx, var, count))
6043          return false;
6044    }
6045 
6046    /* Handle read-only SSBOs as SRVs */
6047    if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
6048       nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {
6049          if ((var->data.access & ACCESS_NON_WRITEABLE) != 0) {
6050             unsigned count = 1;
6051             if (glsl_type_is_array(var->type))
6052                count = glsl_get_length(var->type);
6053             if (!emit_srv(ctx, var, count))
6054                return false;
6055          }
6056       }
6057    }
6058 
6059    if (!emit_shared_vars(ctx))
6060       return false;
6061    if (!emit_global_consts(ctx))
6062       return false;
6063 
6064    /* UAVs */
6065    if (ctx->shader->info.stage == MESA_SHADER_KERNEL) {
6066       if (!emit_globals(ctx, opts->num_kernel_globals))
6067          return false;
6068 
6069    } else if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
6070       /* Handle read/write SSBOs as UAVs */
6071       nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {
6072          if ((var->data.access & ACCESS_NON_WRITEABLE) == 0) {
6073             unsigned count = 1;
6074             if (glsl_type_is_array(var->type))
6075                count = glsl_get_length(var->type);
6076             if (!emit_uav(ctx, var->data.binding, var->data.descriptor_set,
6077                         count, DXIL_COMP_TYPE_INVALID, 1,
6078                         DXIL_RESOURCE_KIND_RAW_BUFFER, var->data.access, var->name))
6079                return false;
6080 
6081          }
6082       }
6083    } else {
6084       for (unsigned i = 0; i < ctx->shader->info.num_ssbos; ++i) {
6085          char name[64];
6086          snprintf(name, sizeof(name), "__ssbo%d", i);
6087          if (!emit_uav(ctx, i, 0, 1, DXIL_COMP_TYPE_INVALID, 1,
6088                        DXIL_RESOURCE_KIND_RAW_BUFFER, 0, name))
6089             return false;
6090       }
6091       /* To work around a WARP bug, bind these descriptors a second time in descriptor
6092        * space 2. Space 0 will be used for static indexing, while space 2 will be used
6093        * for dynamic indexing. Space 0 will be individual SSBOs in the DXIL shader, while
6094        * space 2 will be a single array.
6095        */
6096       if (ctx->shader->info.num_ssbos &&
6097           !emit_uav(ctx, 0, 2, ctx->shader->info.num_ssbos, DXIL_COMP_TYPE_INVALID, 1,
6098                     DXIL_RESOURCE_KIND_RAW_BUFFER, 0, "__ssbo_dynamic"))
6099          return false;
6100    }
6101 
6102    nir_foreach_image_variable(var, ctx->shader) {
6103       if (!emit_uav_var(ctx, var, glsl_type_get_image_count(var->type)))
6104          return false;
6105    }
6106 
6107    ctx->mod.info.has_per_sample_input =
6108       BITSET_TEST(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
6109       ctx->shader->info.fs.uses_sample_shading ||
6110       ctx->shader->info.fs.uses_sample_qualifier;
6111    if (!ctx->mod.info.has_per_sample_input && ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
6112       nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in | nir_var_system_value) {
6113          if (var->data.sample) {
6114             ctx->mod.info.has_per_sample_input = true;
6115             break;
6116          }
6117       }
6118    }
6119 
6120    /* From the Vulkan spec 1.3.238, section 15.8:
6121     * When Sample Shading is enabled, the x and y components of FragCoord reflect the location
6122     * of one of the samples corresponding to the shader invocation.
6123     *
6124     * In other words, if the fragment shader is executing per-sample, then the position variable
6125     * should always be per-sample.
6126     *
6127     * Also:
6128     * The Centroid interpolation decoration is ignored, but allowed, on FragCoord.
6129     */
6130    if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
6131       nir_variable *pos_var = nir_find_variable_with_location(ctx->shader, nir_var_shader_in, VARYING_SLOT_POS);
6132       if (pos_var) {
6133          if (ctx->mod.info.has_per_sample_input)
6134             pos_var->data.sample = true;
6135          pos_var->data.centroid = false;
6136       }
6137    }
6138 
6139    unsigned input_clip_size = ctx->mod.shader_kind == DXIL_PIXEL_SHADER ?
6140       ctx->shader->info.clip_distance_array_size : ctx->opts->input_clip_size;
6141    preprocess_signatures(&ctx->mod, ctx->shader, input_clip_size);
6142 
6143    nir_foreach_function_with_impl(func, impl, ctx->shader) {
6144       if (!emit_function(ctx, func, impl))
6145          return false;
6146    }
6147 
6148    if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
6149       nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_out) {
6150          if (var->data.location == FRAG_RESULT_STENCIL) {
6151             ctx->mod.feats.stencil_ref = true;
6152          }
6153       }
6154    } else if (ctx->shader->info.stage == MESA_SHADER_VERTEX ||
6155               ctx->shader->info.stage == MESA_SHADER_TESS_EVAL) {
6156       if (ctx->shader->info.outputs_written &
6157           (VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER))
6158          ctx->mod.feats.array_layer_from_vs_or_ds = true;
6159    } else if (ctx->shader->info.stage == MESA_SHADER_GEOMETRY ||
6160               ctx->shader->info.stage == MESA_SHADER_TESS_CTRL) {
6161       if (ctx->shader->info.inputs_read &
6162           (VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER))
6163          ctx->mod.feats.array_layer_from_vs_or_ds = true;
6164    }
6165 
6166    if (ctx->mod.feats.native_low_precision && ctx->mod.minor_version < 2) {
6167       ctx->logger->log(ctx->logger->priv,
6168                        "Shader uses 16-bit types, which require shader model 6.2, but 6.2 is unsupported\n");
6169       return false;
6170    }
6171 
6172    return emit_metadata(ctx) &&
6173           dxil_emit_module(&ctx->mod);
6174 }
6175 
6176 static unsigned int
6177 get_dxil_shader_kind(struct nir_shader *s)
6178 {
6179    switch (s->info.stage) {
6180    case MESA_SHADER_VERTEX:
6181       return DXIL_VERTEX_SHADER;
6182    case MESA_SHADER_TESS_CTRL:
6183       return DXIL_HULL_SHADER;
6184    case MESA_SHADER_TESS_EVAL:
6185       return DXIL_DOMAIN_SHADER;
6186    case MESA_SHADER_GEOMETRY:
6187       return DXIL_GEOMETRY_SHADER;
6188    case MESA_SHADER_FRAGMENT:
6189       return DXIL_PIXEL_SHADER;
6190    case MESA_SHADER_KERNEL:
6191    case MESA_SHADER_COMPUTE:
6192       return DXIL_COMPUTE_SHADER;
6193    default:
6194       unreachable("unknown shader stage in nir_to_dxil");
6195       return DXIL_COMPUTE_SHADER;
6196    }
6197 }
6198 
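/* Callback for nir_lower_bit_size: ask for non-conversion, non-vec/mov ALU
 * ops whose sources are narrower than the supported minimum (16 bits, or 32
 * when lower_int16 is set) to be widened to that minimum. 1-bit booleans are
 * left alone.
 */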
6199 static unsigned
6200 lower_bit_size_callback(const nir_instr* instr, void *data)
6201 {
6202    if (instr->type != nir_instr_type_alu)
6203       return 0;
6204    nir_alu_instr *alu = nir_instr_as_alu(instr);
6205 
6206    if (nir_op_infos[alu->op].is_conversion)
6207       return 0;
6208 
6209    if (nir_op_is_vec_or_mov(alu->op))
6210       return 0;
6211 
6212    unsigned num_inputs = nir_op_infos[alu->op].num_inputs;
6213    const struct nir_to_dxil_options *opts = (const struct nir_to_dxil_options*)data;
6214    unsigned min_bit_size = opts->lower_int16 ? 32 : 16;
6215 
6216    unsigned ret = 0;
6217    for (unsigned i = 0; i < num_inputs; i++) {
6218       unsigned bit_size = nir_src_bit_size(alu->src[i].src);
6219       if (bit_size != 1 && bit_size < min_bit_size)
6220          ret = min_bit_size;
6221    }
6222 
6223    return ret;
6224 }
6225 
6226 static bool
6227 vectorize_filter(
6228    unsigned align_mul,
6229    unsigned align_offset,
6230    unsigned bit_size,
6231    unsigned num_components,
6232    nir_intrinsic_instr *low, nir_intrinsic_instr *high,
6233    void *data)
6234 {
6235    return util_is_power_of_two_nonzero(num_components);
6236 }
6237 
6238 struct lower_mem_bit_sizes_data {
6239    const nir_shader_compiler_options *nir_options;
6240    const struct nir_to_dxil_options *dxil_options;
6241 };
6242 
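/* Callback for nir_lower_mem_access_bit_sizes. As a worked example (assuming
 * lower_int16 = false, so min_bit_size = 16): a 12-byte, 32-bit SSBO load
 * with align_mul = 4 and align_offset = 0 gives align = 4, which passes the
 * alignment check; target = MIN2(12, 4) = 4 equals 32/8, so neither fitting
 * loop changes the bit size, and the result is a 3-component 32-bit load
 * with 4-byte alignment.
 */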
6243 static nir_mem_access_size_align
6244 lower_mem_access_bit_sizes_cb(nir_intrinsic_op intrin,
6245                               uint8_t bytes,
6246                               uint8_t bit_size_in,
6247                               uint32_t align_mul,
6248                               uint32_t align_offset,
6249                               bool offset_is_const,
6250                               const void *cb_data)
6251 {
6252    const struct lower_mem_bit_sizes_data *data = cb_data;
6253    unsigned max_bit_size = 32;
6254    unsigned min_bit_size = data->dxil_options->lower_int16 ? 32 : 16;
6255    unsigned closest_bit_size = MAX2(min_bit_size, MIN2(max_bit_size, bit_size_in));
6256    if (intrin == nir_intrinsic_load_ubo) {
6257       /* UBO loads can be done at whatever (supported) bit size, but require 16 byte
6258        * alignment and can load up to 16 bytes per instruction. However, this pass would have to
6259        * load 16 bytes at a time to guarantee 16-byte alignment. We're going to run lower_ubo_vec4
6260        * which can deal with unaligned vec4s, so for this pass let's just deal with bit size
6261        * and total size restrictions. */
6262       return (nir_mem_access_size_align) {
6263          .align = closest_bit_size / 8,
6264          .bit_size = closest_bit_size,
6265          .num_components = DIV_ROUND_UP(MIN2(bytes, 16) * 8, closest_bit_size),
6266       };
6267    }
6268 
6269    assert(intrin == nir_intrinsic_load_ssbo || intrin == nir_intrinsic_store_ssbo);
6270    uint32_t align = nir_combined_align(align_mul, align_offset);
6271    if (align < min_bit_size / 8) {
6272       /* Unaligned load/store, use the minimum bit size, up to 4 components */
6273       unsigned ideal_num_components = intrin == nir_intrinsic_load_ssbo ?
6274          DIV_ROUND_UP(bytes * 8, min_bit_size) :
6275          (32 / min_bit_size);
6276       return (nir_mem_access_size_align) {
6277          .align = min_bit_size / 8,
6278          .bit_size = min_bit_size,
6279          .num_components = MIN2(4, ideal_num_components),
6280       };
6281    }
6282 
6283    /* Increase/decrease bit size to try to get closer to the requested byte size/align */
6284    unsigned bit_size = closest_bit_size;
6285    unsigned target = MIN2(bytes, align);
6286    while (target < bit_size / 8 && bit_size > min_bit_size)
6287       bit_size /= 2;
6288    while (target > bit_size / 8 * 4 && bit_size < max_bit_size)
6289       bit_size *= 2;
6290 
6291    /* This is the best we can do */
6292    unsigned num_components = intrin == nir_intrinsic_load_ssbo ?
6293       DIV_ROUND_UP(bytes * 8, bit_size) :
6294       MAX2(1, (bytes * 8 / bit_size));
6295    return (nir_mem_access_size_align) {
6296       .align = bit_size / 8,
6297       .bit_size = bit_size,
6298       .num_components = MIN2(4, num_components),
6299    };
6300 }
6301 
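/* Run the lowering and optimization passes to a fixed point: each pass
 * reports progress, and the loop repeats until a full iteration makes no
 * further change.
 */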
6302 static void
6303 optimize_nir(struct nir_shader *s, const struct nir_to_dxil_options *opts)
6304 {
6305    bool progress;
6306    do {
6307       progress = false;
6308       NIR_PASS_V(s, nir_lower_vars_to_ssa);
6309       NIR_PASS(progress, s, nir_lower_indirect_derefs, nir_var_function_temp, 4);
6310       NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL);
6311       NIR_PASS(progress, s, nir_copy_prop);
6312       NIR_PASS(progress, s, nir_opt_copy_prop_vars);
6313       NIR_PASS(progress, s, nir_lower_bit_size, lower_bit_size_callback, (void*)opts);
6314       NIR_PASS(progress, s, dxil_nir_lower_8bit_conv);
6315       if (opts->lower_int16)
6316          NIR_PASS(progress, s, dxil_nir_lower_16bit_conv);
6317       NIR_PASS(progress, s, nir_opt_remove_phis);
6318       NIR_PASS(progress, s, nir_opt_dce);
6319       NIR_PASS(progress, s, nir_opt_if,
6320                nir_opt_if_optimize_phi_true_false | nir_opt_if_avoid_64bit_phis);
6321       NIR_PASS(progress, s, nir_opt_dead_cf);
6322       NIR_PASS(progress, s, nir_opt_cse);
6323       NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
6324       NIR_PASS(progress, s, nir_opt_algebraic);
6325       NIR_PASS(progress, s, dxil_nir_algebraic);
6326       if (s->options->lower_int64_options)
6327          NIR_PASS(progress, s, nir_lower_int64);
6328       NIR_PASS(progress, s, nir_lower_alu);
6329       NIR_PASS(progress, s, nir_opt_constant_folding);
6330       NIR_PASS(progress, s, nir_opt_undef);
6331       NIR_PASS(progress, s, nir_opt_deref);
6332       NIR_PASS(progress, s, dxil_nir_lower_upcast_phis, opts->lower_int16 ? 32 : 16);
6333       NIR_PASS(progress, s, nir_lower_64bit_phis);
6334       NIR_PASS(progress, s, nir_lower_phis_to_scalar, true);
6335       NIR_PASS(progress, s, nir_opt_loop_unroll);
6336       NIR_PASS(progress, s, nir_lower_pack);
6337       NIR_PASS(progress, s, dxil_nir_remove_oob_array_accesses);
6338       NIR_PASS_V(s, nir_lower_system_values);
6339    } while (progress);
6340 
6341    do {
6342       progress = false;
6343       NIR_PASS(progress, s, nir_opt_algebraic_late);
6344    } while (progress);
6345 
6346    NIR_PASS_V(s, nir_lower_undef_to_zero);
6347 }
6348 
6349 static void
6350 dxil_fill_validation_state(struct ntd_context *ctx,
6351                                 struct dxil_validation_state *state)
6352 {
6353    unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
6354       sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
6355    state->num_resources = ctx->resources.size / resource_element_size;
6356    state->resources.v0 = (struct dxil_resource_v0*)ctx->resources.data;
6357    if (ctx->shader->info.subgroup_size >= SUBGROUP_SIZE_REQUIRE_4) {
6358       state->state.psv1.psv0.max_expected_wave_lane_count = ctx->shader->info.subgroup_size;
6359       state->state.psv1.psv0.min_expected_wave_lane_count = ctx->shader->info.subgroup_size;
6360    } else {
6361       state->state.psv1.psv0.max_expected_wave_lane_count = UINT_MAX;
6362    }
6363    state->state.psv1.shader_stage = (uint8_t)ctx->mod.shader_kind;
6364    state->state.psv1.uses_view_id = (uint8_t)ctx->mod.feats.view_id;
6365    state->state.psv1.sig_input_elements = (uint8_t)ctx->mod.num_sig_inputs;
6366    state->state.psv1.sig_output_elements = (uint8_t)ctx->mod.num_sig_outputs;
6367    state->state.psv1.sig_patch_const_or_prim_elements = (uint8_t)ctx->mod.num_sig_patch_consts;
6368 
6369    switch (ctx->mod.shader_kind) {
6370    case DXIL_VERTEX_SHADER:
6371       state->state.psv1.psv0.vs.output_position_present = ctx->mod.info.has_out_position;
6372       break;
6373    case DXIL_PIXEL_SHADER:
6374       /* TODO: handle depth outputs */
6375       state->state.psv1.psv0.ps.depth_output = ctx->mod.info.has_out_depth;
6376       state->state.psv1.psv0.ps.sample_frequency =
6377          ctx->mod.info.has_per_sample_input;
6378       break;
6379    case DXIL_COMPUTE_SHADER:
6380       state->state.num_threads_x = MAX2(ctx->shader->info.workgroup_size[0], 1);
6381       state->state.num_threads_y = MAX2(ctx->shader->info.workgroup_size[1], 1);
6382       state->state.num_threads_z = MAX2(ctx->shader->info.workgroup_size[2], 1);
6383       break;
6384    case DXIL_GEOMETRY_SHADER:
6385       state->state.psv1.max_vertex_count = ctx->shader->info.gs.vertices_out;
6386       state->state.psv1.psv0.gs.input_primitive = dxil_get_input_primitive(ctx->shader->info.gs.input_primitive);
6387       state->state.psv1.psv0.gs.output_toplology = dxil_get_primitive_topology(ctx->shader->info.gs.output_primitive);
6388       state->state.psv1.psv0.gs.output_stream_mask = MAX2(ctx->shader->info.gs.active_stream_mask, 1);
6389       state->state.psv1.psv0.gs.output_position_present = ctx->mod.info.has_out_position;
6390       break;
6391    case DXIL_HULL_SHADER:
6392       state->state.psv1.psv0.hs.input_control_point_count = ctx->tess_input_control_point_count;
6393       state->state.psv1.psv0.hs.output_control_point_count = ctx->shader->info.tess.tcs_vertices_out;
6394       state->state.psv1.psv0.hs.tessellator_domain = get_tessellator_domain(ctx->shader->info.tess._primitive_mode);
6395       state->state.psv1.psv0.hs.tessellator_output_primitive = get_tessellator_output_primitive(&ctx->shader->info);
6396       state->state.psv1.sig_patch_const_or_prim_vectors = ctx->mod.num_psv_patch_consts;
6397       break;
6398    case DXIL_DOMAIN_SHADER:
6399       state->state.psv1.psv0.ds.input_control_point_count = ctx->shader->info.tess.tcs_vertices_out;
6400       state->state.psv1.psv0.ds.tessellator_domain = get_tessellator_domain(ctx->shader->info.tess._primitive_mode);
6401       state->state.psv1.psv0.ds.output_position_present = ctx->mod.info.has_out_position;
6402       state->state.psv1.sig_patch_const_or_prim_vectors = ctx->mod.num_psv_patch_consts;
6403       break;
6404    default:
6405       assert(0 && "Shader type not (yet) supported");
6406    }
6407 }
6408 
6409 static nir_variable *
6410 add_sysvalue(struct ntd_context *ctx,
6411               uint8_t value, char *name,
6412               int driver_location)
6413 {
6414 
6415    nir_variable *var = rzalloc(ctx->shader, nir_variable);
6416    if (!var)
6417       return NULL;
6418    var->data.driver_location = driver_location;
6419    var->data.location = value;
6420    var->type = glsl_uint_type();
6421    var->name = name;
6422    var->data.mode = nir_var_system_value;
6423    var->data.interpolation = INTERP_MODE_FLAT;
6424    return var;
6425 }
6426 
6427 static bool
6428 append_input_or_sysvalue(struct ntd_context *ctx,
6429                          int input_loc,  int sv_slot,
6430                          char *name, int driver_location)
6431 {
6432    if (input_loc >= 0) {
6433       /* Check whether an input variable is available that corresponds
6434        * to the sysvalue */
6435       nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
6436          if (var->data.location == input_loc) {
6437             ctx->system_value[sv_slot] = var;
6438             return true;
6439          }
6440       }
6441    }
6442 
6443    ctx->system_value[sv_slot] = add_sysvalue(ctx, sv_slot, name, driver_location);
6444    if (!ctx->system_value[sv_slot])
6445       return false;
6446 
6447    nir_shader_add_variable(ctx->shader, ctx->system_value[sv_slot]);
6448    return true;
6449 }
6450 
6451 struct sysvalue_name {
6452    gl_system_value value;
6453    int slot;
6454    char *name;
6455    gl_shader_stage only_in_shader;
6456 } possible_sysvalues[] = {
6457    {SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, -1, "SV_VertexID", MESA_SHADER_NONE},
6458    {SYSTEM_VALUE_INSTANCE_ID, -1, "SV_InstanceID", MESA_SHADER_NONE},
6459    {SYSTEM_VALUE_FRONT_FACE, VARYING_SLOT_FACE, "SV_IsFrontFace", MESA_SHADER_NONE},
6460    {SYSTEM_VALUE_PRIMITIVE_ID, VARYING_SLOT_PRIMITIVE_ID, "SV_PrimitiveID", MESA_SHADER_GEOMETRY},
6461    {SYSTEM_VALUE_SAMPLE_ID, -1, "SV_SampleIndex", MESA_SHADER_NONE},
6462 };
6463 
6464 static bool
6465 allocate_sysvalues(struct ntd_context *ctx)
6466 {
6467    unsigned driver_location = 0;
6468    nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in)
6469       driver_location = MAX2(driver_location, var->data.driver_location + 1);
6470    nir_foreach_variable_with_modes(var, ctx->shader, nir_var_system_value)
6471       driver_location = MAX2(driver_location, var->data.driver_location + 1);
6472 
6473    if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT &&
6474        !BITSET_TEST(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID)) {
6475       bool need_sample_id = ctx->shader->info.fs.uses_sample_shading;
6476 
6477       /* "var->data.sample = true" sometimes just means "I want per-sample
6478        * shading", which explains why we can end up with vars having flat
6479        * interpolation with the per-sample bit set. If there's only such
6480        * type of variables, we need to tell DXIL that we read SV_SampleIndex
6481        * to make DXIL validation happy.
6482        */
6483       nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
6484          bool var_can_be_sample_rate = !var->data.centroid && var->data.interpolation != INTERP_MODE_FLAT;
6485          /* If there's an input that will actually force sample-rate shading, then we don't
6486           * need SV_SampleIndex. */
6487          if (var->data.sample && var_can_be_sample_rate) {
6488             need_sample_id = false;
6489             break;
6490          }
6491          /* If there's an input that wants to be sample-rate, but can't be, then we might
6492           * need SV_SampleIndex. */
6493          if (var->data.sample && !var_can_be_sample_rate)
6494             need_sample_id = true;
6495       }
6496 
6497       if (need_sample_id)
6498          BITSET_SET(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID);
6499    }
6500 
6501    for (unsigned i = 0; i < ARRAY_SIZE(possible_sysvalues); ++i) {
6502       struct sysvalue_name *info = &possible_sysvalues[i];
6503       if (info->only_in_shader != MESA_SHADER_NONE &&
6504           info->only_in_shader != ctx->shader->info.stage)
6505          continue;
6506       if (BITSET_TEST(ctx->shader->info.system_values_read, info->value)) {
6507          if (!append_input_or_sysvalue(ctx, info->slot,
6508                                        info->value, info->name,
6509                                        driver_location++))
6510             return false;
6511       }
6512    }
6513    return true;
6514 }
6515 
6516 static int
6517 type_size_vec4(const struct glsl_type *type, bool bindless)
6518 {
6519    return glsl_count_attribute_slots(type, false);
6520 }
6521 
6522 static const unsigned dxil_validator_min_capable_version = DXIL_VALIDATOR_1_4;
6523 static const unsigned dxil_validator_max_capable_version = DXIL_VALIDATOR_1_8;
6524 static const unsigned dxil_min_shader_model = SHADER_MODEL_6_0;
6525 static const unsigned dxil_max_shader_model = SHADER_MODEL_6_8;
6526 
6527 bool
6528 nir_to_dxil(struct nir_shader *s, const struct nir_to_dxil_options *opts,
6529             const struct dxil_logger *logger, struct blob *blob)
6530 {
6531    assert(opts);
6532    bool retval = true;
6533    debug_dxil = (int)debug_get_option_debug_dxil();
6534    blob_init(blob);
6535 
6536    if (opts->shader_model_max < dxil_min_shader_model) {
6537       debug_printf("D3D12: cannot support emitting shader models lower than %d.%d\n",
6538                    dxil_min_shader_model >> 16,
6539                    dxil_min_shader_model & 0xffff);
6540       return false;
6541    }
6542 
   if (opts->shader_model_max > dxil_max_shader_model) {
      debug_printf("D3D12: cannot support emitting shader models higher than %d.%d\n",
                   dxil_max_shader_model >> 16,
                   dxil_max_shader_model & 0xffff);
      return false;
   }

   if (opts->validator_version_max != NO_DXIL_VALIDATION &&
       opts->validator_version_max < dxil_validator_min_capable_version) {
      debug_printf("D3D12: Invalid validator version %d.%d, must be 1.4 or greater\n",
         opts->validator_version_max >> 16,
         opts->validator_version_max & 0xffff);
      return false;
   }

   /* If there is no validation, write the blob as if it were going to be
    * validated by the newest validator we understand. Do the same if the
    * validator is newer than any we know how to write for.
    */
   uint32_t validator_version =
      opts->validator_version_max == NO_DXIL_VALIDATION ||
      opts->validator_version_max > dxil_validator_max_capable_version ?
      dxil_validator_max_capable_version : opts->validator_version_max;

   struct ntd_context *ctx = calloc(1, sizeof(*ctx));
   if (!ctx)
      return false;

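   /* From this point on, every failure path must "goto out" so the module
    * and ralloc context get released. */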
   ctx->opts = opts;
   ctx->shader = s;
   ctx->logger = logger ? logger : &default_logger;

   ctx->ralloc_ctx = ralloc_context(NULL);
   if (!ctx->ralloc_ctx) {
      retval = false;
      goto out;
   }

   util_dynarray_init(&ctx->srv_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->uav_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->cbv_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->sampler_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->resources, ctx->ralloc_ctx);
   dxil_module_init(&ctx->mod, ctx->ralloc_ctx);
   ctx->mod.shader_kind = get_dxil_shader_kind(s);
   ctx->mod.major_version = 6;
   /* Use the highest shader model that's supported and can be validated */
   ctx->mod.minor_version =
      MIN2(opts->shader_model_max & 0xffff, validator_version & 0xffff);
   ctx->mod.major_validator = validator_version >> 16;
   ctx->mod.minor_validator = validator_version & 0xffff;

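   /* SV_PrimitiveID, SV_ViewportArrayIndex and SV_RenderTargetArrayIndex
    * (and stencil-ref/sample-mask outputs) are uint-typed in DXIL; rewrite
    * any float-typed NIR I/O in those slots accordingly. */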
   if (s->info.stage <= MESA_SHADER_FRAGMENT) {
      uint64_t in_mask =
         s->info.stage == MESA_SHADER_VERTEX ?
         0 : (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER);
      uint64_t out_mask =
         s->info.stage == MESA_SHADER_FRAGMENT ?
         ((1ull << FRAG_RESULT_STENCIL) | (1ull << FRAG_RESULT_SAMPLE_MASK)) :
         (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER);

      NIR_PASS_V(s, dxil_nir_fix_io_uint_type, in_mask, out_mask);
   }

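   /* Lower NIR constructs that have no direct DXIL encoding, then
    * scalarize I/O so each load/store touches a single component. */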
   NIR_PASS_V(s, dxil_nir_lower_fquantize2f16);
   NIR_PASS_V(s, nir_lower_frexp);
   NIR_PASS_V(s, nir_lower_flrp, 16 | 32 | 64, true);
   NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size_vec4, nir_lower_io_lower_64bit_to_32);
   NIR_PASS_V(s, dxil_nir_ensure_position_writes);
   NIR_PASS_V(s, dxil_nir_lower_system_values);
   NIR_PASS_V(s, nir_lower_io_to_scalar, nir_var_shader_in | nir_var_system_value | nir_var_shader_out, NULL, NULL);

   /* Do a round of optimization to try to vectorize loads/stores. Otherwise the addresses used for loads
    * might be too opaque for the pass to see that they're next to each other. */
   optimize_nir(s, opts);

   /* Vectorize UBO/SSBO accesses aggressively. This can help increase alignment to enable us to do better
    * chunking of loads and stores after lowering bit sizes. Ignore load/store size limitations here; we'll
    * address them with lower_mem_access_bit_sizes. */
   nir_load_store_vectorize_options vectorize_opts = {
      .callback = vectorize_filter,
      .modes = nir_var_mem_ubo | nir_var_mem_ssbo,
   };
   NIR_PASS_V(s, nir_opt_load_store_vectorize, &vectorize_opts);

   /* Now that they're bloated to the max, address bit-size restrictions and overall size limitations for
    * a single load/store op. */
   struct lower_mem_bit_sizes_data mem_size_data = { s->options, opts };
   nir_lower_mem_access_bit_sizes_options mem_size_options = {
      .modes = nir_var_mem_ubo | nir_var_mem_ssbo,
      .callback = lower_mem_access_bit_sizes_cb,
      .may_lower_unaligned_stores_to_atomics = true,
      .cb_data = &mem_size_data
   };
   NIR_PASS_V(s, nir_lower_mem_access_bit_sizes, &mem_size_options);

   /* Lastly, convert byte-addressed UBO loads to vec4-addressed ones. This
    * pass can also select sub-components from the load and handle loads that
    * straddle vec4 boundaries. */
   NIR_PASS_V(s, nir_lower_ubo_vec4);

   if (opts->shader_model_max < SHADER_MODEL_6_6) {
      /* In a later pass, load_helper_invocation will be lowered to a
       * sample-mask-based fallback, so both load_helper_invocation and
       * is_helper_invocation will be emulated eventually.
       */
      NIR_PASS_V(s, nir_lower_is_helper_invocation);
   }

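   /* DXIL hull shaders carry a separate patch-constant function; split it
    * out of the tess-ctrl shader so it can be emitted independently. */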
   if (ctx->mod.shader_kind == DXIL_HULL_SHADER)
      NIR_PASS_V(s, dxil_nir_split_tess_ctrl, &ctx->tess_ctrl_patch_constant_func);

   if (ctx->mod.shader_kind == DXIL_HULL_SHADER ||
       ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) {
      /* Make sure any derefs are gone after lower_io before updating tess level vars */
      NIR_PASS_V(s, nir_opt_dce);
      NIR_PASS_V(s, dxil_nir_fixup_tess_level_for_domain);
   }

   optimize_nir(s, opts);

   NIR_PASS_V(s, nir_remove_dead_variables,
              nir_var_function_temp | nir_var_mem_constant | nir_var_mem_shared, NULL);

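   /* I/O layout is final now: append inputs for the system values read via
    * the input signature, then turn sysval intrinsics into plain input
    * loads. */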
   if (!allocate_sysvalues(ctx)) {
      /* Go through the common cleanup path instead of leaking ctx. */
      retval = false;
      goto out;
   }

   NIR_PASS_V(s, dxil_nir_lower_sysval_to_load_input, ctx->system_value);
   NIR_PASS_V(s, nir_opt_dce);

   /* This needs to be after any copy prop is done to prevent these movs from being erased */
   NIR_PASS_V(s, dxil_nir_move_consts);
   NIR_PASS_V(s, nir_opt_dce);

   NIR_PASS_V(s, dxil_nir_guess_image_formats);

   if (debug_dxil & DXIL_DEBUG_VERBOSE)
      nir_print_shader(s, stderr);

   if (!emit_module(ctx, opts)) {
      debug_printf("D3D12: emit_module failed\n");
      retval = false;
      goto out;
   }

   if (debug_dxil & DXIL_DEBUG_DUMP_MODULE) {
      struct dxil_dumper *dumper = dxil_dump_create();
      dxil_dump_module(dumper, &ctx->mod);
      fprintf(stderr, "\n");
      dxil_dump_buf_to_file(dumper, stderr);
      fprintf(stderr, "\n\n");
      dxil_dump_free(dumper);
   }

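   /* Assemble the final container: feature flags, I/O signatures, the
    * state-validation blob, and the DXIL module itself. */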
   struct dxil_container container;
   dxil_container_init(&container);
   /* Native low precision disables min-precision */
   if (ctx->mod.feats.native_low_precision)
      ctx->mod.feats.min_precision = false;
   if (!dxil_container_add_features(&container, &ctx->mod.feats)) {
      debug_printf("D3D12: dxil_container_add_features failed\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_add_io_signature(&container,
                                        DXIL_ISG1,
                                        ctx->mod.num_sig_inputs,
                                        ctx->mod.inputs,
                                        ctx->mod.minor_validator >= 7)) {
      debug_printf("D3D12: failed to write input signature\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_add_io_signature(&container,
                                        DXIL_OSG1,
                                        ctx->mod.num_sig_outputs,
                                        ctx->mod.outputs,
                                        ctx->mod.minor_validator >= 7)) {
      debug_printf("D3D12: failed to write output signature\n");
      retval = false;
      goto out;
   }

   if ((ctx->mod.shader_kind == DXIL_HULL_SHADER ||
        ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) &&
       !dxil_container_add_io_signature(&container,
                                        DXIL_PSG1,
                                        ctx->mod.num_sig_patch_consts,
                                        ctx->mod.patch_consts,
                                        ctx->mod.minor_validator >= 7)) {
      debug_printf("D3D12: failed to write patch constant signature\n");
      retval = false;
      goto out;
   }

   struct dxil_validation_state validation_state;
   memset(&validation_state, 0, sizeof(validation_state));
   dxil_fill_validation_state(ctx, &validation_state);

   if (!dxil_container_add_state_validation(&container, &ctx->mod,
                                            &validation_state)) {
      debug_printf("D3D12: failed to write state-validation\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_add_module(&container, &ctx->mod)) {
      debug_printf("D3D12: failed to write module\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_write(&container, blob)) {
      debug_printf("D3D12: dxil_container_write failed\n");
      retval = false;
      goto out;
   }
   dxil_container_finish(&container);

   if (debug_dxil & DXIL_DEBUG_DUMP_BLOB) {
      static int shader_id = 0;
      char buffer[64];
      snprintf(buffer, sizeof(buffer), "shader_%s_%d.blob",
               get_shader_kind_str(ctx->mod.shader_kind), shader_id++);
      debug_printf("Trying to write blob to %s\n", buffer);
      FILE *f = fopen(buffer, "wb");
      if (f) {
         fwrite(blob->data, 1, blob->size, f);
         fclose(f);
      }
   }

out:
   dxil_module_release(&ctx->mod);
   ralloc_free(ctx->ralloc_ctx);
   free(ctx);
   return retval;
}