/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_to_dxil.h"

#include "dxil_container.h"
#include "dxil_dump.h"
#include "dxil_enums.h"
#include "dxil_function.h"
#include "dxil_module.h"
#include "dxil_nir.h"
#include "dxil_signature.h"

#include "nir/nir_builder.h"
#include "nir_deref.h"
#include "util/ralloc.h"
#include "util/u_debug.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"

#include "git_sha1.h"

#include "vulkan/vulkan_core.h"

#include <stdint.h>

int debug_dxil = 0;

static const struct debug_named_value
dxil_debug_options[] = {
   { "verbose", DXIL_DEBUG_VERBOSE, NULL },
   { "dump_blob", DXIL_DEBUG_DUMP_BLOB, "Write shader blobs" },
   { "trace", DXIL_DEBUG_TRACE, "Trace instruction conversion" },
   { "dump_module", DXIL_DEBUG_DUMP_MODULE, "Dump module tree to stderr" },
   DEBUG_NAMED_VALUE_END
};

DEBUG_GET_ONCE_FLAGS_OPTION(debug_dxil, "DXIL_DEBUG", dxil_debug_options, 0)
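
/*
 * Example (u_debug flags are comma-separated):
 *   DXIL_DEBUG=trace,dump_module ./app
 * traces each NIR instruction as it is converted and dumps the resulting
 * module tree to stderr.
 */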

static void
log_nir_instr_unsupported(const struct dxil_logger *logger,
                          const char *message_prefix, const nir_instr *instr)
{
   char *msg = NULL;
   char *instr_str = nir_instr_as_str(instr, NULL);
   asprintf(&msg, "%s: %s\n", message_prefix, instr_str);
   ralloc_free(instr_str);
   assert(msg);
   logger->log(logger->priv, msg);
   free(msg);
}

static void
default_logger_func(void *priv, const char *msg)
{
   fprintf(stderr, "%s", msg);
   unreachable("Unhandled error");
}

static const struct dxil_logger default_logger = { .priv = NULL, .log = default_logger_func };

/* Wrap the whole macro in do/while so the conditional cannot capture a
 * dangling else at the expansion site. */
#define TRACE_CONVERSION(instr) \
   do { \
      if (debug_dxil & DXIL_DEBUG_TRACE) { \
         fprintf(stderr, "Convert '"); \
         nir_print_instr(instr, stderr); \
         fprintf(stderr, "'\n"); \
      } \
   } while (0)

static const nir_shader_compiler_options
nir_options = {
   .compact_arrays = true,
   .lower_ineg = true,
   .lower_fneg = true,
   .lower_ffma16 = true,
   .lower_ffma32 = true,
   .lower_isign = true,
   .lower_fsign = true,
   .lower_iabs = true,
   .lower_fmod = true,
   .lower_fpow = true,
   .lower_scmp = true,
   .lower_ldexp = true,
   .lower_flrp16 = true,
   .lower_flrp32 = true,
   .lower_flrp64 = true,
   .lower_bitfield_extract = true,
   .lower_ifind_msb = true,
   .lower_ufind_msb = true,
   .lower_extract_word = true,
   .lower_extract_byte = true,
   .lower_insert_word = true,
   .lower_insert_byte = true,
   .lower_all_io_to_elements = true,
   .lower_hadd = true,
   .lower_uadd_sat = true,
   .lower_usub_sat = true,
   .lower_iadd_sat = true,
   .lower_uadd_carry = true,
   .lower_usub_borrow = true,
   .lower_mul_high = true,
   .lower_pack_half_2x16 = true,
   .lower_pack_unorm_4x8 = true,
   .lower_pack_snorm_4x8 = true,
   .lower_pack_snorm_2x16 = true,
   .lower_pack_unorm_2x16 = true,
   .lower_pack_64_2x32_split = true,
   .lower_pack_32_2x16_split = true,
   .lower_pack_64_4x16 = true,
   .lower_unpack_64_2x32_split = true,
   .lower_unpack_32_2x16_split = true,
   .lower_unpack_half_2x16 = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_unpack_snorm_4x8 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_unpack_unorm_4x8 = true,
   .lower_interpolate_at = true,
   .has_fsub = true,
   .has_isub = true,
   .has_bfe = true,
   .has_find_msb_rev = true,
   .vertex_id_zero_based = true,
   .lower_base_vertex = true,
   .lower_helper_invocation = true,
   .has_cs_global_id = true,
   .lower_mul_2x32_64 = true,
   .lower_doubles_options =
      nir_lower_drcp |
      nir_lower_dsqrt |
      nir_lower_drsq |
      nir_lower_dfract |
      nir_lower_dtrunc |
      nir_lower_dfloor |
      nir_lower_dceil |
      nir_lower_dround_even,
   .lower_uniforms_to_ubo = true,
   .max_unroll_iterations = 32, /* arbitrary */
   .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out),
   .lower_device_index_to_zero = true,
   .linker_ignore_precision = true,
   .support_16bit_alu = true,
   .preserve_mediump = true,
   .discard_is_demote = true,
   .has_ddx_intrinsics = true,
   .scalarize_ddx = true,
};

const nir_shader_compiler_options *
dxil_get_base_nir_compiler_options(void)
{
   return &nir_options;
}

void
dxil_get_nir_compiler_options(nir_shader_compiler_options *options,
                              enum dxil_shader_model shader_model_max,
                              unsigned supported_int_sizes,
                              unsigned supported_float_sizes)
{
   *options = nir_options;
   if (!(supported_int_sizes & 64)) {
      options->lower_pack_64_2x32_split = false;
      options->lower_unpack_64_2x32_split = false;
      options->lower_int64_options = ~0;
   }
   if (!(supported_float_sizes & 64))
      options->lower_doubles_options = ~0;
   if (shader_model_max >= SHADER_MODEL_6_4) {
      options->has_sdot_4x8 = true;
      options->has_udot_4x8 = true;
   }
}
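
/*
 * Example (hypothetical caller): target SM6.4 with only 16- and 32-bit type
 * support, so every 64-bit integer and double operation is lowered, while the
 * packed dot-product ops become available:
 *
 *   nir_shader_compiler_options opts;
 *   dxil_get_nir_compiler_options(&opts, SHADER_MODEL_6_4, 16 | 32, 16 | 32);
 */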

static bool
emit_llvm_ident(struct dxil_module *m)
{
   const struct dxil_mdnode *compiler = dxil_get_metadata_string(m, "Mesa version " PACKAGE_VERSION MESA_GIT_SHA1);
   if (!compiler)
      return false;

   const struct dxil_mdnode *llvm_ident = dxil_get_metadata_node(m, &compiler, 1);
   return llvm_ident &&
          dxil_add_metadata_named_node(m, "llvm.ident", &llvm_ident, 1);
}

static bool
emit_named_version(struct dxil_module *m, const char *name,
                   int major, int minor)
{
   const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, major);
   const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, minor);
   const struct dxil_mdnode *version_nodes[] = { major_node, minor_node };
   const struct dxil_mdnode *version = dxil_get_metadata_node(m, version_nodes,
                                                              ARRAY_SIZE(version_nodes));
   return dxil_add_metadata_named_node(m, name, &version, 1);
}

static const char *
get_shader_kind_str(enum dxil_shader_kind kind)
{
   switch (kind) {
   case DXIL_PIXEL_SHADER:
      return "ps";
   case DXIL_VERTEX_SHADER:
      return "vs";
   case DXIL_GEOMETRY_SHADER:
      return "gs";
   case DXIL_HULL_SHADER:
      return "hs";
   case DXIL_DOMAIN_SHADER:
      return "ds";
   case DXIL_COMPUTE_SHADER:
      return "cs";
   default:
      unreachable("invalid shader kind");
   }
}

static bool
emit_dx_shader_model(struct dxil_module *m)
{
   const struct dxil_mdnode *type_node = dxil_get_metadata_string(m, get_shader_kind_str(m->shader_kind));
   const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, m->major_version);
   const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, m->minor_version);
   const struct dxil_mdnode *shader_model[] = { type_node, major_node,
                                                minor_node };
   const struct dxil_mdnode *dx_shader_model = dxil_get_metadata_node(m, shader_model, ARRAY_SIZE(shader_model));

   return dxil_add_metadata_named_node(m, "dx.shaderModel",
                                       &dx_shader_model, 1);
}

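/* These tag values appear to mirror DXC's extended resource-property metadata
 * tags (kDxilTypedBufferElementTypeTag / kDxilStructuredBufferElementStrideTag). */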
enum {
   DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG = 0,
   DXIL_STRUCTURED_BUFFER_ELEMENT_STRIDE_TAG = 1
};

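/* DXIL opcode numbers for dx.op.* intrinsics, as assigned by the DXIL
 * specification (DXIL.rst in the DirectXShaderCompiler repository). Only the
 * opcodes this backend emits are listed, which is why the values are sparse. */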
enum dxil_intr {
   DXIL_INTR_LOAD_INPUT = 4,
   DXIL_INTR_STORE_OUTPUT = 5,
   DXIL_INTR_FABS = 6,
   DXIL_INTR_SATURATE = 7,

   DXIL_INTR_ISFINITE = 10,
   DXIL_INTR_ISNORMAL = 11,

   DXIL_INTR_FCOS = 12,
   DXIL_INTR_FSIN = 13,

   DXIL_INTR_FEXP2 = 21,
   DXIL_INTR_FRC = 22,
   DXIL_INTR_FLOG2 = 23,

   DXIL_INTR_SQRT = 24,
   DXIL_INTR_RSQRT = 25,
   DXIL_INTR_ROUND_NE = 26,
   DXIL_INTR_ROUND_NI = 27,
   DXIL_INTR_ROUND_PI = 28,
   DXIL_INTR_ROUND_Z = 29,

   DXIL_INTR_BFREV = 30,
   DXIL_INTR_COUNTBITS = 31,
   DXIL_INTR_FIRSTBIT_LO = 32,
   DXIL_INTR_FIRSTBIT_HI = 33,
   DXIL_INTR_FIRSTBIT_SHI = 34,

   DXIL_INTR_FMAX = 35,
   DXIL_INTR_FMIN = 36,
   DXIL_INTR_IMAX = 37,
   DXIL_INTR_IMIN = 38,
   DXIL_INTR_UMAX = 39,
   DXIL_INTR_UMIN = 40,

   DXIL_INTR_FMA = 47,

   DXIL_INTR_IBFE = 51,
   DXIL_INTR_UBFE = 52,
   DXIL_INTR_BFI = 53,

   DXIL_INTR_CREATE_HANDLE = 57,
   DXIL_INTR_CBUFFER_LOAD_LEGACY = 59,

   DXIL_INTR_SAMPLE = 60,
   DXIL_INTR_SAMPLE_BIAS = 61,
   DXIL_INTR_SAMPLE_LEVEL = 62,
   DXIL_INTR_SAMPLE_GRAD = 63,
   DXIL_INTR_SAMPLE_CMP = 64,
   DXIL_INTR_SAMPLE_CMP_LVL_ZERO = 65,

   DXIL_INTR_TEXTURE_LOAD = 66,
   DXIL_INTR_TEXTURE_STORE = 67,

   DXIL_INTR_BUFFER_LOAD = 68,
   DXIL_INTR_BUFFER_STORE = 69,

   DXIL_INTR_TEXTURE_SIZE = 72,
   DXIL_INTR_TEXTURE_GATHER = 73,
   DXIL_INTR_TEXTURE_GATHER_CMP = 74,

   DXIL_INTR_TEXTURE2DMS_GET_SAMPLE_POSITION = 75,
   DXIL_INTR_RENDER_TARGET_GET_SAMPLE_POSITION = 76,
   DXIL_INTR_RENDER_TARGET_GET_SAMPLE_COUNT = 77,

   DXIL_INTR_ATOMIC_BINOP = 78,
   DXIL_INTR_ATOMIC_CMPXCHG = 79,
   DXIL_INTR_BARRIER = 80,
   DXIL_INTR_TEXTURE_LOD = 81,

   DXIL_INTR_DISCARD = 82,
   DXIL_INTR_DDX_COARSE = 83,
   DXIL_INTR_DDY_COARSE = 84,
   DXIL_INTR_DDX_FINE = 85,
   DXIL_INTR_DDY_FINE = 86,

   DXIL_INTR_EVAL_SNAPPED = 87,
   DXIL_INTR_EVAL_SAMPLE_INDEX = 88,
   DXIL_INTR_EVAL_CENTROID = 89,

   DXIL_INTR_SAMPLE_INDEX = 90,
   DXIL_INTR_COVERAGE = 91,

   DXIL_INTR_THREAD_ID = 93,
   DXIL_INTR_GROUP_ID = 94,
   DXIL_INTR_THREAD_ID_IN_GROUP = 95,
   DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP = 96,

   DXIL_INTR_EMIT_STREAM = 97,
   DXIL_INTR_CUT_STREAM = 98,

   DXIL_INTR_GS_INSTANCE_ID = 100,

   DXIL_INTR_MAKE_DOUBLE = 101,
   DXIL_INTR_SPLIT_DOUBLE = 102,

   DXIL_INTR_LOAD_OUTPUT_CONTROL_POINT = 103,
   DXIL_INTR_LOAD_PATCH_CONSTANT = 104,
   DXIL_INTR_DOMAIN_LOCATION = 105,
   DXIL_INTR_STORE_PATCH_CONSTANT = 106,
   DXIL_INTR_OUTPUT_CONTROL_POINT_ID = 107,
   DXIL_INTR_PRIMITIVE_ID = 108,

   DXIL_INTR_WAVE_IS_FIRST_LANE = 110,
   DXIL_INTR_WAVE_GET_LANE_INDEX = 111,
   DXIL_INTR_WAVE_GET_LANE_COUNT = 112,
   DXIL_INTR_WAVE_ANY_TRUE = 113,
   DXIL_INTR_WAVE_ALL_TRUE = 114,
   DXIL_INTR_WAVE_ACTIVE_ALL_EQUAL = 115,
   DXIL_INTR_WAVE_ACTIVE_BALLOT = 116,
   DXIL_INTR_WAVE_READ_LANE_AT = 117,
   DXIL_INTR_WAVE_READ_LANE_FIRST = 118,
   DXIL_INTR_WAVE_ACTIVE_OP = 119,
   DXIL_INTR_WAVE_ACTIVE_BIT = 120,
   DXIL_INTR_WAVE_PREFIX_OP = 121,
   DXIL_INTR_QUAD_READ_LANE_AT = 122,
   DXIL_INTR_QUAD_OP = 123,

   DXIL_INTR_LEGACY_F32TOF16 = 130,
   DXIL_INTR_LEGACY_F16TOF32 = 131,

   DXIL_INTR_ATTRIBUTE_AT_VERTEX = 137,
   DXIL_INTR_VIEW_ID = 138,

   DXIL_INTR_RAW_BUFFER_LOAD = 139,
   DXIL_INTR_RAW_BUFFER_STORE = 140,

   DXIL_INTR_DOT4_ADD_I8_PACKED = 163,
   DXIL_INTR_DOT4_ADD_U8_PACKED = 164,

   DXIL_INTR_ANNOTATE_HANDLE = 216,
   DXIL_INTR_CREATE_HANDLE_FROM_BINDING = 217,
   DXIL_INTR_CREATE_HANDLE_FROM_HEAP = 218,

   DXIL_INTR_IS_HELPER_LANE = 221,
   DXIL_INTR_SAMPLE_CMP_LEVEL = 224,
   DXIL_INTR_SAMPLE_CMP_GRAD = 254,
   DXIL_INTR_SAMPLE_CMP_BIAS = 255,

   DXIL_INTR_START_VERTEX_LOCATION = 256,
   DXIL_INTR_START_INSTANCE_LOCATION = 257,
};

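/* Values match the DXIL AtomicBinOpCode enumeration used by
 * dx.op.atomicBinOp. */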
enum dxil_atomic_op {
   DXIL_ATOMIC_ADD = 0,
   DXIL_ATOMIC_AND = 1,
   DXIL_ATOMIC_OR = 2,
   DXIL_ATOMIC_XOR = 3,
   DXIL_ATOMIC_IMIN = 4,
   DXIL_ATOMIC_IMAX = 5,
   DXIL_ATOMIC_UMIN = 6,
   DXIL_ATOMIC_UMAX = 7,
   DXIL_ATOMIC_EXCHANGE = 8,
};

static enum dxil_atomic_op
nir_atomic_to_dxil_atomic(nir_atomic_op op)
{
   switch (op) {
   case nir_atomic_op_iadd: return DXIL_ATOMIC_ADD;
   case nir_atomic_op_iand: return DXIL_ATOMIC_AND;
   case nir_atomic_op_ior: return DXIL_ATOMIC_OR;
   case nir_atomic_op_ixor: return DXIL_ATOMIC_XOR;
   case nir_atomic_op_imin: return DXIL_ATOMIC_IMIN;
   case nir_atomic_op_imax: return DXIL_ATOMIC_IMAX;
   case nir_atomic_op_umin: return DXIL_ATOMIC_UMIN;
   case nir_atomic_op_umax: return DXIL_ATOMIC_UMAX;
   case nir_atomic_op_xchg: return DXIL_ATOMIC_EXCHANGE;
   default: unreachable("Unsupported atomic op");
   }
}

static enum dxil_rmw_op
nir_atomic_to_dxil_rmw(nir_atomic_op op)
{
   switch (op) {
   case nir_atomic_op_iadd: return DXIL_RMWOP_ADD;
   case nir_atomic_op_iand: return DXIL_RMWOP_AND;
   case nir_atomic_op_ior: return DXIL_RMWOP_OR;
   case nir_atomic_op_ixor: return DXIL_RMWOP_XOR;
   case nir_atomic_op_imin: return DXIL_RMWOP_MIN;
   case nir_atomic_op_imax: return DXIL_RMWOP_MAX;
   case nir_atomic_op_umin: return DXIL_RMWOP_UMIN;
   case nir_atomic_op_umax: return DXIL_RMWOP_UMAX;
   case nir_atomic_op_xchg: return DXIL_RMWOP_XCHG;
   default: unreachable("Unsupported atomic op");
   }
}

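/* Describes one contiguous range of bindings for a resource: id is the index
 * of the resource's metadata record, binding the lower bound of the range,
 * size the number of bindings in the range (0 meaning unbounded), and space
 * the register space. */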
typedef struct {
   unsigned id;
   unsigned binding;
   unsigned size;
   unsigned space;
} resource_array_layout;

static void
fill_resource_metadata(struct dxil_module *m, const struct dxil_mdnode **fields,
                       const struct dxil_type *struct_type,
                       const char *name, const resource_array_layout *layout)
{
   const struct dxil_type *pointer_type = dxil_module_get_pointer_type(m, struct_type);
   const struct dxil_value *pointer_undef = dxil_module_get_undef(m, pointer_type);

   fields[0] = dxil_get_metadata_int32(m, layout->id); // resource ID
   fields[1] = dxil_get_metadata_value(m, pointer_type, pointer_undef); // global constant symbol
   fields[2] = dxil_get_metadata_string(m, name ? name : ""); // name
   fields[3] = dxil_get_metadata_int32(m, layout->space); // space ID
   fields[4] = dxil_get_metadata_int32(m, layout->binding); // lower bound
   fields[5] = dxil_get_metadata_int32(m, layout->size); // range size
}

static const struct dxil_mdnode *
emit_srv_metadata(struct dxil_module *m, const struct dxil_type *elem_type,
                  const char *name, const resource_array_layout *layout,
                  enum dxil_component_type comp_type,
                  enum dxil_resource_kind res_kind)
{
   const struct dxil_mdnode *fields[9];

   const struct dxil_mdnode *metadata_tag_nodes[2];

   fill_resource_metadata(m, fields, elem_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape
   fields[7] = dxil_get_metadata_int1(m, 0); // sample count
   if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&
       res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {
      metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);
      metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);
      fields[8] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata
   } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      fields[8] = NULL;
   else
      unreachable("Structured buffers not supported yet");

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_uav_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                  const char *name, const resource_array_layout *layout,
                  enum dxil_component_type comp_type,
                  enum dxil_resource_kind res_kind,
                  enum gl_access_qualifier access)
{
   const struct dxil_mdnode *fields[11];

   const struct dxil_mdnode *metadata_tag_nodes[2];

   fill_resource_metadata(m, fields, struct_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape
   fields[7] = dxil_get_metadata_int1(m, (access & ACCESS_COHERENT) != 0); // globally-coherent
   fields[8] = dxil_get_metadata_int1(m, false); // has counter
   fields[9] = dxil_get_metadata_int1(m, false); // is ROV
   if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&
       res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {
      metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);
      metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);
      fields[10] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata
   } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      fields[10] = NULL;
   else
      unreachable("Structured buffers not supported yet");

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_cbv_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                  const char *name, const resource_array_layout *layout,
                  unsigned size)
{
   const struct dxil_mdnode *fields[8];

   fill_resource_metadata(m, fields, struct_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, size); // constant buffer size
   fields[7] = NULL; // metadata

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_sampler_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                      nir_variable *var, const resource_array_layout *layout)
{
   const struct dxil_mdnode *fields[8];
   const struct glsl_type *type = glsl_without_array(var->type);

   fill_resource_metadata(m, fields, struct_type, var->name, layout);
   enum dxil_sampler_kind sampler_kind = glsl_sampler_type_is_shadow(type) ?
      DXIL_SAMPLER_KIND_COMPARISON : DXIL_SAMPLER_KIND_DEFAULT;
   fields[6] = dxil_get_metadata_int32(m, sampler_kind); // sampler kind
   fields[7] = NULL; // metadata

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}


#define MAX_SRVS 128
#define MAX_UAVS 64
#define MAX_CBVS 64 // ??
#define MAX_SAMPLERS 64 // ??

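/* Per-component DXIL values for a single NIR SSA def. */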
struct dxil_def {
   const struct dxil_value *chans[NIR_MAX_VEC_COMPONENTS];
};

struct ntd_context {
   void *ralloc_ctx;
   const struct nir_to_dxil_options *opts;
   struct nir_shader *shader;

   struct dxil_module mod;

   struct util_dynarray srv_metadata_nodes;
   const struct dxil_value *srv_handles[MAX_SRVS];

   struct util_dynarray uav_metadata_nodes;
   const struct dxil_value *ssbo_handles[MAX_UAVS];
   const struct dxil_value *image_handles[MAX_UAVS];
   uint32_t num_uavs;

   struct util_dynarray cbv_metadata_nodes;
   const struct dxil_value *cbv_handles[MAX_CBVS];

   struct util_dynarray sampler_metadata_nodes;
   const struct dxil_value *sampler_handles[MAX_SAMPLERS];

   struct util_dynarray resources;

   const struct dxil_mdnode *shader_property_nodes[6];
   size_t num_shader_property_nodes;

   struct dxil_def *defs;
   unsigned num_defs;
   struct hash_table *phis;

   const struct dxil_value **sharedvars;
   const struct dxil_value **scratchvars;
   const struct dxil_value **consts;

   nir_variable *system_value[SYSTEM_VALUE_MAX];

   nir_function *tess_ctrl_patch_constant_func;
   unsigned tess_input_control_point_count;

   struct dxil_func_def *main_func_def;
   struct dxil_func_def *tess_ctrl_patch_constant_func_def;
   unsigned unnamed_ubo_count;

   BITSET_WORD *float_types;
   BITSET_WORD *int_types;

   const struct dxil_logger *logger;
};

static const char *
unary_func_name(enum dxil_intr intr)
{
   switch (intr) {
   case DXIL_INTR_COUNTBITS:
   case DXIL_INTR_FIRSTBIT_HI:
   case DXIL_INTR_FIRSTBIT_SHI:
   case DXIL_INTR_FIRSTBIT_LO:
      return "dx.op.unaryBits";
   case DXIL_INTR_ISFINITE:
   case DXIL_INTR_ISNORMAL:
      return "dx.op.isSpecialFloat";
   default:
      return "dx.op.unary";
   }
}

static const struct dxil_value *
emit_unary_call(struct ntd_context *ctx, enum overload_type overload,
                enum dxil_intr intr,
                const struct dxil_value *op0)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod,
                                                    unary_func_name(intr),
                                                    overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      op0
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

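/* All dx.op.* helpers follow the convention shown in emit_unary_call above:
 * the first call argument is always the i32 DXIL opcode, followed by the
 * operands. */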
static const struct dxil_value *
emit_binary_call(struct ntd_context *ctx, enum overload_type overload,
                 enum dxil_intr intr,
                 const struct dxil_value *op0, const struct dxil_value *op1)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.binary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      op0,
      op1
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_tertiary_call(struct ntd_context *ctx, enum overload_type overload,
                   enum dxil_intr intr,
                   const struct dxil_value *op0,
                   const struct dxil_value *op1,
                   const struct dxil_value *op2)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.tertiary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      op0,
      op1,
      op2
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_quaternary_call(struct ntd_context *ctx, enum overload_type overload,
                     enum dxil_intr intr,
                     const struct dxil_value *op0,
                     const struct dxil_value *op1,
                     const struct dxil_value *op2,
                     const struct dxil_value *op3)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.quaternary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      op0,
      op1,
      op2,
      op3
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_threadid_call(struct ntd_context *ctx, const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadId", DXIL_I32);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_THREAD_ID);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_threadidingroup_call(struct ntd_context *ctx,
                          const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadIdInGroup", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_THREAD_ID_IN_GROUP);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_flattenedthreadidingroup_call(struct ntd_context *ctx)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.flattenedThreadIdInGroup", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_groupid_call(struct ntd_context *ctx, const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.groupId", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_GROUP_ID);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_raw_bufferload_call(struct ntd_context *ctx,
                         const struct dxil_value *handle,
                         const struct dxil_value *coord[2],
                         enum overload_type overload,
                         unsigned component_count,
                         unsigned alignment)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.rawBufferLoad", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_RAW_BUFFER_LOAD);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1],
      dxil_module_get_int8_const(&ctx->mod, (1 << component_count) - 1),
      dxil_module_get_int32_const(&ctx->mod, alignment),
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_bufferload_call(struct ntd_context *ctx,
                     const struct dxil_value *handle,
                     const struct dxil_value *coord[2],
                     enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferLoad", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_BUFFER_LOAD);
   const struct dxil_value *args[] = { opcode, handle, coord[0], coord[1] };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_raw_bufferstore_call(struct ntd_context *ctx,
                          const struct dxil_value *handle,
                          const struct dxil_value *coord[2],
                          const struct dxil_value *value[4],
                          const struct dxil_value *write_mask,
                          enum overload_type overload,
                          unsigned alignment)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.rawBufferStore", overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_RAW_BUFFER_STORE);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1],
      value[0], value[1], value[2], value[3],
      write_mask,
      dxil_module_get_int32_const(&ctx->mod, alignment),
   };

   return dxil_emit_call_void(&ctx->mod, func,
                              args, ARRAY_SIZE(args));
}

static bool
emit_bufferstore_call(struct ntd_context *ctx,
                      const struct dxil_value *handle,
                      const struct dxil_value *coord[2],
                      const struct dxil_value *value[4],
                      const struct dxil_value *write_mask,
                      enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferStore", overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_BUFFER_STORE);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1],
      value[0], value[1], value[2], value[3],
      write_mask
   };

   return dxil_emit_call_void(&ctx->mod, func,
                              args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_textureload_call(struct ntd_context *ctx,
                      const struct dxil_value *handle,
                      const struct dxil_value *coord[3],
                      enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", overload);
   if (!func)
      return NULL;
   const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);
   const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_TEXTURE_LOAD);
   const struct dxil_value *args[] = { opcode, handle,
      /*lod_or_sample*/ int_undef,
      coord[0], coord[1], coord[2],
      /* offsets */ int_undef, int_undef, int_undef};

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_texturestore_call(struct ntd_context *ctx,
                       const struct dxil_value *handle,
                       const struct dxil_value *coord[3],
                       const struct dxil_value *value[4],
                       const struct dxil_value *write_mask,
                       enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureStore", overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_TEXTURE_STORE);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1], coord[2],
      value[0], value[1], value[2], value[3],
      write_mask
   };

   return dxil_emit_call_void(&ctx->mod, func,
                              args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_atomic_binop(struct ntd_context *ctx,
                  const struct dxil_value *handle,
                  enum dxil_atomic_op atomic_op,
                  const struct dxil_value *coord[3],
                  const struct dxil_value *value)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.atomicBinOp", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_BINOP);
   const struct dxil_value *atomic_op_value =
      dxil_module_get_int32_const(&ctx->mod, atomic_op);
   const struct dxil_value *args[] = {
      opcode, handle, atomic_op_value,
      coord[0], coord[1], coord[2], value
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_atomic_cmpxchg(struct ntd_context *ctx,
                    const struct dxil_value *handle,
                    const struct dxil_value *coord[3],
                    const struct dxil_value *cmpval,
                    const struct dxil_value *newval)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.atomicCompareExchange", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_CMPXCHG);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1], coord[2], cmpval, newval
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_createhandle_call_pre_6_6(struct ntd_context *ctx,
                               enum dxil_resource_class resource_class,
                               unsigned lower_bound,
                               unsigned upper_bound,
                               unsigned space,
                               unsigned resource_range_id,
                               const struct dxil_value *resource_range_index,
                               bool non_uniform_resource_index)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE);
   const struct dxil_value *resource_class_value = dxil_module_get_int8_const(&ctx->mod, resource_class);
   const struct dxil_value *resource_range_id_value = dxil_module_get_int32_const(&ctx->mod, resource_range_id);
   const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
   if (!opcode || !resource_class_value || !resource_range_id_value ||
       !non_uniform_resource_index_value)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      resource_class_value,
      resource_range_id_value,
      resource_range_index,
      non_uniform_resource_index_value
   };

   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.createHandle", DXIL_NONE);

   if (!func)
      return NULL;

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_annotate_handle(struct ntd_context *ctx,
                     const struct dxil_value *unannotated_handle,
                     const struct dxil_value *res_props)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ANNOTATE_HANDLE);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      unannotated_handle,
      res_props
   };

   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.annotateHandle", DXIL_NONE);

   if (!func)
      return NULL;

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_annotate_handle_from_metadata(struct ntd_context *ctx,
                                   enum dxil_resource_class resource_class,
                                   unsigned resource_range_id,
                                   const struct dxil_value *unannotated_handle)
{
   const struct util_dynarray *mdnodes;
   switch (resource_class) {
   case DXIL_RESOURCE_CLASS_SRV:
      mdnodes = &ctx->srv_metadata_nodes;
      break;
   case DXIL_RESOURCE_CLASS_UAV:
      mdnodes = &ctx->uav_metadata_nodes;
      break;
   case DXIL_RESOURCE_CLASS_CBV:
      mdnodes = &ctx->cbv_metadata_nodes;
      break;
   case DXIL_RESOURCE_CLASS_SAMPLER:
      mdnodes = &ctx->sampler_metadata_nodes;
      break;
   default:
      unreachable("Invalid resource class");
   }

   const struct dxil_mdnode *mdnode = *util_dynarray_element(mdnodes, const struct dxil_mdnode *, resource_range_id);
   const struct dxil_value *res_props = dxil_module_get_res_props_const(&ctx->mod, resource_class, mdnode);
   if (!res_props)
      return NULL;

   return emit_annotate_handle(ctx, unannotated_handle, res_props);
}

static const struct dxil_value *
emit_createhandle_and_annotate(struct ntd_context *ctx,
                               enum dxil_resource_class resource_class,
                               unsigned lower_bound,
                               unsigned upper_bound,
                               unsigned space,
                               unsigned resource_range_id,
                               const struct dxil_value *resource_range_index,
                               bool non_uniform_resource_index)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE_FROM_BINDING);
   const struct dxil_value *res_bind = dxil_module_get_res_bind_const(&ctx->mod, lower_bound, upper_bound, space, resource_class);
   const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
   if (!opcode || !res_bind || !non_uniform_resource_index_value)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      res_bind,
      resource_range_index,
      non_uniform_resource_index_value
   };

   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.createHandleFromBinding", DXIL_NONE);

   if (!func)
      return NULL;

   const struct dxil_value *unannotated_handle = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!unannotated_handle)
      return NULL;

   return emit_annotate_handle_from_metadata(ctx, resource_class, resource_range_id, unannotated_handle);
}

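/* Shader Model 6.6 replaced dx.op.createHandle with
 * dx.op.createHandleFromBinding followed by dx.op.annotateHandle, so pick the
 * form matching the module's target version. */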
static const struct dxil_value *
emit_createhandle_call(struct ntd_context *ctx,
                       enum dxil_resource_class resource_class,
                       unsigned lower_bound,
                       unsigned upper_bound,
                       unsigned space,
                       unsigned resource_range_id,
                       const struct dxil_value *resource_range_index,
                       bool non_uniform_resource_index)
{
   if (ctx->mod.minor_version < 6)
      return emit_createhandle_call_pre_6_6(ctx, resource_class, lower_bound, upper_bound, space, resource_range_id, resource_range_index, non_uniform_resource_index);
   else
      return emit_createhandle_and_annotate(ctx, resource_class, lower_bound, upper_bound, space, resource_range_id, resource_range_index, non_uniform_resource_index);
}

static const struct dxil_value *
emit_createhandle_call_const_index(struct ntd_context *ctx,
                                   enum dxil_resource_class resource_class,
                                   unsigned lower_bound,
                                   unsigned upper_bound,
                                   unsigned space,
                                   unsigned resource_range_id,
                                   unsigned resource_range_index,
                                   bool non_uniform_resource_index)
{
   const struct dxil_value *resource_range_index_value = dxil_module_get_int32_const(&ctx->mod, resource_range_index);
   if (!resource_range_index_value)
      return NULL;

   return emit_createhandle_call(ctx, resource_class, lower_bound, upper_bound, space,
                                 resource_range_id, resource_range_index_value,
                                 non_uniform_resource_index);
}

static const struct dxil_value *
emit_createhandle_heap(struct ntd_context *ctx,
                       const struct dxil_value *resource_range_index,
                       bool is_sampler,
                       bool non_uniform_resource_index)
{
   if (is_sampler)
      ctx->mod.feats.sampler_descriptor_heap_indexing = true;
   else
      ctx->mod.feats.resource_descriptor_heap_indexing = true;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE_FROM_HEAP);
   const struct dxil_value *sampler = dxil_module_get_int1_const(&ctx->mod, is_sampler);
   const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
   if (!opcode || !sampler || !non_uniform_resource_index_value)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      resource_range_index,
      sampler,
      non_uniform_resource_index_value
   };

   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.createHandleFromHeap", DXIL_NONE);

   if (!func)
      return NULL;

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

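/* Records a binding range in the module's resource table. A size of 0, or a
 * range whose end would overflow, is encoded as an unbounded upper bound
 * (UINT_MAX). */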
1194 static void
add_resource(struct ntd_context * ctx,enum dxil_resource_type type,enum dxil_resource_kind kind,const resource_array_layout * layout)1195 add_resource(struct ntd_context *ctx, enum dxil_resource_type type,
1196 enum dxil_resource_kind kind,
1197 const resource_array_layout *layout)
1198 {
1199 struct dxil_resource_v0 *resource_v0 = NULL;
1200 struct dxil_resource_v1 *resource_v1 = NULL;
1201 if (ctx->mod.minor_validator >= 6) {
1202 resource_v1 = util_dynarray_grow(&ctx->resources, struct dxil_resource_v1, 1);
1203 resource_v0 = &resource_v1->v0;
1204 } else {
1205 resource_v0 = util_dynarray_grow(&ctx->resources, struct dxil_resource_v0, 1);
1206 }
1207 resource_v0->resource_type = type;
1208 resource_v0->space = layout->space;
1209 resource_v0->lower_bound = layout->binding;
1210 if (layout->size == 0 || (uint64_t)layout->size + layout->binding >= UINT_MAX)
1211 resource_v0->upper_bound = UINT_MAX;
1212 else
1213 resource_v0->upper_bound = layout->binding + layout->size - 1;
1214 if (type == DXIL_RES_UAV_TYPED ||
1215 type == DXIL_RES_UAV_RAW ||
1216 type == DXIL_RES_UAV_STRUCTURED) {
1217 uint32_t new_uav_count = ctx->num_uavs + layout->size;
1218 if (layout->size == 0 || new_uav_count < ctx->num_uavs)
1219 ctx->num_uavs = UINT_MAX;
1220 else
1221 ctx->num_uavs = new_uav_count;
1222 if (ctx->mod.minor_validator >= 6 && ctx->num_uavs > 8)
1223 ctx->mod.feats.use_64uavs = 1;
1224 }
1225
1226 if (resource_v1) {
1227 resource_v1->resource_kind = kind;
1228 /* No flags supported yet */
1229 resource_v1->resource_flags = 0;
1230 }
1231 }
1232
1233 static const struct dxil_value *
emit_createhandle_call_dynamic(struct ntd_context * ctx,enum dxil_resource_class resource_class,unsigned space,unsigned binding,const struct dxil_value * resource_range_index,bool non_uniform_resource_index)1234 emit_createhandle_call_dynamic(struct ntd_context *ctx,
1235 enum dxil_resource_class resource_class,
1236 unsigned space,
1237 unsigned binding,
1238 const struct dxil_value *resource_range_index,
1239 bool non_uniform_resource_index)
1240 {
1241 unsigned offset = 0;
1242 unsigned count = 0;
1243
1244 unsigned num_srvs = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
1245 unsigned num_uavs = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
1246 unsigned num_cbvs = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);
1247 unsigned num_samplers = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);
1248
1249 switch (resource_class) {
1250 case DXIL_RESOURCE_CLASS_UAV:
1251 offset = num_srvs + num_samplers + num_cbvs;
1252 count = num_uavs;
1253 break;
1254 case DXIL_RESOURCE_CLASS_SRV:
1255 offset = num_samplers + num_cbvs;
1256 count = num_srvs;
1257 break;
1258 case DXIL_RESOURCE_CLASS_SAMPLER:
1259 offset = num_cbvs;
1260 count = num_samplers;
1261 break;
1262 case DXIL_RESOURCE_CLASS_CBV:
1263 offset = 0;
1264 count = num_cbvs;
1265 break;
1266 }
1267
1268 unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
1269 sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
1270 assert(offset + count <= ctx->resources.size / resource_element_size);
1271 for (unsigned i = offset; i < offset + count; ++i) {
1272 const struct dxil_resource_v0 *resource = (const struct dxil_resource_v0 *)((const char *)ctx->resources.data + resource_element_size * i);
1273 if (resource->space == space &&
1274 resource->lower_bound <= binding &&
1275 resource->upper_bound >= binding) {
1276 return emit_createhandle_call(ctx, resource_class, resource->lower_bound,
1277 resource->upper_bound, space,
1278 i - offset,
1279 resource_range_index,
1280 non_uniform_resource_index);
1281 }
1282 }
1283
1284 unreachable("Resource access for undeclared range");
1285 }
1286
1287 static bool
emit_srv(struct ntd_context * ctx,nir_variable * var,unsigned count)1288 emit_srv(struct ntd_context *ctx, nir_variable *var, unsigned count)
1289 {
1290 unsigned id = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
1291 unsigned binding = var->data.binding;
1292 resource_array_layout layout = {id, binding, count, var->data.descriptor_set};
1293
1294 enum dxil_component_type comp_type;
1295 enum dxil_resource_kind res_kind;
1296 enum dxil_resource_type res_type;
1297 if (var->data.mode == nir_var_mem_ssbo) {
1298 comp_type = DXIL_COMP_TYPE_INVALID;
1299 res_kind = DXIL_RESOURCE_KIND_RAW_BUFFER;
1300 res_type = DXIL_RES_SRV_RAW;
1301 } else {
1302 comp_type = dxil_get_comp_type(var->type);
1303 res_kind = dxil_get_resource_kind(var->type);
1304 res_type = DXIL_RES_SRV_TYPED;
1305 }
1306 const struct dxil_type *res_type_as_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, 4, false /* readwrite */);
1307
1308 if (glsl_type_is_array(var->type))
1309 res_type_as_type = dxil_module_get_array_type(&ctx->mod, res_type_as_type, count);
1310
1311 const struct dxil_mdnode *srv_meta = emit_srv_metadata(&ctx->mod, res_type_as_type, var->name,
1312 &layout, comp_type, res_kind);
1313
1314 if (!srv_meta)
1315 return false;
1316
1317 util_dynarray_append(&ctx->srv_metadata_nodes, const struct dxil_mdnode *, srv_meta);
1318 add_resource(ctx, res_type, res_kind, &layout);
1319 if (res_type == DXIL_RES_SRV_RAW)
1320 ctx->mod.raw_and_structured_buffers = true;
1321
1322 return true;
1323 }
1324
1325 static bool
emit_uav(struct ntd_context * ctx,unsigned binding,unsigned space,unsigned count,enum dxil_component_type comp_type,unsigned num_comps,enum dxil_resource_kind res_kind,enum gl_access_qualifier access,const char * name)1326 emit_uav(struct ntd_context *ctx, unsigned binding, unsigned space, unsigned count,
1327 enum dxil_component_type comp_type, unsigned num_comps, enum dxil_resource_kind res_kind,
1328 enum gl_access_qualifier access, const char *name)
1329 {
1330 unsigned id = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
1331 resource_array_layout layout = { id, binding, count, space };
1332
1333 const struct dxil_type *res_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, num_comps, true /* readwrite */);
1334 res_type = dxil_module_get_array_type(&ctx->mod, res_type, count);
1335 const struct dxil_mdnode *uav_meta = emit_uav_metadata(&ctx->mod, res_type, name,
1336 &layout, comp_type, res_kind, access);
1337
1338 if (!uav_meta)
1339 return false;
1340
1341 util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta);
1342 if (ctx->mod.minor_validator < 6 &&
1343 util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8)
1344 ctx->mod.feats.use_64uavs = 1;
1345
1346 add_resource(ctx, res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER ? DXIL_RES_UAV_RAW : DXIL_RES_UAV_TYPED, res_kind, &layout);
1347 if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
1348 ctx->mod.raw_and_structured_buffers = true;
1349 if (ctx->mod.shader_kind != DXIL_PIXEL_SHADER &&
1350 ctx->mod.shader_kind != DXIL_COMPUTE_SHADER)
1351 ctx->mod.feats.uavs_at_every_stage = true;
1352
1353 return true;
1354 }
1355
1356 static bool
emit_globals(struct ntd_context * ctx,unsigned size)1357 emit_globals(struct ntd_context *ctx, unsigned size)
1358 {
1359 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo)
1360 size++;
1361
1362 if (!size)
1363 return true;
1364
1365 if (!emit_uav(ctx, 0, 0, size, DXIL_COMP_TYPE_INVALID, 1, DXIL_RESOURCE_KIND_RAW_BUFFER, 0, "globals"))
1366 return false;
1367
1368 return true;
1369 }
1370
1371 static bool
emit_uav_var(struct ntd_context * ctx,nir_variable * var,unsigned count)1372 emit_uav_var(struct ntd_context *ctx, nir_variable *var, unsigned count)
1373 {
1374 unsigned binding, space;
1375 if (ctx->opts->environment == DXIL_ENVIRONMENT_GL) {
1376 /* For GL, the image intrinsics are already lowered, using driver_location
1377 * as the 0-based image index. Use space 1 so that we can keep using these
1378 * NIR constants without having to remap them, and so they don't overlap
1379 * SSBOs, which are also 0-based UAV bindings.
1380 */
1381 binding = var->data.driver_location;
1382 space = 1;
1383 } else {
1384 binding = var->data.binding;
1385 space = var->data.descriptor_set;
1386 }
1387 enum dxil_component_type comp_type = dxil_get_comp_type(var->type);
1388 enum dxil_resource_kind res_kind = dxil_get_resource_kind(var->type);
1389 const char *name = var->name;
1390
1391 return emit_uav(ctx, binding, space, count, comp_type,
1392 util_format_get_nr_components(var->data.image.format),
1393 res_kind, var->data.access, name);
1394 }
1395
1396 static const struct dxil_value *
get_value_for_const(struct dxil_module * mod,nir_const_value * c,const struct dxil_type * type)1397 get_value_for_const(struct dxil_module *mod, nir_const_value *c, const struct dxil_type *type)
1398 {
1399 if (type == mod->int1_type) return dxil_module_get_int1_const(mod, c->b);
1400 if (type == mod->float32_type) return dxil_module_get_float_const(mod, c->f32);
1401 if (type == mod->int32_type) return dxil_module_get_int32_const(mod, c->i32);
1402 if (type == mod->int16_type) {
1403 mod->feats.min_precision = true;
1404 return dxil_module_get_int16_const(mod, c->i16);
1405 }
1406 if (type == mod->int64_type) {
1407 mod->feats.int64_ops = true;
1408 return dxil_module_get_int64_const(mod, c->i64);
1409 }
1410 if (type == mod->float16_type) {
1411 mod->feats.min_precision = true;
1412 return dxil_module_get_float16_const(mod, c->u16);
1413 }
1414 if (type == mod->float64_type) {
1415 mod->feats.doubles = true;
1416 return dxil_module_get_double_const(mod, c->f64);
1417 }
1418 unreachable("Invalid type");
1419 }
1420
1421 static const struct dxil_type *
get_type_for_glsl_base_type(struct dxil_module * mod,enum glsl_base_type type)1422 get_type_for_glsl_base_type(struct dxil_module *mod, enum glsl_base_type type)
1423 {
1424 uint32_t bit_size = glsl_base_type_bit_size(type);
1425 if (nir_alu_type_get_base_type(nir_get_nir_type_for_glsl_base_type(type)) == nir_type_float)
1426 return dxil_module_get_float_type(mod, bit_size);
1427 return dxil_module_get_int_type(mod, bit_size);
1428 }
1429
1430 static const struct dxil_type *
get_type_for_glsl_type(struct dxil_module * mod,const struct glsl_type * type)1431 get_type_for_glsl_type(struct dxil_module *mod, const struct glsl_type *type)
1432 {
1433 if (glsl_type_is_scalar(type))
1434 return get_type_for_glsl_base_type(mod, glsl_get_base_type(type));
1435
1436 if (glsl_type_is_vector(type))
1437 return dxil_module_get_vector_type(mod, get_type_for_glsl_base_type(mod, glsl_get_base_type(type)),
1438 glsl_get_vector_elements(type));
1439
1440 if (glsl_type_is_array(type))
1441 return dxil_module_get_array_type(mod, get_type_for_glsl_type(mod, glsl_get_array_element(type)),
1442 glsl_array_size(type));
1443
1444 assert(glsl_type_is_struct(type));
1445 uint32_t size = glsl_get_length(type);
1446 const struct dxil_type **fields = calloc(sizeof(const struct dxil_type *), size);
1447 for (uint32_t i = 0; i < size; ++i)
1448 fields[i] = get_type_for_glsl_type(mod, glsl_get_struct_field(type, i));
1449 const struct dxil_type *ret = dxil_module_get_struct_type(mod, glsl_get_type_name(type), fields, size);
1450 free((void *)fields);
1451 return ret;
1452 }
1453
static const struct dxil_value *
get_value_for_const_aggregate(struct dxil_module *mod, nir_constant *c, const struct glsl_type *type)
{
   const struct dxil_type *dxil_type = get_type_for_glsl_type(mod, type);
   if (glsl_type_is_vector_or_scalar(type)) {
      const struct dxil_type *element_type = get_type_for_glsl_base_type(mod, glsl_get_base_type(type));
      const struct dxil_value *elements[NIR_MAX_VEC_COMPONENTS];
      for (uint32_t i = 0; i < glsl_get_vector_elements(type); ++i)
         elements[i] = get_value_for_const(mod, &c->values[i], element_type);
      if (glsl_type_is_scalar(type))
         return elements[0];
      return dxil_module_get_vector_const(mod, dxil_type, elements);
   }

   uint32_t num_values = glsl_get_length(type);
   assert(num_values == c->num_elements);
   const struct dxil_value **values = calloc(num_values, sizeof(const struct dxil_value *));
   const struct dxil_value *ret;
   if (glsl_type_is_array(type)) {
      const struct glsl_type *element_type = glsl_get_array_element(type);
      for (uint32_t i = 0; i < num_values; ++i)
         values[i] = get_value_for_const_aggregate(mod, c->elements[i], element_type);
      ret = dxil_module_get_array_const(mod, dxil_type, values);
   } else {
      for (uint32_t i = 0; i < num_values; ++i)
         values[i] = get_value_for_const_aggregate(mod, c->elements[i], glsl_get_struct_field(type, i));
      ret = dxil_module_get_struct_const(mod, dxil_type, values);
   }
   free((void *)values);
   return ret;
}

static bool
emit_global_consts(struct ntd_context *ctx)
{
   uint32_t index = 0;
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_constant) {
      assert(var->constant_initializer);
      var->data.driver_location = index++;
   }

   ctx->consts = ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, index);

   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_constant) {
      if (!var->name)
         var->name = ralloc_asprintf(var, "const_%d", var->data.driver_location);

      const struct dxil_value *agg_vals =
         get_value_for_const_aggregate(&ctx->mod, var->constant_initializer, var->type);
      if (!agg_vals)
         return false;

      const struct dxil_value *gvar = dxil_add_global_ptr_var(&ctx->mod, var->name,
                                                              dxil_value_get_type(agg_vals),
                                                              DXIL_AS_DEFAULT, 16,
                                                              agg_vals);
      if (!gvar)
         return false;

      ctx->consts[var->data.driver_location] = gvar;
   }

   return true;
}

static bool
emit_shared_vars(struct ntd_context *ctx)
{
   uint32_t index = 0;
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_shared)
      var->data.driver_location = index++;

   ctx->sharedvars = ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, index);

   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_shared) {
      if (!var->name)
         var->name = ralloc_asprintf(var, "shared_%d", var->data.driver_location);
      const struct dxil_value *gvar = dxil_add_global_ptr_var(&ctx->mod, var->name,
                                                              get_type_for_glsl_type(&ctx->mod, var->type),
                                                              DXIL_AS_GROUPSHARED, 16,
                                                              NULL);
      if (!gvar)
         return false;

      ctx->sharedvars[var->data.driver_location] = gvar;
   }

   return true;
}

static bool
emit_cbv(struct ntd_context *ctx, unsigned binding, unsigned space,
         unsigned size, unsigned count, char *name)
{
   assert(count != 0);

   unsigned idx = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);

   const struct dxil_type *float32 = dxil_module_get_float_type(&ctx->mod, 32);
   const struct dxil_type *array_type = dxil_module_get_array_type(&ctx->mod, float32, size);
   const struct dxil_type *buffer_type = dxil_module_get_struct_type(&ctx->mod, name,
                                                                     &array_type, 1);
   // All ubo[1]s should have been lowered to ubo with static indexing
   const struct dxil_type *final_type = count != 1 ? dxil_module_get_array_type(&ctx->mod, buffer_type, count) : buffer_type;
   resource_array_layout layout = {idx, binding, count, space};
   const struct dxil_mdnode *cbv_meta = emit_cbv_metadata(&ctx->mod, final_type,
                                                          name, &layout, 4 * size);

   if (!cbv_meta)
      return false;

   util_dynarray_append(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *, cbv_meta);
   add_resource(ctx, DXIL_RES_CBV, DXIL_RESOURCE_KIND_CBUFFER, &layout);

   return true;
}

static bool
emit_ubo_var(struct ntd_context *ctx, nir_variable *var)
{
   unsigned count = 1;
   if (glsl_type_is_array(var->type))
      count = glsl_get_length(var->type);

   char *name = var->name;
   char temp_name[30];
   if (name && strlen(name) == 0) {
      snprintf(temp_name, sizeof(temp_name), "__unnamed_ubo_%d",
               ctx->unnamed_ubo_count++);
      name = temp_name;
   }

   const struct glsl_type *type = glsl_without_array(var->type);
   assert(glsl_type_is_struct(type) || glsl_type_is_interface(type));
   unsigned dwords = ALIGN_POT(glsl_get_explicit_size(type, false), 16) / 4;

   return emit_cbv(ctx, var->data.binding, var->data.descriptor_set,
                   dwords, count, name);
}

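/* Worked example for the dword math above (illustrative sizes only): a UBO
 * struct with an explicit size of 20 bytes is padded to the 16-byte CBV row
 * size, ALIGN_POT(20, 16) = 32 bytes = 8 dwords, and emit_cbv() then reports
 * 4 * 8 = 32 bytes of constant data in the metadata.
 */
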
static bool
emit_sampler(struct ntd_context *ctx, nir_variable *var, unsigned count)
{
   unsigned id = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);
   unsigned binding = var->data.binding;
   resource_array_layout layout = {id, binding, count, var->data.descriptor_set};
   const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
   const struct dxil_type *sampler_type = dxil_module_get_struct_type(&ctx->mod, "struct.SamplerState", &int32_type, 1);

   if (glsl_type_is_array(var->type))
      sampler_type = dxil_module_get_array_type(&ctx->mod, sampler_type, count);

   const struct dxil_mdnode *sampler_meta = emit_sampler_metadata(&ctx->mod, sampler_type, var, &layout);

   if (!sampler_meta)
      return false;

   util_dynarray_append(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *, sampler_meta);
   add_resource(ctx, DXIL_RES_SAMPLER, DXIL_RESOURCE_KIND_SAMPLER, &layout);

   return true;
}

static bool
emit_static_indexing_handles(struct ntd_context *ctx)
{
   /* Vulkan always uses dynamic handles, from instructions in the NIR */
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN)
      return true;

   unsigned last_res_class = -1;
   unsigned id = 0;

   unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
      sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
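   /* Validator 1.6+ serializes the larger dxil_resource_v1 records, but v1
    * starts with the v0 fields, so the array can be walked by byte stride
    * while reading each record through the v0 view, whatever the version.
    */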
   for (struct dxil_resource_v0 *res = (struct dxil_resource_v0 *)ctx->resources.data;
        res < (struct dxil_resource_v0 *)((char *)ctx->resources.data + ctx->resources.size);
        res = (struct dxil_resource_v0 *)((char *)res + resource_element_size)) {
      enum dxil_resource_class res_class;
      const struct dxil_value **handle_array;
      switch (res->resource_type) {
      case DXIL_RES_SRV_TYPED:
      case DXIL_RES_SRV_RAW:
      case DXIL_RES_SRV_STRUCTURED:
         res_class = DXIL_RESOURCE_CLASS_SRV;
         handle_array = ctx->srv_handles;
         break;
      case DXIL_RES_CBV:
         res_class = DXIL_RESOURCE_CLASS_CBV;
         handle_array = ctx->cbv_handles;
         break;
      case DXIL_RES_SAMPLER:
         res_class = DXIL_RESOURCE_CLASS_SAMPLER;
         handle_array = ctx->sampler_handles;
         break;
      case DXIL_RES_UAV_RAW:
         res_class = DXIL_RESOURCE_CLASS_UAV;
         handle_array = ctx->ssbo_handles;
         break;
      case DXIL_RES_UAV_TYPED:
      case DXIL_RES_UAV_STRUCTURED:
      case DXIL_RES_UAV_STRUCTURED_WITH_COUNTER:
         res_class = DXIL_RESOURCE_CLASS_UAV;
         handle_array = ctx->image_handles;
         break;
      default:
         unreachable("Unexpected resource type");
      }

      if (last_res_class != res_class)
         id = 0;
      else
         id++;
      last_res_class = res_class;

      if (res->space > 1)
         continue;
      assert(res->space == 0 ||
             (res->space == 1 &&
              res->resource_type != DXIL_RES_UAV_RAW &&
              ctx->opts->environment == DXIL_ENVIRONMENT_GL));

      /* CL uses dynamic handles for the "globals" UAV array, but uses static
       * handles for UBOs, textures, and samplers.
       */
      if (ctx->opts->environment == DXIL_ENVIRONMENT_CL &&
          res->resource_type == DXIL_RES_UAV_RAW)
         continue;

      for (unsigned i = res->lower_bound; i <= res->upper_bound; ++i) {
         handle_array[i] = emit_createhandle_call_const_index(ctx,
                                                              res_class,
                                                              res->lower_bound,
                                                              res->upper_bound,
                                                              res->space,
                                                              id,
                                                              i,
                                                              false);
         if (!handle_array[i])
            return false;
      }
   }
   return true;
}

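/* The five GSState operands below are, in order: input primitive, max output
 * vertex count, active stream mask, output topology, and GS instance count
 * (our reading of the GSState tag layout in DXIL.rst).
 */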
static const struct dxil_mdnode *
emit_gs_state(struct ntd_context *ctx)
{
   const struct dxil_mdnode *gs_state_nodes[5];
   const nir_shader *s = ctx->shader;

   gs_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, dxil_get_input_primitive(s->info.gs.input_primitive));
   gs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.vertices_out);
   gs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.gs.active_stream_mask, 1));
   gs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, dxil_get_primitive_topology(s->info.gs.output_primitive));
   gs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.invocations);

   for (unsigned i = 0; i < ARRAY_SIZE(gs_state_nodes); ++i) {
      if (!gs_state_nodes[i])
         return NULL;
   }

   return dxil_get_metadata_node(&ctx->mod, gs_state_nodes, ARRAY_SIZE(gs_state_nodes));
}

static enum dxil_tessellator_domain
get_tessellator_domain(enum tess_primitive_mode primitive_mode)
{
   switch (primitive_mode) {
   case TESS_PRIMITIVE_QUADS: return DXIL_TESSELLATOR_DOMAIN_QUAD;
   case TESS_PRIMITIVE_TRIANGLES: return DXIL_TESSELLATOR_DOMAIN_TRI;
   case TESS_PRIMITIVE_ISOLINES: return DXIL_TESSELLATOR_DOMAIN_ISOLINE;
   default:
      unreachable("Invalid tessellator primitive mode");
   }
}

static enum dxil_tessellator_partitioning
get_tessellator_partitioning(enum gl_tess_spacing spacing)
{
   switch (spacing) {
   default:
   case TESS_SPACING_EQUAL:
      return DXIL_TESSELLATOR_PARTITIONING_INTEGER;
   case TESS_SPACING_FRACTIONAL_EVEN:
      return DXIL_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN;
   case TESS_SPACING_FRACTIONAL_ODD:
      return DXIL_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD;
   }
}

static enum dxil_tessellator_output_primitive
get_tessellator_output_primitive(const struct shader_info *info)
{
   if (info->tess.point_mode)
      return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_POINT;
   if (info->tess._primitive_mode == TESS_PRIMITIVE_ISOLINES)
      return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_LINE;
   /* Note: the GL tessellation domain is inverted from D3D's, which means the
    * triangle winding needs to be inverted.
    */
   if (info->tess.ccw)
      return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CW;
   return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CCW;
}

static const struct dxil_mdnode *
emit_hs_state(struct ntd_context *ctx)
{
   const struct dxil_mdnode *hs_state_nodes[7];

   hs_state_nodes[0] = dxil_get_metadata_func(&ctx->mod, ctx->tess_ctrl_patch_constant_func_def->func);
   hs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, ctx->tess_input_control_point_count);
   hs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, ctx->shader->info.tess.tcs_vertices_out);
   hs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_domain(ctx->shader->info.tess._primitive_mode));
   hs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_partitioning(ctx->shader->info.tess.spacing));
   hs_state_nodes[5] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_output_primitive(&ctx->shader->info));
   hs_state_nodes[6] = dxil_get_metadata_float32(&ctx->mod, 64.0f);

   return dxil_get_metadata_node(&ctx->mod, hs_state_nodes, ARRAY_SIZE(hs_state_nodes));
}

static const struct dxil_mdnode *
emit_ds_state(struct ntd_context *ctx)
{
   const struct dxil_mdnode *ds_state_nodes[2];

   ds_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_domain(ctx->shader->info.tess._primitive_mode));
   ds_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, ctx->shader->info.tess.tcs_vertices_out);

   return dxil_get_metadata_node(&ctx->mod, ds_state_nodes, ARRAY_SIZE(ds_state_nodes));
}

static const struct dxil_mdnode *
emit_threads(struct ntd_context *ctx)
{
   const nir_shader *s = ctx->shader;
   const struct dxil_mdnode *threads_x = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[0], 1));
   const struct dxil_mdnode *threads_y = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[1], 1));
   const struct dxil_mdnode *threads_z = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[2], 1));
   if (!threads_x || !threads_y || !threads_z)
      return NULL;

   const struct dxil_mdnode *threads_nodes[] = { threads_x, threads_y, threads_z };
   return dxil_get_metadata_node(&ctx->mod, threads_nodes, ARRAY_SIZE(threads_nodes));
}

static const struct dxil_mdnode *
emit_wave_size(struct ntd_context *ctx)
{
   const nir_shader *s = ctx->shader;
   const struct dxil_mdnode *wave_size_node = dxil_get_metadata_int32(&ctx->mod, s->info.subgroup_size);
   return dxil_get_metadata_node(&ctx->mod, &wave_size_node, 1);
}

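/* For SM6.8+, the WaveSizeRange tag carries a {min, max, preferred} triple
 * rather than the single WaveSize value used before; since NIR gives one
 * required subgroup size, all three entries are pinned to that value.
 */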
static const struct dxil_mdnode *
emit_wave_size_range(struct ntd_context *ctx)
{
   const nir_shader *s = ctx->shader;
   const struct dxil_mdnode *wave_size_nodes[3];
   wave_size_nodes[0] = dxil_get_metadata_int32(&ctx->mod, s->info.subgroup_size);
   wave_size_nodes[1] = wave_size_nodes[0];
   wave_size_nodes[2] = wave_size_nodes[0];
   return dxil_get_metadata_node(&ctx->mod, wave_size_nodes, ARRAY_SIZE(wave_size_nodes));
}

static int64_t
get_module_flags(struct ntd_context *ctx)
{
   /* See the DXIL documentation for the definition of these flags:
    *
    * https://github.com/Microsoft/DirectXShaderCompiler/blob/master/docs/DXIL.rst#shader-flags
    */

   uint64_t flags = 0;
   if (ctx->mod.feats.doubles)
      flags |= (1 << 2);
   if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT &&
       ctx->shader->info.fs.early_fragment_tests)
      flags |= (1 << 3);
   if (ctx->mod.raw_and_structured_buffers)
      flags |= (1 << 4);
   if (ctx->mod.feats.min_precision)
      flags |= (1 << 5);
   if (ctx->mod.feats.dx11_1_double_extensions)
      flags |= (1 << 6);
   if (ctx->mod.feats.array_layer_from_vs_or_ds)
      flags |= (1 << 9);
   if (ctx->mod.feats.inner_coverage)
      flags |= (1 << 10);
   if (ctx->mod.feats.stencil_ref)
      flags |= (1 << 11);
   if (ctx->mod.feats.tiled_resources)
      flags |= (1 << 12);
   if (ctx->mod.feats.typed_uav_load_additional_formats)
      flags |= (1 << 13);
   if (ctx->mod.feats.use_64uavs)
      flags |= (1 << 15);
   if (ctx->mod.feats.uavs_at_every_stage)
      flags |= (1 << 16);
   if (ctx->mod.feats.cs_4x_raw_sb)
      flags |= (1 << 17);
   if (ctx->mod.feats.rovs)
      flags |= (1 << 18);
   if (ctx->mod.feats.wave_ops)
      flags |= (1 << 19);
   if (ctx->mod.feats.int64_ops)
      flags |= (1 << 20);
   if (ctx->mod.feats.view_id)
      flags |= (1 << 21);
   if (ctx->mod.feats.barycentrics)
      flags |= (1 << 22);
   if (ctx->mod.feats.native_low_precision)
      flags |= (1 << 23) | (1 << 5);
   if (ctx->mod.feats.shading_rate)
      flags |= (1 << 24);
   if (ctx->mod.feats.raytracing_tier_1_1)
      flags |= (1 << 25);
   if (ctx->mod.feats.sampler_feedback)
      flags |= (1 << 26);
   if (ctx->mod.feats.atomic_int64_typed)
      flags |= (1 << 27);
   if (ctx->mod.feats.atomic_int64_tgsm)
      flags |= (1 << 28);
   if (ctx->mod.feats.derivatives_in_mesh_or_amp)
      flags |= (1 << 29);
   if (ctx->mod.feats.resource_descriptor_heap_indexing)
      flags |= (1 << 30);
   if (ctx->mod.feats.sampler_descriptor_heap_indexing)
      flags |= (1ull << 31);
   if (ctx->mod.feats.atomic_int64_heap_resource)
      flags |= (1ull << 32);
   if (ctx->mod.feats.advanced_texture_ops)
      flags |= (1ull << 34);
   if (ctx->mod.feats.writable_msaa)
      flags |= (1ull << 35);
   // Bit 36 is wave MMA
   if (ctx->mod.feats.sample_cmp_bias_gradient)
      flags |= (1ull << 37);
   if (ctx->mod.feats.extended_command_info)
      flags |= (1ull << 38);

   if (ctx->opts->disable_math_refactoring)
      flags |= (1 << 1);

   /* Work around https://github.com/microsoft/DirectXShaderCompiler/issues/4616
    * When targeting SM6.7 with at least one UAV and no other flags set, set the
    * resources-may-not-alias flag; otherwise the DXIL validator can read
    * uninitialized memory for that flag and fail validation.
    */
   if (flags == 0 && ctx->mod.minor_version >= 7 && ctx->num_uavs > 0)
      flags |= (1ull << 33);

   return flags;
}

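/* Worked example for the function above (illustrative only): a shader using
 * doubles and 16-bit min-precision types reports
 * (1 << 2) | (1 << 5) = 0x24 in the flags tag emitted by emit_metadata().
 */
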
static const struct dxil_mdnode *
emit_entrypoint(struct ntd_context *ctx,
                const struct dxil_func *func, const char *name,
                const struct dxil_mdnode *signatures,
                const struct dxil_mdnode *resources,
                const struct dxil_mdnode *shader_props)
{
   char truncated_name[254] = { 0 };
   strncpy(truncated_name, name, ARRAY_SIZE(truncated_name) - 1);

   const struct dxil_mdnode *func_md = dxil_get_metadata_func(&ctx->mod, func);
   const struct dxil_mdnode *name_md = dxil_get_metadata_string(&ctx->mod, truncated_name);
   const struct dxil_mdnode *nodes[] = {
      func_md,
      name_md,
      signatures,
      resources,
      shader_props
   };
   return dxil_get_metadata_node(&ctx->mod, nodes,
                                 ARRAY_SIZE(nodes));
}

static const struct dxil_mdnode *
emit_resources(struct ntd_context *ctx)
{
   bool emit_resources = false;
   const struct dxil_mdnode *resources_nodes[] = {
      NULL, NULL, NULL, NULL
   };

#define ARRAY_AND_SIZE(arr) arr.data, util_dynarray_num_elements(&arr, const struct dxil_mdnode *)

   if (ctx->srv_metadata_nodes.size) {
      resources_nodes[0] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->srv_metadata_nodes));
      emit_resources = true;
   }

   if (ctx->uav_metadata_nodes.size) {
      resources_nodes[1] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->uav_metadata_nodes));
      emit_resources = true;
   }

   if (ctx->cbv_metadata_nodes.size) {
      resources_nodes[2] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->cbv_metadata_nodes));
      emit_resources = true;
   }

   if (ctx->sampler_metadata_nodes.size) {
      resources_nodes[3] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->sampler_metadata_nodes));
      emit_resources = true;
   }

#undef ARRAY_AND_SIZE

   return emit_resources ?
      dxil_get_metadata_node(&ctx->mod, resources_nodes, ARRAY_SIZE(resources_nodes)) : NULL;
}

static bool
emit_tag(struct ntd_context *ctx, enum dxil_shader_tag tag,
         const struct dxil_mdnode *value_node)
{
   const struct dxil_mdnode *tag_node = dxil_get_metadata_int32(&ctx->mod, tag);
   if (!tag_node || !value_node)
      return false;
   assert(ctx->num_shader_property_nodes <= ARRAY_SIZE(ctx->shader_property_nodes) - 2);
   ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = tag_node;
   ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = value_node;

   return true;
}

static bool
emit_metadata(struct ntd_context *ctx)
{
   /* DXIL versions are 1.x for shader model 6.x */
   assert(ctx->mod.major_version == 6);
   unsigned dxilMajor = 1;
   unsigned dxilMinor = ctx->mod.minor_version;
   unsigned valMajor = ctx->mod.major_validator;
   unsigned valMinor = ctx->mod.minor_validator;
   if (!emit_llvm_ident(&ctx->mod) ||
       !emit_named_version(&ctx->mod, "dx.version", dxilMajor, dxilMinor) ||
       !emit_named_version(&ctx->mod, "dx.valver", valMajor, valMinor) ||
       !emit_dx_shader_model(&ctx->mod))
      return false;

   const struct dxil_func_def *main_func_def = ctx->main_func_def;
   if (!main_func_def)
      return false;
   const struct dxil_func *main_func = main_func_def->func;

   const struct dxil_mdnode *resources_node = emit_resources(ctx);

   const struct dxil_mdnode *main_entrypoint = dxil_get_metadata_func(&ctx->mod, main_func);
   const struct dxil_mdnode *node27 = dxil_get_metadata_node(&ctx->mod, NULL, 0);

   const struct dxil_mdnode *node4 = dxil_get_metadata_int32(&ctx->mod, 0);
   const struct dxil_mdnode *nodes_4_27_27[] = {
      node4, node27, node27
   };
   const struct dxil_mdnode *node28 = dxil_get_metadata_node(&ctx->mod, nodes_4_27_27,
                                                             ARRAY_SIZE(nodes_4_27_27));

   const struct dxil_mdnode *node29 = dxil_get_metadata_node(&ctx->mod, &node28, 1);

   const struct dxil_mdnode *node3 = dxil_get_metadata_int32(&ctx->mod, 1);
   const struct dxil_mdnode *main_type_annotation_nodes[] = {
      node3, main_entrypoint, node29
   };
   const struct dxil_mdnode *main_type_annotation = dxil_get_metadata_node(&ctx->mod, main_type_annotation_nodes,
                                                                           ARRAY_SIZE(main_type_annotation_nodes));

   if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) {
      if (!emit_tag(ctx, DXIL_SHADER_TAG_GS_STATE, emit_gs_state(ctx)))
         return false;
   } else if (ctx->mod.shader_kind == DXIL_HULL_SHADER) {
      ctx->tess_input_control_point_count = 32;
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
         if (nir_is_arrayed_io(var, MESA_SHADER_TESS_CTRL)) {
            ctx->tess_input_control_point_count = glsl_array_size(var->type);
            break;
         }
      }

      if (!emit_tag(ctx, DXIL_SHADER_TAG_HS_STATE, emit_hs_state(ctx)))
         return false;
   } else if (ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) {
      if (!emit_tag(ctx, DXIL_SHADER_TAG_DS_STATE, emit_ds_state(ctx)))
         return false;
   } else if (ctx->mod.shader_kind == DXIL_COMPUTE_SHADER) {
      if (!emit_tag(ctx, DXIL_SHADER_TAG_NUM_THREADS, emit_threads(ctx)))
         return false;
      if (ctx->mod.minor_version >= 6 &&
          ctx->shader->info.subgroup_size >= SUBGROUP_SIZE_REQUIRE_4) {
         if (ctx->mod.minor_version < 8) {
            if (!emit_tag(ctx, DXIL_SHADER_TAG_WAVE_SIZE, emit_wave_size(ctx)))
               return false;
         } else {
            if (!emit_tag(ctx, DXIL_SHADER_TAG_WAVE_SIZE_RANGE, emit_wave_size_range(ctx)))
               return false;
         }
      }
   }

   uint64_t flags = get_module_flags(ctx);
   if (flags != 0) {
      if (!emit_tag(ctx, DXIL_SHADER_TAG_FLAGS, dxil_get_metadata_int64(&ctx->mod, flags)))
         return false;
   }
   const struct dxil_mdnode *shader_properties = NULL;
   if (ctx->num_shader_property_nodes > 0) {
      shader_properties = dxil_get_metadata_node(&ctx->mod, ctx->shader_property_nodes,
                                                 ctx->num_shader_property_nodes);
      if (!shader_properties)
         return false;
   }

   nir_function_impl *entry_func_impl = nir_shader_get_entrypoint(ctx->shader);
   const struct dxil_mdnode *dx_entry_point = emit_entrypoint(ctx, main_func,
      entry_func_impl->function->name, get_signatures(&ctx->mod), resources_node, shader_properties);
   if (!dx_entry_point)
      return false;

   if (resources_node) {
      const struct dxil_mdnode *dx_resources = resources_node;
      dxil_add_metadata_named_node(&ctx->mod, "dx.resources",
                                   &dx_resources, 1);
   }

   if (ctx->mod.minor_version >= 2 &&
       dxil_nir_analyze_io_dependencies(&ctx->mod, ctx->shader)) {
      const struct dxil_type *i32_type = dxil_module_get_int_type(&ctx->mod, 32);
      if (!i32_type)
         return false;

      const struct dxil_type *array_type = dxil_module_get_array_type(&ctx->mod, i32_type, ctx->mod.serialized_dependency_table_size);
      if (!array_type)
         return false;

      const struct dxil_value **array_entries = malloc(sizeof(const struct dxil_value *) * ctx->mod.serialized_dependency_table_size);
      if (!array_entries)
         return false;

      for (uint32_t i = 0; i < ctx->mod.serialized_dependency_table_size; ++i)
         array_entries[i] = dxil_module_get_int32_const(&ctx->mod, ctx->mod.serialized_dependency_table[i]);
      const struct dxil_value *array_val = dxil_module_get_array_const(&ctx->mod, array_type, array_entries);
      free((void *)array_entries);

      const struct dxil_mdnode *view_id_state_val = dxil_get_metadata_value(&ctx->mod, array_type, array_val);
      if (!view_id_state_val)
         return false;

      const struct dxil_mdnode *view_id_state_node = dxil_get_metadata_node(&ctx->mod, &view_id_state_val, 1);

      dxil_add_metadata_named_node(&ctx->mod, "dx.viewIdState", &view_id_state_node, 1);
   }

   const struct dxil_mdnode *dx_type_annotations[] = { main_type_annotation };
   return dxil_add_metadata_named_node(&ctx->mod, "dx.typeAnnotations",
                                       dx_type_annotations,
                                       ARRAY_SIZE(dx_type_annotations)) &&
          dxil_add_metadata_named_node(&ctx->mod, "dx.entryPoints",
                                       &dx_entry_point, 1);
}

static const struct dxil_value *
bitcast_to_int(struct ntd_context *ctx, unsigned bit_size,
               const struct dxil_value *value)
{
   const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod, bit_size);
   if (!type)
      return NULL;

   return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
}

static const struct dxil_value *
bitcast_to_float(struct ntd_context *ctx, unsigned bit_size,
                 const struct dxil_value *value)
{
   const struct dxil_type *type = dxil_module_get_float_type(&ctx->mod, bit_size);
   if (!type)
      return NULL;

   return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
}

static bool
is_phi_src(nir_def *ssa)
{
   nir_foreach_use(src, ssa)
      if (nir_src_parent_instr(src)->type == nir_instr_type_phi)
         return true;
   return false;
}

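/* Stores one scalar channel of an SSA def. LLVM phis require all incoming
 * values to share a type, while this backend types values lazily, so a value
 * feeding a phi gets bitcast in its defining block to the type the phi is
 * expected to use (int is preferred when the def is used as both or neither).
 */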
static void
store_ssa_def(struct ntd_context *ctx, nir_def *ssa, unsigned chan,
              const struct dxil_value *value)
{
   assert(ssa->index < ctx->num_defs);
   assert(chan < ssa->num_components);
   /* Insert bitcasts for phi srcs in the parent block */
   if (is_phi_src(ssa)) {
      /* Prefer ints over floats if it could be both or if we have no type info */
      nir_alu_type expect_type =
         BITSET_TEST(ctx->int_types, ssa->index) ? nir_type_int :
         (BITSET_TEST(ctx->float_types, ssa->index) ? nir_type_float :
          nir_type_int);
      assert(ssa->bit_size != 1 || expect_type == nir_type_int);
      if (ssa->bit_size != 1 && expect_type != dxil_type_to_nir_type(dxil_value_get_type(value)))
         value = dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST,
                                expect_type == nir_type_int ?
                                   dxil_module_get_int_type(&ctx->mod, ssa->bit_size) :
                                   dxil_module_get_float_type(&ctx->mod, ssa->bit_size), value);
      if (ssa->bit_size == 64) {
         if (expect_type == nir_type_int)
            ctx->mod.feats.int64_ops = true;
         if (expect_type == nir_type_float)
            ctx->mod.feats.doubles = true;
      }
   }
   ctx->defs[ssa->index].chans[chan] = value;
}

static void
store_def(struct ntd_context *ctx, nir_def *def, unsigned chan,
          const struct dxil_value *value)
{
   const struct dxil_type *type = dxil_value_get_type(value);
   if (type == ctx->mod.float64_type)
      ctx->mod.feats.doubles = true;
   if (type == ctx->mod.float16_type ||
       type == ctx->mod.int16_type)
      ctx->mod.feats.min_precision = true;
   if (type == ctx->mod.int64_type)
      ctx->mod.feats.int64_ops = true;
   store_ssa_def(ctx, def, chan, value);
}

static void
store_alu_dest(struct ntd_context *ctx, nir_alu_instr *alu, unsigned chan,
               const struct dxil_value *value)
{
   store_def(ctx, &alu->def, chan, value);
}

static const struct dxil_value *
get_src_ssa(struct ntd_context *ctx, const nir_def *ssa, unsigned chan)
{
   assert(ssa->index < ctx->num_defs);
   assert(chan < ssa->num_components);
   assert(ctx->defs[ssa->index].chans[chan]);
   return ctx->defs[ssa->index].chans[chan];
}

static const struct dxil_value *
get_src(struct ntd_context *ctx, nir_src *src, unsigned chan,
        nir_alu_type type)
{
   const struct dxil_value *value = get_src_ssa(ctx, src->ssa, chan);

   const int bit_size = nir_src_bit_size(*src);

   switch (nir_alu_type_get_base_type(type)) {
   case nir_type_int:
   case nir_type_uint: {
      const struct dxil_type *expect_type = dxil_module_get_int_type(&ctx->mod, bit_size);
      /* nothing to do */
      if (dxil_value_type_equal_to(value, expect_type)) {
         assert(bit_size != 64 || ctx->mod.feats.int64_ops);
         return value;
      }
      if (bit_size == 64) {
         assert(ctx->mod.feats.doubles);
         ctx->mod.feats.int64_ops = true;
      }
      if (bit_size == 16)
         ctx->mod.feats.native_low_precision = true;
      assert(dxil_value_type_bitsize_equal_to(value, bit_size));
      return bitcast_to_int(ctx, bit_size, value);
   }

   case nir_type_float:
      assert(nir_src_bit_size(*src) >= 16);
      if (dxil_value_type_equal_to(value, dxil_module_get_float_type(&ctx->mod, bit_size))) {
         assert(nir_src_bit_size(*src) != 64 || ctx->mod.feats.doubles);
         return value;
      }
      if (bit_size == 64) {
         assert(ctx->mod.feats.int64_ops);
         ctx->mod.feats.doubles = true;
      }
      if (bit_size == 16)
         ctx->mod.feats.native_low_precision = true;
      assert(dxil_value_type_bitsize_equal_to(value, bit_size));
      return bitcast_to_float(ctx, bit_size, value);

   case nir_type_bool:
      if (!dxil_value_type_bitsize_equal_to(value, 1)) {
         return dxil_emit_cast(&ctx->mod, DXIL_CAST_TRUNC,
                               dxil_module_get_int_type(&ctx->mod, 1), value);
      }
      return value;

   default:
      unreachable("unexpected nir_alu_type");
   }
}

static const struct dxil_value *
get_alu_src(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src)
{
   unsigned chan = alu->src[src].swizzle[0];
   return get_src(ctx, &alu->src[src].src, chan,
                  nir_op_infos[alu->op].input_types[src]);
}

static bool
emit_binop(struct ntd_context *ctx, nir_alu_instr *alu,
           enum dxil_bin_opcode opcode,
           const struct dxil_value *op0, const struct dxil_value *op1)
{
   bool is_float_op = nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) == nir_type_float;

   enum dxil_opt_flags flags = 0;
   if (is_float_op && !alu->exact)
      flags |= DXIL_UNSAFE_ALGEBRA;

   const struct dxil_value *v = dxil_emit_binop(&ctx->mod, opcode, op0, op1, flags);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_shift(struct ntd_context *ctx, nir_alu_instr *alu,
           enum dxil_bin_opcode opcode,
           const struct dxil_value *op0, const struct dxil_value *op1)
{
   unsigned op0_bit_size = nir_src_bit_size(alu->src[0].src);
   unsigned op1_bit_size = nir_src_bit_size(alu->src[1].src);

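   /* LLVM IR leaves shifts by >= the bit width undefined, so clamp the count
    * to the low bits the way D3D expects; e.g. a 32-bit shift by 35 becomes a
    * shift by 35 & 31 = 3. Constant counts are masked at compile time below.
    */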
   uint64_t shift_mask = op0_bit_size - 1;
   if (!nir_src_is_const(alu->src[1].src)) {
      if (op0_bit_size != op1_bit_size) {
         const struct dxil_type *type =
            dxil_module_get_int_type(&ctx->mod, op0_bit_size);
         enum dxil_cast_opcode cast_op =
            op1_bit_size < op0_bit_size ? DXIL_CAST_ZEXT : DXIL_CAST_TRUNC;
         op1 = dxil_emit_cast(&ctx->mod, cast_op, type, op1);
      }
      op1 = dxil_emit_binop(&ctx->mod, DXIL_BINOP_AND,
                            op1,
                            dxil_module_get_int_const(&ctx->mod, shift_mask, op0_bit_size),
                            0);
   } else {
      uint64_t val = nir_scalar_as_uint(
         nir_scalar_chase_alu_src(nir_get_scalar(&alu->def, 0), 1));
      op1 = dxil_module_get_int_const(&ctx->mod, val & shift_mask, op0_bit_size);
   }

   const struct dxil_value *v =
      dxil_emit_binop(&ctx->mod, opcode, op0, op1, 0);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_cmp(struct ntd_context *ctx, nir_alu_instr *alu,
         enum dxil_cmp_pred pred,
         const struct dxil_value *op0, const struct dxil_value *op1)
{
   const struct dxil_value *v = dxil_emit_cmp(&ctx->mod, pred, op0, op1);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static enum dxil_cast_opcode
get_cast_op(nir_alu_instr *alu)
{
   unsigned dst_bits = alu->def.bit_size;
   unsigned src_bits = nir_src_bit_size(alu->src[0].src);

   switch (alu->op) {
   /* bool -> int */
   case nir_op_b2i16:
   case nir_op_b2i32:
   case nir_op_b2i64:
      return DXIL_CAST_ZEXT;

   /* float -> float */
   case nir_op_f2f16_rtz:
   case nir_op_f2f16:
   case nir_op_f2fmp:
   case nir_op_f2f32:
   case nir_op_f2f64:
      assert(dst_bits != src_bits);
      if (dst_bits < src_bits)
         return DXIL_CAST_FPTRUNC;
      else
         return DXIL_CAST_FPEXT;

   /* int -> int */
   case nir_op_i2i1:
   case nir_op_i2i16:
   case nir_op_i2imp:
   case nir_op_i2i32:
   case nir_op_i2i64:
      assert(dst_bits != src_bits);
      if (dst_bits < src_bits)
         return DXIL_CAST_TRUNC;
      else
         return DXIL_CAST_SEXT;

   /* uint -> uint */
   case nir_op_u2u1:
   case nir_op_u2u16:
   case nir_op_u2u32:
   case nir_op_u2u64:
      assert(dst_bits != src_bits);
      if (dst_bits < src_bits)
         return DXIL_CAST_TRUNC;
      else
         return DXIL_CAST_ZEXT;

   /* float -> int */
   case nir_op_f2i16:
   case nir_op_f2imp:
   case nir_op_f2i32:
   case nir_op_f2i64:
      return DXIL_CAST_FPTOSI;

   /* float -> uint */
   case nir_op_f2u16:
   case nir_op_f2ump:
   case nir_op_f2u32:
   case nir_op_f2u64:
      return DXIL_CAST_FPTOUI;

   /* int -> float */
   case nir_op_i2f16:
   case nir_op_i2fmp:
   case nir_op_i2f32:
   case nir_op_i2f64:
      return DXIL_CAST_SITOFP;

   /* uint -> float */
   case nir_op_u2f16:
   case nir_op_u2fmp:
   case nir_op_u2f32:
   case nir_op_u2f64:
      return DXIL_CAST_UITOFP;

   default:
      unreachable("unexpected cast op");
   }
}

static const struct dxil_type *
get_cast_dest_type(struct ntd_context *ctx, nir_alu_instr *alu)
{
   unsigned dst_bits = alu->def.bit_size;
   switch (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type)) {
   case nir_type_bool:
      assert(dst_bits == 1);
      FALLTHROUGH;
   case nir_type_int:
   case nir_type_uint:
      return dxil_module_get_int_type(&ctx->mod, dst_bits);

   case nir_type_float:
      return dxil_module_get_float_type(&ctx->mod, dst_bits);

   default:
      unreachable("unknown nir_alu_type");
   }
}

static bool
is_double(nir_alu_type alu_type, unsigned bit_size)
{
   return nir_alu_type_get_base_type(alu_type) == nir_type_float &&
          bit_size == 64;
}

static bool
emit_cast(struct ntd_context *ctx, nir_alu_instr *alu,
          const struct dxil_value *value)
{
   enum dxil_cast_opcode opcode = get_cast_op(alu);
   const struct dxil_type *type = get_cast_dest_type(ctx, alu);
   if (!type)
      return false;

   const nir_op_info *info = &nir_op_infos[alu->op];
   switch (opcode) {
   case DXIL_CAST_UITOFP:
   case DXIL_CAST_SITOFP:
      if (is_double(info->output_type, alu->def.bit_size))
         ctx->mod.feats.dx11_1_double_extensions = true;
      break;
   case DXIL_CAST_FPTOUI:
   case DXIL_CAST_FPTOSI:
      if (is_double(info->input_types[0], nir_src_bit_size(alu->src[0].src)))
         ctx->mod.feats.dx11_1_double_extensions = true;
      break;
   default:
      break;
   }

   if (alu->def.bit_size == 16) {
      switch (alu->op) {
      case nir_op_f2fmp:
      case nir_op_i2imp:
      case nir_op_f2imp:
      case nir_op_f2ump:
      case nir_op_i2fmp:
      case nir_op_u2fmp:
         break;
      default:
         ctx->mod.feats.native_low_precision = true;
      }
   }

   const struct dxil_value *v = dxil_emit_cast(&ctx->mod, opcode, type,
                                               value);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static enum overload_type
get_overload(nir_alu_type alu_type, unsigned bit_size)
{
   switch (nir_alu_type_get_base_type(alu_type)) {
   case nir_type_int:
   case nir_type_uint:
   case nir_type_bool:
      switch (bit_size) {
      case 1: return DXIL_I1;
      case 16: return DXIL_I16;
      case 32: return DXIL_I32;
      case 64: return DXIL_I64;
      default:
         unreachable("unexpected bit_size");
      }
   case nir_type_float:
      switch (bit_size) {
      case 16: return DXIL_F16;
      case 32: return DXIL_F32;
      case 64: return DXIL_F64;
      default:
         unreachable("unexpected bit_size");
      }
   case nir_type_invalid:
      return DXIL_NONE;
   default:
      unreachable("unexpected output type");
   }
}

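/* Some intrinsic results are not inherently typed; the int_types/float_types
 * bitsets record how each def is actually consumed, and when neither bit is
 * set we fall back to the caller-provided default overload.
 */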
static enum overload_type
get_ambiguous_overload(struct ntd_context *ctx, nir_intrinsic_instr *intr,
                       enum overload_type default_type)
{
   if (BITSET_TEST(ctx->int_types, intr->def.index))
      return get_overload(nir_type_int, intr->def.bit_size);
   if (BITSET_TEST(ctx->float_types, intr->def.index))
      return get_overload(nir_type_float, intr->def.bit_size);
   return default_type;
}

static enum overload_type
get_ambiguous_overload_alu_type(struct ntd_context *ctx, nir_intrinsic_instr *intr,
                                nir_alu_type alu_type)
{
   return get_ambiguous_overload(ctx, intr, get_overload(alu_type, intr->def.bit_size));
}

static bool
emit_unary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
                 enum dxil_intr intr, const struct dxil_value *op)
{
   const nir_op_info *info = &nir_op_infos[alu->op];
   unsigned src_bits = nir_src_bit_size(alu->src[0].src);
   enum overload_type overload = get_overload(info->input_types[0], src_bits);

   const struct dxil_value *v = emit_unary_call(ctx, overload, intr, op);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_binary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
                  enum dxil_intr intr,
                  const struct dxil_value *op0, const struct dxil_value *op1)
{
   const nir_op_info *info = &nir_op_infos[alu->op];
   assert(info->output_type == info->input_types[0]);
   assert(info->output_type == info->input_types[1]);
   unsigned dst_bits = alu->def.bit_size;
   assert(nir_src_bit_size(alu->src[0].src) == dst_bits);
   assert(nir_src_bit_size(alu->src[1].src) == dst_bits);
   enum overload_type overload = get_overload(info->output_type, dst_bits);

   const struct dxil_value *v = emit_binary_call(ctx, overload, intr,
                                                 op0, op1);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_tertiary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
                    enum dxil_intr intr,
                    const struct dxil_value *op0,
                    const struct dxil_value *op1,
                    const struct dxil_value *op2)
{
   const nir_op_info *info = &nir_op_infos[alu->op];
   unsigned dst_bits = alu->def.bit_size;
   assert(nir_src_bit_size(alu->src[0].src) == dst_bits);
   assert(nir_src_bit_size(alu->src[1].src) == dst_bits);
   assert(nir_src_bit_size(alu->src[2].src) == dst_bits);

   assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[0], dst_bits));
   assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[1], dst_bits));
   assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[2], dst_bits));

   enum overload_type overload = get_overload(info->output_type, dst_bits);

   const struct dxil_value *v = emit_tertiary_call(ctx, overload, intr,
                                                   op0, op1, op2);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_derivative(struct ntd_context *ctx, nir_intrinsic_instr *intr,
                enum dxil_intr dxil_intr)
{
   const struct dxil_value *src = get_src(ctx, &intr->src[0], 0, nir_type_float);
   enum overload_type overload = get_overload(nir_type_float, intr->src[0].ssa->bit_size);
   const struct dxil_value *v = emit_unary_call(ctx, overload, dxil_intr, src);
   if (!v)
      return false;
   store_def(ctx, &intr->def, 0, v);
   return true;
}

static bool
emit_bitfield_insert(struct ntd_context *ctx, nir_alu_instr *alu,
                     const struct dxil_value *base,
                     const struct dxil_value *insert,
                     const struct dxil_value *offset,
                     const struct dxil_value *width)
{
   /* DXIL is width, offset, insert, base; NIR is base, insert, offset, width */
   const struct dxil_value *v = emit_quaternary_call(ctx, DXIL_I32, DXIL_INTR_BFI,
                                                     width, offset, insert, base);
   if (!v)
      return false;

   /* DXIL only uses the 5 LSBs of width/offset. Special-case width >= 32 to
    * copy insert through unmodified.
    */
   const struct dxil_value *compare_width = dxil_emit_cmp(&ctx->mod, DXIL_ICMP_SGE,
                                                          width, dxil_module_get_int32_const(&ctx->mod, 32));
   v = dxil_emit_select(&ctx->mod, compare_width, insert, v);
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

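/* Illustration of the BFI special-casing above (hypothetical operands):
 * base = 0xFFFF0000, insert = 0xAB, offset = 8, width = 8 replaces bits
 * 8..15 of base, yielding 0xFFFFAB00; with width >= 32 the select returns
 * insert unmodified.
 */
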
static bool
emit_dot4add_packed(struct ntd_context *ctx, nir_alu_instr *alu,
                    enum dxil_intr intr,
                    const struct dxil_value *src0,
                    const struct dxil_value *src1,
                    const struct dxil_value *accum)
{
   const struct dxil_func *f = dxil_get_function(&ctx->mod, "dx.op.dot4AddPacked", DXIL_I32);
   if (!f)
      return false;
   const struct dxil_value *srcs[] = { dxil_module_get_int32_const(&ctx->mod, intr), accum, src0, src1 };
   const struct dxil_value *v = dxil_emit_call(&ctx->mod, f, srcs, ARRAY_SIZE(srcs));
   if (!v)
      return false;

   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_select(struct ntd_context *ctx, nir_alu_instr *alu,
            const struct dxil_value *sel,
            const struct dxil_value *val_true,
            const struct dxil_value *val_false)
{
   assert(sel);
   assert(val_true);
   assert(val_false);

   const struct dxil_value *v = dxil_emit_select(&ctx->mod, sel, val_true, val_false);
   if (!v)
      return false;

   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_b2f16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
{
   assert(val);

   struct dxil_module *m = &ctx->mod;

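   /* dxil_module_get_float16_const takes the raw IEEE half-float bit pattern:
    * 0x3C00 encodes 1.0h and 0 encodes 0.0h.
    */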
   const struct dxil_value *c1 = dxil_module_get_float16_const(m, 0x3C00);
   const struct dxil_value *c0 = dxil_module_get_float16_const(m, 0);

   if (!c0 || !c1)
      return false;

   return emit_select(ctx, alu, val, c1, c0);
}

static bool
emit_b2f32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
{
   assert(val);

   struct dxil_module *m = &ctx->mod;

   const struct dxil_value *c1 = dxil_module_get_float_const(m, 1.0f);
   const struct dxil_value *c0 = dxil_module_get_float_const(m, 0.0f);

   if (!c0 || !c1)
      return false;

   return emit_select(ctx, alu, val, c1, c0);
}

static bool
emit_b2f64(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
{
   assert(val);

   struct dxil_module *m = &ctx->mod;

   const struct dxil_value *c1 = dxil_module_get_double_const(m, 1.0);
   const struct dxil_value *c0 = dxil_module_get_double_const(m, 0.0);

   if (!c0 || !c1)
      return false;

   ctx->mod.feats.doubles = 1;
   return emit_select(ctx, alu, val, c1, c0);
}

static bool
emit_f16tof32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val, bool shift)
{
   if (shift) {
      val = dxil_emit_binop(&ctx->mod, DXIL_BINOP_LSHR, val,
                            dxil_module_get_int32_const(&ctx->mod, 16), 0);
      if (!val)
         return false;
   }

   const struct dxil_func *func = dxil_get_function(&ctx->mod,
                                                    "dx.op.legacyF16ToF32",
                                                    DXIL_NONE);
   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F16TOF32);
   if (!opcode)
      return false;

   const struct dxil_value *args[] = {
      opcode,
      val
   };

   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

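/* nir_op_pack_half_2x16_split packs two f32 values into one 32-bit word.
 * dx.op.legacyF32ToF16 returns the converted half in the low 16 bits of an
 * i32, so the high half is converted separately, shifted left by 16 and OR'd
 * in; that work is skipped when the high source is known to be constant 0.
 */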
static bool
emit_f32tof16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val0, const struct dxil_value *val1)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod,
                                                    "dx.op.legacyF32ToF16",
                                                    DXIL_NONE);
   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F32TOF16);
   if (!opcode)
      return false;

   const struct dxil_value *args[] = {
      opcode,
      val0
   };

   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!v)
      return false;

   if (!nir_src_is_const(alu->src[1].src) || nir_src_as_int(alu->src[1].src) != 0) {
      args[1] = val1;
      const struct dxil_value *v_high = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
      if (!v_high)
         return false;

      v_high = dxil_emit_binop(&ctx->mod, DXIL_BINOP_SHL, v_high,
                               dxil_module_get_int32_const(&ctx->mod, 16), 0);
      if (!v_high)
         return false;

      v = dxil_emit_binop(&ctx->mod, DXIL_BINOP_OR, v, v_high, 0);
      if (!v)
         return false;
   }

   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_vec(struct ntd_context *ctx, nir_alu_instr *alu, unsigned num_inputs)
{
   for (unsigned i = 0; i < num_inputs; i++) {
      const struct dxil_value *src =
         get_src_ssa(ctx, alu->src[i].src.ssa, alu->src[i].swizzle[0]);
      if (!src)
         return false;

      store_alu_dest(ctx, alu, i, src);
   }
   return true;
}

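/* The NIR reaching this backend is scalarized, so vecN instructions don't
 * produce DXIL vectors; emit_vec above simply registers each source scalar
 * as one channel of the destination def for later per-channel lookups.
 */
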
static bool
emit_make_double(struct ntd_context *ctx, nir_alu_instr *alu)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.makeDouble", DXIL_F64);
   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_MAKE_DOUBLE);
   if (!opcode)
      return false;

   const struct dxil_value *args[3] = {
      opcode,
      get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_uint32),
      get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[1], nir_type_uint32),
   };
   if (!args[1] || !args[2])
      return false;

   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!v)
      return false;
   store_def(ctx, &alu->def, 0, v);
   return true;
}

static bool
emit_split_double(struct ntd_context *ctx, nir_alu_instr *alu)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.splitDouble", DXIL_F64);
   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SPLIT_DOUBLE);
   if (!opcode)
      return false;

   const struct dxil_value *args[] = {
      opcode,
      get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_float64)
   };
   if (!args[1])
      return false;

   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!v)
      return false;

   const struct dxil_value *hi = dxil_emit_extractval(&ctx->mod, v, 0);
   const struct dxil_value *lo = dxil_emit_extractval(&ctx->mod, v, 1);
   if (!hi || !lo)
      return false;

   store_def(ctx, &alu->def, 0, hi);
   store_def(ctx, &alu->def, 1, lo);
   return true;
}

static bool
emit_alu(struct ntd_context *ctx, nir_alu_instr *alu)
{
   /* Handle vec-instructions first; they are the only ones that produce
    * vector results.
    */
   switch (alu->op) {
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
   case nir_op_vec8:
   case nir_op_vec16:
      return emit_vec(ctx, alu, nir_op_infos[alu->op].num_inputs);
   case nir_op_mov: {
      assert(alu->def.num_components == 1);
      store_ssa_def(ctx, &alu->def, 0, get_src_ssa(ctx,
                    alu->src->src.ssa, alu->src->swizzle[0]));
      return true;
   }
   case nir_op_pack_double_2x32_dxil:
      return emit_make_double(ctx, alu);
   case nir_op_unpack_double_2x32_dxil:
      return emit_split_double(ctx, alu);
   case nir_op_bcsel: {
      /* Handled here to avoid the type-driven bitcast to int, since bcsel is
       * used for both ints and floats. Ideally the back-typing made both
       * sources match; if it didn't, explicitly reuse src1's type for src2.
       */
      const struct dxil_value *src1 = get_src_ssa(ctx, alu->src[1].src.ssa, alu->src[1].swizzle[0]);
      nir_alu_type src1_type = dxil_type_to_nir_type(dxil_value_get_type(src1));
      return emit_select(ctx, alu,
                         get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_bool),
                         src1,
                         get_src(ctx, &alu->src[2].src, alu->src[2].swizzle[0], src1_type));
   }
   default:
      /* silence warnings */
      ;
   }

   /* other ops should be scalar */
   const struct dxil_value *src[4];
   assert(nir_op_infos[alu->op].num_inputs <= 4);
   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
      src[i] = get_alu_src(ctx, alu, i);
      if (!src[i])
         return false;
   }

   switch (alu->op) {
   case nir_op_iadd:
   case nir_op_fadd: return emit_binop(ctx, alu, DXIL_BINOP_ADD, src[0], src[1]);

   case nir_op_isub:
   case nir_op_fsub: return emit_binop(ctx, alu, DXIL_BINOP_SUB, src[0], src[1]);

   case nir_op_imul:
   case nir_op_fmul: return emit_binop(ctx, alu, DXIL_BINOP_MUL, src[0], src[1]);

   case nir_op_fdiv:
      if (alu->def.bit_size == 64)
         ctx->mod.feats.dx11_1_double_extensions = 1;
      return emit_binop(ctx, alu, DXIL_BINOP_SDIV, src[0], src[1]);

   case nir_op_idiv:
   case nir_op_udiv:
      if (nir_src_is_const(alu->src[1].src)) {
         /* It's illegal to emit a literal divide by 0 in DXIL */
         nir_scalar divisor = nir_scalar_chase_alu_src(nir_get_scalar(&alu->def, 0), 1);
         if (nir_scalar_as_int(divisor) == 0) {
            store_alu_dest(ctx, alu, 0,
                           dxil_module_get_int_const(&ctx->mod, 0, alu->def.bit_size));
            return true;
         }
      }
      return emit_binop(ctx, alu, alu->op == nir_op_idiv ? DXIL_BINOP_SDIV : DXIL_BINOP_UDIV, src[0], src[1]);

   case nir_op_irem: return emit_binop(ctx, alu, DXIL_BINOP_SREM, src[0], src[1]);
   case nir_op_imod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);
   case nir_op_umod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);
   case nir_op_ishl: return emit_shift(ctx, alu, DXIL_BINOP_SHL, src[0], src[1]);
   case nir_op_ishr: return emit_shift(ctx, alu, DXIL_BINOP_ASHR, src[0], src[1]);
   case nir_op_ushr: return emit_shift(ctx, alu, DXIL_BINOP_LSHR, src[0], src[1]);
   case nir_op_iand: return emit_binop(ctx, alu, DXIL_BINOP_AND, src[0], src[1]);
   case nir_op_ior: return emit_binop(ctx, alu, DXIL_BINOP_OR, src[0], src[1]);
   case nir_op_ixor: return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], src[1]);
   case nir_op_inot: {
      unsigned bit_size = alu->def.bit_size;
      intmax_t val = bit_size == 1 ? 1 : -1;
      const struct dxil_value *negative_one = dxil_module_get_int_const(&ctx->mod, val, bit_size);
      return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], negative_one);
   }
   case nir_op_ieq: return emit_cmp(ctx, alu, DXIL_ICMP_EQ, src[0], src[1]);
   case nir_op_ine: return emit_cmp(ctx, alu, DXIL_ICMP_NE, src[0], src[1]);
   case nir_op_ige: return emit_cmp(ctx, alu, DXIL_ICMP_SGE, src[0], src[1]);
   case nir_op_uge: return emit_cmp(ctx, alu, DXIL_ICMP_UGE, src[0], src[1]);
   case nir_op_ilt: return emit_cmp(ctx, alu, DXIL_ICMP_SLT, src[0], src[1]);
   case nir_op_ult: return emit_cmp(ctx, alu, DXIL_ICMP_ULT, src[0], src[1]);
   case nir_op_feq: return emit_cmp(ctx, alu, DXIL_FCMP_OEQ, src[0], src[1]);
   case nir_op_fneu: return emit_cmp(ctx, alu, DXIL_FCMP_UNE, src[0], src[1]);
   case nir_op_flt: return emit_cmp(ctx, alu, DXIL_FCMP_OLT, src[0], src[1]);
   case nir_op_fge: return emit_cmp(ctx, alu, DXIL_FCMP_OGE, src[0], src[1]);
   case nir_op_ftrunc: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_Z, src[0]);
   case nir_op_fabs: return emit_unary_intin(ctx, alu, DXIL_INTR_FABS, src[0]);
   case nir_op_fcos: return emit_unary_intin(ctx, alu, DXIL_INTR_FCOS, src[0]);
   case nir_op_fsin: return emit_unary_intin(ctx, alu, DXIL_INTR_FSIN, src[0]);
   case nir_op_fceil: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_PI, src[0]);
   case nir_op_fexp2: return emit_unary_intin(ctx, alu, DXIL_INTR_FEXP2, src[0]);
   case nir_op_flog2: return emit_unary_intin(ctx, alu, DXIL_INTR_FLOG2, src[0]);
   case nir_op_ffloor: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NI, src[0]);
   case nir_op_ffract: return emit_unary_intin(ctx, alu, DXIL_INTR_FRC, src[0]);
   case nir_op_fisnormal: return emit_unary_intin(ctx, alu, DXIL_INTR_ISNORMAL, src[0]);
   case nir_op_fisfinite: return emit_unary_intin(ctx, alu, DXIL_INTR_ISFINITE, src[0]);

   case nir_op_fround_even: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NE, src[0]);
2979 case nir_op_frcp: {
2980 const struct dxil_value *one;
2981 switch (alu->def.bit_size) {
2982 case 16:
2983 one = dxil_module_get_float16_const(&ctx->mod, 0x3C00);
2984 break;
2985 case 32:
2986 one = dxil_module_get_float_const(&ctx->mod, 1.0f);
2987 break;
2988 case 64:
2989 one = dxil_module_get_double_const(&ctx->mod, 1.0);
2990 break;
2991 default: unreachable("Invalid float size");
2992 }
2993 return emit_binop(ctx, alu, DXIL_BINOP_SDIV, one, src[0]);
2994 }
2995 case nir_op_fsat: return emit_unary_intin(ctx, alu, DXIL_INTR_SATURATE, src[0]);
2996 case nir_op_bit_count: return emit_unary_intin(ctx, alu, DXIL_INTR_COUNTBITS, src[0]);
2997 case nir_op_bitfield_reverse: return emit_unary_intin(ctx, alu, DXIL_INTR_BFREV, src[0]);
2998 case nir_op_ufind_msb_rev: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_HI, src[0]);
2999 case nir_op_ifind_msb_rev: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_SHI, src[0]);
3000 case nir_op_find_lsb: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_LO, src[0]);
3001 case nir_op_imax: return emit_binary_intin(ctx, alu, DXIL_INTR_IMAX, src[0], src[1]);
3002 case nir_op_imin: return emit_binary_intin(ctx, alu, DXIL_INTR_IMIN, src[0], src[1]);
3003 case nir_op_umax: return emit_binary_intin(ctx, alu, DXIL_INTR_UMAX, src[0], src[1]);
3004 case nir_op_umin: return emit_binary_intin(ctx, alu, DXIL_INTR_UMIN, src[0], src[1]);
3005 case nir_op_frsq: return emit_unary_intin(ctx, alu, DXIL_INTR_RSQRT, src[0]);
3006 case nir_op_fsqrt: return emit_unary_intin(ctx, alu, DXIL_INTR_SQRT, src[0]);
3007 case nir_op_fmax: return emit_binary_intin(ctx, alu, DXIL_INTR_FMAX, src[0], src[1]);
3008 case nir_op_fmin: return emit_binary_intin(ctx, alu, DXIL_INTR_FMIN, src[0], src[1]);
3009 case nir_op_ffma:
3010 if (alu->def.bit_size == 64)
3011 ctx->mod.feats.dx11_1_double_extensions = 1;
3012 return emit_tertiary_intin(ctx, alu, DXIL_INTR_FMA, src[0], src[1], src[2]);
3013
3014 case nir_op_ibfe: return emit_tertiary_intin(ctx, alu, DXIL_INTR_IBFE, src[2], src[1], src[0]);
3015 case nir_op_ubfe: return emit_tertiary_intin(ctx, alu, DXIL_INTR_UBFE, src[2], src[1], src[0]);
3016 case nir_op_bitfield_insert: return emit_bitfield_insert(ctx, alu, src[0], src[1], src[2], src[3]);
3017
3018 case nir_op_unpack_half_2x16_split_x: return emit_f16tof32(ctx, alu, src[0], false);
3019 case nir_op_unpack_half_2x16_split_y: return emit_f16tof32(ctx, alu, src[0], true);
3020 case nir_op_pack_half_2x16_split: return emit_f32tof16(ctx, alu, src[0], src[1]);
3021
3022 case nir_op_sdot_4x8_iadd: return emit_dot4add_packed(ctx, alu, DXIL_INTR_DOT4_ADD_I8_PACKED, src[0], src[1], src[2]);
3023 case nir_op_udot_4x8_uadd: return emit_dot4add_packed(ctx, alu, DXIL_INTR_DOT4_ADD_U8_PACKED, src[0], src[1], src[2]);
3024
3025 case nir_op_i2i1:
3026 case nir_op_u2u1:
3027 case nir_op_b2i16:
3028 case nir_op_i2i16:
3029 case nir_op_i2imp:
3030 case nir_op_f2i16:
3031 case nir_op_f2imp:
3032 case nir_op_f2u16:
3033 case nir_op_f2ump:
3034 case nir_op_u2u16:
3035 case nir_op_u2f16:
3036 case nir_op_u2fmp:
3037 case nir_op_i2f16:
3038 case nir_op_i2fmp:
3039 case nir_op_f2f16_rtz:
3040 case nir_op_f2f16:
3041 case nir_op_f2fmp:
3042 case nir_op_b2i32:
3043 case nir_op_f2f32:
3044 case nir_op_f2i32:
3045 case nir_op_f2u32:
3046 case nir_op_i2f32:
3047 case nir_op_i2i32:
3048 case nir_op_u2f32:
3049 case nir_op_u2u32:
3050 case nir_op_b2i64:
3051 case nir_op_f2f64:
3052 case nir_op_f2i64:
3053 case nir_op_f2u64:
3054 case nir_op_i2f64:
3055 case nir_op_i2i64:
3056 case nir_op_u2f64:
3057 case nir_op_u2u64:
3058 return emit_cast(ctx, alu, src[0]);
3059
3060 case nir_op_b2f16: return emit_b2f16(ctx, alu, src[0]);
3061 case nir_op_b2f32: return emit_b2f32(ctx, alu, src[0]);
3062 case nir_op_b2f64: return emit_b2f64(ctx, alu, src[0]);
3063 default:
3064 log_nir_instr_unsupported(ctx->logger, "Unimplemented ALU instruction",
3065 &alu->instr);
3066 return false;
3067 }
3068 }
3069
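/* Loads one 16-byte "legacy" constant-buffer row: dx.op.cbufferLoadLegacy
 * takes a vec4-granular row index (not a byte offset) and returns an
 * aggregate whose elements callers pick apart with extractvalue.
 */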
static const struct dxil_value *
load_ubo(struct ntd_context *ctx, const struct dxil_value *handle,
         const struct dxil_value *offset, enum overload_type overload)
{
   assert(handle && offset);

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CBUFFER_LOAD_LEGACY);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode, handle, offset
   };

   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cbufferLoadLegacy", overload);
   if (!func)
      return NULL;
   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

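/* Lowers a NIR barrier to dx.op.barrier. DXIL encodes barriers as a mode
 * bitmask: an optional thread-group execution sync, plus a UAV fence
 * (device- or group-scoped, depending on the memory modes and scopes
 * involved) and, for compute shaders, a groupshared-memory fence.
 */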
static bool
emit_barrier_impl(struct ntd_context *ctx, nir_variable_mode modes, mesa_scope execution_scope, mesa_scope mem_scope)
{
   const struct dxil_value *opcode, *mode;
   const struct dxil_func *func;
   uint32_t flags = 0;

   if (execution_scope == SCOPE_WORKGROUP)
      flags |= DXIL_BARRIER_MODE_SYNC_THREAD_GROUP;

   bool is_compute = ctx->mod.shader_kind == DXIL_COMPUTE_SHADER;

   if ((modes & (nir_var_mem_ssbo | nir_var_mem_global | nir_var_image)) &&
       (mem_scope > SCOPE_WORKGROUP || !is_compute)) {
      flags |= DXIL_BARRIER_MODE_UAV_FENCE_GLOBAL;
   } else {
      flags |= DXIL_BARRIER_MODE_UAV_FENCE_THREAD_GROUP;
   }

   if ((modes & nir_var_mem_shared) && is_compute)
      flags |= DXIL_BARRIER_MODE_GROUPSHARED_MEM_FENCE;

   func = dxil_get_function(&ctx->mod, "dx.op.barrier", DXIL_NONE);
   if (!func)
      return false;

   opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_BARRIER);
   if (!opcode)
      return false;

   mode = dxil_module_get_int32_const(&ctx->mod, flags);
   if (!mode)
      return false;

   const struct dxil_value *args[] = { opcode, mode };

   return dxil_emit_call_void(&ctx->mod, func,
                              args, ARRAY_SIZE(args));
}

static bool
emit_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   return emit_barrier_impl(ctx,
                            nir_intrinsic_memory_modes(intr),
                            nir_intrinsic_execution_scope(intr),
                            nir_intrinsic_memory_scope(intr));
}

static bool
emit_load_global_invocation_id(struct ntd_context *ctx,
                               nir_intrinsic_instr *intr)
{
   nir_component_mask_t comps = nir_def_components_read(&intr->def);

   for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
      if (comps & (1 << i)) {
         const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
         if (!idx)
            return false;
         const struct dxil_value *globalid = emit_threadid_call(ctx, idx);

         if (!globalid)
            return false;

         store_def(ctx, &intr->def, i, globalid);
      }
   }
   return true;
}

static bool
emit_load_local_invocation_id(struct ntd_context *ctx,
                              nir_intrinsic_instr *intr)
{
   nir_component_mask_t comps = nir_def_components_read(&intr->def);

   for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
      if (comps & (1 << i)) {
         const struct dxil_value
            *idx = dxil_module_get_int32_const(&ctx->mod, i);
         if (!idx)
            return false;
         const struct dxil_value
            *threadidingroup = emit_threadidingroup_call(ctx, idx);
         if (!threadidingroup)
            return false;
         store_def(ctx, &intr->def, i, threadidingroup);
      }
   }
   return true;
}

static bool
emit_load_local_invocation_index(struct ntd_context *ctx,
                                 nir_intrinsic_instr *intr)
{
   const struct dxil_value
      *flattenedthreadidingroup = emit_flattenedthreadidingroup_call(ctx);
   if (!flattenedthreadidingroup)
      return false;
   store_def(ctx, &intr->def, 0, flattenedthreadidingroup);

   return true;
}

static bool
emit_load_local_workgroup_id(struct ntd_context *ctx,
                             nir_intrinsic_instr *intr)
{
   nir_component_mask_t comps = nir_def_components_read(&intr->def);

   for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
      if (comps & (1 << i)) {
         const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
         if (!idx)
            return false;
         const struct dxil_value *groupid = emit_groupid_call(ctx, idx);
         if (!groupid)
            return false;
         store_def(ctx, &intr->def, i, groupid);
      }
   }
   return true;
}

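/* Helper for DXIL ops that take no arguments beyond the opcode and return a
 * single scalar, e.g. dx.op.coverage or dx.op.sampleIndex.
 */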
static const struct dxil_value *
call_unary_external_function(struct ntd_context *ctx,
                             const char *name,
                             int32_t dxil_intr,
                             enum overload_type overload)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, name, overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, dxil_intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {opcode};

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_load_unary_external_function(struct ntd_context *ctx,
                                  nir_intrinsic_instr *intr, const char *name,
                                  int32_t dxil_intr,
                                  nir_alu_type type)
{
   const struct dxil_value *value = call_unary_external_function(ctx, name, dxil_intr,
                                                                 get_overload(type, intr->def.bit_size));
   store_def(ctx, &intr->def, 0, value);

   return true;
}

static bool
emit_load_sample_mask_in(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *value = call_unary_external_function(ctx,
      "dx.op.coverage", DXIL_INTR_COVERAGE, DXIL_I32);

   /* Mask coverage with (1 << sample index). Note, done as an AND to handle extrapolation cases. */
   if (ctx->mod.info.has_per_sample_input) {
      value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_AND, value,
         dxil_emit_binop(&ctx->mod, DXIL_BINOP_SHL,
            dxil_module_get_int32_const(&ctx->mod, 1),
            call_unary_external_function(ctx, "dx.op.sampleIndex", DXIL_INTR_SAMPLE_INDEX, DXIL_I32), 0), 0);
   }

   store_def(ctx, &intr->def, 0, value);
   return true;
}

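/* dx.op.domainLocation returns one tess-coord component per call: three
 * components for triangle domains, two otherwise. Any components NIR
 * expects beyond that are padded with 0.0.
 */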
static bool
emit_load_tess_coord(struct ntd_context *ctx,
                     nir_intrinsic_instr *intr)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.domainLocation", DXIL_F32);
   if (!func)
      return false;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DOMAIN_LOCATION);
   if (!opcode)
      return false;

   unsigned num_coords = ctx->shader->info.tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES ? 3 : 2;
   for (unsigned i = 0; i < num_coords; ++i) {
      unsigned component_idx = i;

      const struct dxil_value *component = dxil_module_get_int8_const(&ctx->mod, component_idx);
      if (!component)
         return false;

      const struct dxil_value *args[] = { opcode, component };

      const struct dxil_value *value =
         dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
      store_def(ctx, &intr->def, i, value);
   }

   for (unsigned i = num_coords; i < intr->def.num_components; ++i) {
      const struct dxil_value *value = dxil_module_get_float_const(&ctx->mod, 0.0f);
      store_def(ctx, &intr->def, i, value);
   }

   return true;
}

static const struct dxil_value *
get_int32_undef(struct dxil_module *m)
{
   const struct dxil_type *int32_type =
      dxil_module_get_int_type(m, 32);
   if (!int32_type)
      return NULL;

   return dxil_module_get_undef(m, int32_type);
}

static const struct dxil_value *
get_resource_handle(struct ntd_context *ctx, nir_src *src, enum dxil_resource_class class,
                    enum dxil_resource_kind kind)
{
   /* This source might be one of:
    * 1. Constant resource index - just look it up in precomputed handle arrays
    *    If it's null in that array, create a handle
    * 2. A handle from load_vulkan_descriptor - just get the stored SSA value
    * 3. Dynamic resource index - create a handle for it here
    */
   assert(src->ssa->num_components == 1 && src->ssa->bit_size == 32);
   nir_const_value *const_block_index = nir_src_as_const_value(*src);
   const struct dxil_value *handle_entry = NULL;
   if (const_block_index) {
      assert(ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN);
      switch (kind) {
      case DXIL_RESOURCE_KIND_CBUFFER:
         handle_entry = ctx->cbv_handles[const_block_index->u32];
         break;
      case DXIL_RESOURCE_KIND_RAW_BUFFER:
         if (class == DXIL_RESOURCE_CLASS_UAV)
            handle_entry = ctx->ssbo_handles[const_block_index->u32];
         else
            handle_entry = ctx->srv_handles[const_block_index->u32];
         break;
      case DXIL_RESOURCE_KIND_SAMPLER:
         handle_entry = ctx->sampler_handles[const_block_index->u32];
         break;
      default:
         if (class == DXIL_RESOURCE_CLASS_UAV)
            handle_entry = ctx->image_handles[const_block_index->u32];
         else
            handle_entry = ctx->srv_handles[const_block_index->u32];
         break;
      }
   }

   if (handle_entry)
      return handle_entry;

   if (nir_src_as_deref(*src) ||
       ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      return get_src_ssa(ctx, src->ssa, 0);
   }

   unsigned space = 0;
   if (ctx->opts->environment == DXIL_ENVIRONMENT_GL &&
       class == DXIL_RESOURCE_CLASS_UAV) {
      if (kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
         space = 2;
      else
         space = 1;
   }

   /* The base binding here will almost always be zero. The only cases where we end
    * up in this type of dynamic indexing are:
    * 1. GL UBOs
    * 2. GL SSBOs
    * 3. CL SSBOs
    * In all cases except GL UBOs, the resources are a single zero-based array.
    * In that case, the base is 1, because uniforms use 0 and cannot be dynamically
    * indexed. All other cases should either fall into static indexing (first early return),
    * deref-based dynamic handle creation (images, or Vulkan textures/samplers), or
    * load_vulkan_descriptor handle creation.
    */
   unsigned base_binding = 0;
   if (ctx->shader->info.first_ubo_is_default_ubo &&
       class == DXIL_RESOURCE_CLASS_CBV)
      base_binding = 1;

   const struct dxil_value *value = get_src(ctx, src, 0, nir_type_uint);
   const struct dxil_value *handle = emit_createhandle_call_dynamic(ctx, class,
      space, base_binding, value, !const_block_index);

   return handle;
}

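/* Bindless (descriptor-heap) path, used for the nir_intrinsic_bindless_*
 * intrinsics: build an unannotated handle from the heap index, then
 * annotate it with constant resource properties so the validator knows
 * what kind of resource is behind it.
 */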
static const struct dxil_value *
create_image_handle(struct ntd_context *ctx, nir_intrinsic_instr *image_intr)
{
   const struct dxil_value *unannotated_handle =
      emit_createhandle_heap(ctx, get_src(ctx, &image_intr->src[0], 0, nir_type_uint32), false, true /*TODO: divergence*/);
   const struct dxil_value *res_props =
      dxil_module_get_uav_res_props_const(&ctx->mod, image_intr);

   if (!unannotated_handle || !res_props)
      return NULL;

   return emit_annotate_handle(ctx, unannotated_handle, res_props);
}

static const struct dxil_value *
create_srv_handle(struct ntd_context *ctx, nir_tex_instr *tex, nir_src *src)
{
   const struct dxil_value *unannotated_handle =
      emit_createhandle_heap(ctx, get_src(ctx, src, 0, nir_type_uint32), false, true /*TODO: divergence*/);
   const struct dxil_value *res_props =
      dxil_module_get_srv_res_props_const(&ctx->mod, tex);

   if (!unannotated_handle || !res_props)
      return NULL;

   return emit_annotate_handle(ctx, unannotated_handle, res_props);
}

static const struct dxil_value *
create_sampler_handle(struct ntd_context *ctx, bool is_shadow, nir_src *src)
{
   const struct dxil_value *unannotated_handle =
      emit_createhandle_heap(ctx, get_src(ctx, src, 0, nir_type_uint32), true, true /*TODO: divergence*/);
   const struct dxil_value *res_props =
      dxil_module_get_sampler_res_props_const(&ctx->mod, is_shadow);

   if (!unannotated_handle || !res_props)
      return NULL;

   return emit_annotate_handle(ctx, unannotated_handle, res_props);
}

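/* SSBO access is raw-buffer access. When the shader model supports it
 * (minor_version >= 2), use dx.op.rawBufferLoad/Store, which handles
 * per-component loads and 16-bit types; older targets fall back to the
 * legacy dx.op.bufferLoad/bufferStore ops.
 */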
static bool
emit_load_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);

   enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV;
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
      if (var && var->data.access & ACCESS_NON_WRITEABLE)
         class = DXIL_RESOURCE_CLASS_SRV;
   }

   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], class, DXIL_RESOURCE_KIND_RAW_BUFFER);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[1], 0, nir_type_uint);
   if (!int32_undef || !handle || !offset)
      return false;

   assert(nir_src_bit_size(intr->src[0]) == 32);
   assert(nir_intrinsic_dest_components(intr) <= 4);

   const struct dxil_value *coord[2] = {
      offset,
      int32_undef
   };

   enum overload_type overload = get_ambiguous_overload_alu_type(ctx, intr, nir_type_uint);
   const struct dxil_value *load = ctx->mod.minor_version >= 2 ?
      emit_raw_bufferload_call(ctx, handle, coord,
                               overload,
                               nir_intrinsic_dest_components(intr),
                               intr->def.bit_size / 8) :
      emit_bufferload_call(ctx, handle, coord, overload);
   if (!load)
      return false;

   for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
      const struct dxil_value *val =
         dxil_emit_extractval(&ctx->mod, load, i);
      if (!val)
         return false;
      store_def(ctx, &intr->def, i, val);
   }
   if (intr->def.bit_size == 16)
      ctx->mod.feats.native_low_precision = true;
   return true;
}

static bool
emit_store_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[1], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[2], 0, nir_type_uint);
   if (!handle || !offset)
      return false;

   unsigned num_components = nir_src_num_components(intr->src[0]);
   assert(num_components <= 4);
   if (nir_src_bit_size(intr->src[0]) == 16)
      ctx->mod.feats.native_low_precision = true;

   nir_alu_type type =
      dxil_type_to_nir_type(dxil_value_get_type(get_src_ssa(ctx, intr->src[0].ssa, 0)));
   const struct dxil_value *value[4] = { 0 };
   for (unsigned i = 0; i < num_components; ++i) {
      value[i] = get_src(ctx, &intr->src[0], i, type);
      if (!value[i])
         return false;
   }

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[2] = {
      offset,
      int32_undef
   };

   enum overload_type overload = get_overload(type, intr->src[0].ssa->bit_size);
   if (num_components < 4) {
      const struct dxil_value *value_undef = dxil_module_get_undef(&ctx->mod, dxil_value_get_type(value[0]));
      if (!value_undef)
         return false;

      for (int i = num_components; i < 4; ++i)
         value[i] = value_undef;
   }

   const struct dxil_value *write_mask =
      dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
   if (!write_mask)
      return false;

   return ctx->mod.minor_version >= 2 ?
      emit_raw_bufferstore_call(ctx, handle, coord, value, write_mask, overload, intr->src[0].ssa->bit_size / 8) :
      emit_bufferstore_call(ctx, handle, coord, value, write_mask, overload);
}

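/* UBO loads are vec4-aligned: src[1] is a 16-byte row index, and the
 * intrinsic's component index selects the first element within the loaded
 * row.
 */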
static bool
emit_load_ubo_vec4(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, DXIL_RESOURCE_KIND_CBUFFER);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[1], 0, nir_type_uint);

   if (!handle || !offset)
      return false;

   enum overload_type overload = get_ambiguous_overload_alu_type(ctx, intr, nir_type_uint);
   const struct dxil_value *agg = load_ubo(ctx, handle, offset, overload);
   if (!agg)
      return false;

   unsigned first_component = nir_intrinsic_has_component(intr) ?
      nir_intrinsic_component(intr) : 0;
   for (unsigned i = 0; i < intr->def.num_components; i++)
      store_def(ctx, &intr->def, i,
                dxil_emit_extractval(&ctx->mod, agg, i + first_component));

   if (intr->def.bit_size == 16)
      ctx->mod.feats.native_low_precision = true;
   return true;
}

/* Need to add patch-ness as a matching parameter, since driver_location is *not* unique
 * between control points and patch variables in HS/DS
 */
static nir_variable *
find_patch_matching_variable_by_driver_location(nir_shader *s, nir_variable_mode mode, unsigned driver_location, bool patch)
{
   nir_foreach_variable_with_modes(var, s, mode) {
      if (var->data.driver_location == driver_location &&
          var->data.patch == patch)
         return var;
   }
   return NULL;
}

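/* Stores a shader output (or, in hull shaders, a patch constant). For
 * validator 1.5+ this also maintains the signature bookkeeping: written
 * components clear their never_writes_mask bits, and dynamically indexed
 * rows get flagged in the PSV element.
 */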
static bool
emit_store_output_via_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   assert(intr->intrinsic == nir_intrinsic_store_output ||
          ctx->mod.shader_kind == DXIL_HULL_SHADER);
   bool is_patch_constant = intr->intrinsic == nir_intrinsic_store_output &&
                            ctx->mod.shader_kind == DXIL_HULL_SHADER;
   nir_alu_type out_type = nir_intrinsic_src_type(intr);
   enum overload_type overload = get_overload(out_type, intr->src[0].ssa->bit_size);
   const struct dxil_func *func = dxil_get_function(&ctx->mod, is_patch_constant ?
                                                    "dx.op.storePatchConstant" : "dx.op.storeOutput",
                                                    overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, is_patch_constant ?
      DXIL_INTR_STORE_PATCH_CONSTANT : DXIL_INTR_STORE_OUTPUT);
   const struct dxil_value *output_id = dxil_module_get_int32_const(&ctx->mod, nir_intrinsic_base(intr));
   unsigned row_index = intr->intrinsic == nir_intrinsic_store_output ? 1 : 2;

   /* NIR has these as 1 row, N cols, but DXIL wants them as N rows, 1 col. We muck with these in the signature
    * generation, so muck with them here too.
    */
   nir_io_semantics semantics = nir_intrinsic_io_semantics(intr);
   bool is_tess_level = is_patch_constant &&
                        (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
                         semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER);

   const struct dxil_value *row = NULL;
   const struct dxil_value *col = NULL;
   if (is_tess_level)
      col = dxil_module_get_int8_const(&ctx->mod, 0);
   else
      row = get_src(ctx, &intr->src[row_index], 0, nir_type_int);

   bool success = true;
   uint32_t writemask = nir_intrinsic_write_mask(intr);

   nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_out, nir_intrinsic_base(intr), is_patch_constant);
   unsigned var_base_component = var->data.location_frac;
   unsigned base_component = nir_intrinsic_component(intr) - var_base_component;

   if (ctx->mod.minor_validator >= 5) {
      struct dxil_signature_record *sig_rec = is_patch_constant ?
         &ctx->mod.patch_consts[nir_intrinsic_base(intr)] :
         &ctx->mod.outputs[nir_intrinsic_base(intr)];
      unsigned comp_size = intr->src[0].ssa->bit_size == 64 ? 2 : 1;
      unsigned comp_mask = 0;
      if (is_tess_level)
         comp_mask = 1;
      else if (comp_size == 1)
         comp_mask = writemask << var_base_component;
      else {
         for (unsigned i = 0; i < intr->num_components; ++i)
            if ((writemask & (1 << i)))
               comp_mask |= 3 << ((i + var_base_component) * comp_size);
      }
      for (unsigned r = 0; r < sig_rec->num_elements; ++r)
         sig_rec->elements[r].never_writes_mask &= ~comp_mask;

      if (!nir_src_is_const(intr->src[row_index])) {
         struct dxil_psv_signature_element *psv_rec = is_patch_constant ?
            &ctx->mod.psv_patch_consts[nir_intrinsic_base(intr)] :
            &ctx->mod.psv_outputs[nir_intrinsic_base(intr)];
         psv_rec->dynamic_mask_and_stream |= comp_mask;
      }
   }

   for (unsigned i = 0; i < intr->num_components && success; ++i) {
      if (writemask & (1 << i)) {
         if (is_tess_level)
            row = dxil_module_get_int32_const(&ctx->mod, i + base_component);
         else
            col = dxil_module_get_int8_const(&ctx->mod, i + base_component);
         const struct dxil_value *value = get_src(ctx, &intr->src[0], i, out_type);
         if (!col || !row || !value)
            return false;

         const struct dxil_value *args[] = {
            opcode, output_id, row, col, value
         };
         success &= dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
      }
   }

   return success;
}

static bool
emit_load_input_via_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   bool attr_at_vertex = false;
   if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER &&
       ctx->opts->interpolate_at_vertex &&
       ctx->opts->provoking_vertex != 0 &&
       (nir_intrinsic_dest_type(intr) & nir_type_float)) {
      nir_variable *var = nir_find_variable_with_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr));

      attr_at_vertex = var && var->data.interpolation == INTERP_MODE_FLAT;
   }

   bool is_patch_constant = (ctx->mod.shader_kind == DXIL_DOMAIN_SHADER &&
                             intr->intrinsic == nir_intrinsic_load_input) ||
                            (ctx->mod.shader_kind == DXIL_HULL_SHADER &&
                             intr->intrinsic == nir_intrinsic_load_output);
   bool is_output_control_point = intr->intrinsic == nir_intrinsic_load_per_vertex_output;

   unsigned opcode_val;
   const char *func_name;
   if (attr_at_vertex) {
      opcode_val = DXIL_INTR_ATTRIBUTE_AT_VERTEX;
      func_name = "dx.op.attributeAtVertex";
      if (ctx->mod.minor_validator >= 6)
         ctx->mod.feats.barycentrics = 1;
   } else if (is_patch_constant) {
      opcode_val = DXIL_INTR_LOAD_PATCH_CONSTANT;
      func_name = "dx.op.loadPatchConstant";
   } else if (is_output_control_point) {
      opcode_val = DXIL_INTR_LOAD_OUTPUT_CONTROL_POINT;
      func_name = "dx.op.loadOutputControlPoint";
   } else {
      opcode_val = DXIL_INTR_LOAD_INPUT;
      func_name = "dx.op.loadInput";
   }

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, opcode_val);
   if (!opcode)
      return false;

   const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod,
      is_patch_constant || is_output_control_point ?
         nir_intrinsic_base(intr) :
         ctx->mod.input_mappings[nir_intrinsic_base(intr)]);
   if (!input_id)
      return false;

   bool is_per_vertex =
      intr->intrinsic == nir_intrinsic_load_per_vertex_input ||
      intr->intrinsic == nir_intrinsic_load_per_vertex_output;
   int row_index = is_per_vertex ? 1 : 0;
   const struct dxil_value *vertex_id = NULL;
   if (!is_patch_constant) {
      if (is_per_vertex) {
         vertex_id = get_src(ctx, &intr->src[0], 0, nir_type_int);
      } else if (attr_at_vertex) {
         vertex_id = dxil_module_get_int8_const(&ctx->mod, ctx->opts->provoking_vertex);
      } else {
         const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
         if (!int32_type)
            return false;

         vertex_id = dxil_module_get_undef(&ctx->mod, int32_type);
      }
      if (!vertex_id)
         return false;
   }

   /* NIR has these as 1 row, N cols, but DXIL wants them as N rows, 1 col. We muck with these in the signature
    * generation, so muck with them here too.
    */
   nir_io_semantics semantics = nir_intrinsic_io_semantics(intr);
   bool is_tess_level = is_patch_constant &&
                        (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
                         semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER);

   const struct dxil_value *row = NULL;
   const struct dxil_value *comp = NULL;
   if (is_tess_level)
      comp = dxil_module_get_int8_const(&ctx->mod, 0);
   else
      row = get_src(ctx, &intr->src[row_index], 0, nir_type_int);

   nir_alu_type out_type = nir_intrinsic_dest_type(intr);
   enum overload_type overload = get_overload(out_type, intr->def.bit_size);

   const struct dxil_func *func = dxil_get_function(&ctx->mod, func_name, overload);

   if (!func)
      return false;

   nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr), is_patch_constant);
   unsigned var_base_component = var ? var->data.location_frac : 0;
   unsigned base_component = nir_intrinsic_component(intr) - var_base_component;

   if (ctx->mod.minor_validator >= 5 &&
       !is_output_control_point &&
       intr->intrinsic != nir_intrinsic_load_output) {
      struct dxil_signature_record *sig_rec = is_patch_constant ?
         &ctx->mod.patch_consts[nir_intrinsic_base(intr)] :
         &ctx->mod.inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]];
      unsigned comp_size = intr->def.bit_size == 64 ? 2 : 1;
      unsigned comp_mask = (1 << (intr->num_components * comp_size)) - 1;
      comp_mask <<= (var_base_component * comp_size);
      if (is_tess_level)
         comp_mask = 1;
      for (unsigned r = 0; r < sig_rec->num_elements; ++r)
         sig_rec->elements[r].always_reads_mask |= (comp_mask & sig_rec->elements[r].mask);

      if (!nir_src_is_const(intr->src[row_index])) {
         struct dxil_psv_signature_element *psv_rec = is_patch_constant ?
            &ctx->mod.psv_patch_consts[nir_intrinsic_base(intr)] :
            &ctx->mod.psv_inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]];
         psv_rec->dynamic_mask_and_stream |= comp_mask;
      }
   }

   for (unsigned i = 0; i < intr->num_components; ++i) {
      if (is_tess_level)
         row = dxil_module_get_int32_const(&ctx->mod, i + base_component);
      else
         comp = dxil_module_get_int8_const(&ctx->mod, i + base_component);

      if (!row || !comp)
         return false;

      const struct dxil_value *args[] = {
         opcode, input_id, row, comp, vertex_id
      };

      unsigned num_args = ARRAY_SIZE(args) - (is_patch_constant ? 1 : 0);
      const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, num_args);
      if (!retval)
         return false;
      store_def(ctx, &intr->def, i, retval);
   }
   return true;
}

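/* Maps NIR's barycentric load intrinsics onto the DXIL eval ops:
 * evalSnapped for pixel/offset interpolation, evalSampleIndex for
 * per-sample, and evalCentroid for centroid interpolation.
 */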
static bool
emit_load_interpolated_input(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   nir_intrinsic_instr *barycentric = nir_src_as_intrinsic(intr->src[0]);

   const struct dxil_value *args[6] = { 0 };

   unsigned opcode_val;
   const char *func_name;
   unsigned num_args;
   switch (barycentric->intrinsic) {
   case nir_intrinsic_load_barycentric_at_offset:
      opcode_val = DXIL_INTR_EVAL_SNAPPED;
      func_name = "dx.op.evalSnapped";
      num_args = 6;
      for (unsigned i = 0; i < 2; ++i) {
         const struct dxil_value *float_offset = get_src(ctx, &barycentric->src[0], i, nir_type_float);
         /* GLSL offsets are in [-0.5f, 0.5f); DXIL wants snapped integer
          * offsets in [-8, 7] (16ths of a pixel), hence the scale by 16. */
         const struct dxil_value *offset_16 = dxil_emit_binop(&ctx->mod,
            DXIL_BINOP_MUL, float_offset, dxil_module_get_float_const(&ctx->mod, 16.0f), 0);
         args[i + 4] = dxil_emit_cast(&ctx->mod, DXIL_CAST_FPTOSI,
            dxil_module_get_int_type(&ctx->mod, 32), offset_16);
      }
      break;
   case nir_intrinsic_load_barycentric_pixel:
      opcode_val = DXIL_INTR_EVAL_SNAPPED;
      func_name = "dx.op.evalSnapped";
      num_args = 6;
      args[4] = args[5] = dxil_module_get_int32_const(&ctx->mod, 0);
      break;
   case nir_intrinsic_load_barycentric_at_sample:
      opcode_val = DXIL_INTR_EVAL_SAMPLE_INDEX;
      func_name = "dx.op.evalSampleIndex";
      num_args = 5;
      args[4] = get_src(ctx, &barycentric->src[0], 0, nir_type_int);
      break;
   case nir_intrinsic_load_barycentric_centroid:
      opcode_val = DXIL_INTR_EVAL_CENTROID;
      func_name = "dx.op.evalCentroid";
      num_args = 4;
      break;
   default:
      unreachable("Unsupported interpolation barycentric intrinsic");
   }
   args[0] = dxil_module_get_int32_const(&ctx->mod, opcode_val);
   args[1] = dxil_module_get_int32_const(&ctx->mod, nir_intrinsic_base(intr));
   args[2] = get_src(ctx, &intr->src[1], 0, nir_type_int);

   const struct dxil_func *func = dxil_get_function(&ctx->mod, func_name, DXIL_F32);

   if (!func)
      return false;

   nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr), false);
   unsigned var_base_component = var ? var->data.location_frac : 0;
   unsigned base_component = nir_intrinsic_component(intr) - var_base_component;

   if (ctx->mod.minor_validator >= 5) {
      struct dxil_signature_record *sig_rec =
         &ctx->mod.inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]];
      unsigned comp_size = intr->def.bit_size == 64 ? 2 : 1;
      unsigned comp_mask = (1 << (intr->num_components * comp_size)) - 1;
      comp_mask <<= (var_base_component * comp_size);
      for (unsigned r = 0; r < sig_rec->num_elements; ++r)
         sig_rec->elements[r].always_reads_mask |= (comp_mask & sig_rec->elements[r].mask);

      if (!nir_src_is_const(intr->src[1])) {
         struct dxil_psv_signature_element *psv_rec =
            &ctx->mod.psv_inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]];
         psv_rec->dynamic_mask_and_stream |= comp_mask;
      }
   }

   for (unsigned i = 0; i < intr->num_components; ++i) {
      args[3] = dxil_module_get_int8_const(&ctx->mod, i + base_component);

      const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, num_args);
      if (!retval)
         return false;
      store_def(ctx, &intr->def, i, retval);
   }
   return true;
}

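/* Rebuilds a NIR deref chain as an in-bounds GEP: the base comes from the
 * handle array matching the variable's mode (constant, shared, or scratch),
 * and the remaining indices come straight from the deref path.
 */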
static const struct dxil_value *
deref_to_gep(struct ntd_context *ctx, nir_deref_instr *deref)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, ctx->ralloc_ctx);
   assert(path.path[0]->deref_type == nir_deref_type_var);
   uint32_t count = 0;
   while (path.path[count])
      ++count;

   const struct dxil_value **gep_indices = ralloc_array(ctx->ralloc_ctx,
                                                        const struct dxil_value *,
                                                        count + 1);
   nir_variable *var = path.path[0]->var;
   const struct dxil_value **var_array;
   switch (deref->modes) {
   case nir_var_mem_constant: var_array = ctx->consts; break;
   case nir_var_mem_shared: var_array = ctx->sharedvars; break;
   case nir_var_function_temp: var_array = ctx->scratchvars; break;
   default: unreachable("Invalid deref mode");
   }
   gep_indices[0] = var_array[var->data.driver_location];

   for (uint32_t i = 0; i < count; ++i)
      gep_indices[i + 1] = get_src_ssa(ctx, &path.path[i]->def, 0);

   return dxil_emit_gep_inbounds(&ctx->mod, gep_indices, count + 1);
}

static bool
emit_load_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *ptr = deref_to_gep(ctx, nir_src_as_deref(intr->src[0]));
   if (!ptr)
      return false;

   const struct dxil_value *retval =
      dxil_emit_load(&ctx->mod, ptr, intr->def.bit_size / 8, false);
   if (!retval)
      return false;

   store_def(ctx, &intr->def, 0, retval);
   return true;
}

static bool
emit_store_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
   const struct dxil_value *ptr = deref_to_gep(ctx, deref);
   if (!ptr)
      return false;

   const struct dxil_value *value = get_src(ctx, &intr->src[1], 0, nir_get_nir_type_for_glsl_type(deref->type));
   return dxil_emit_store(&ctx->mod, value, ptr, nir_src_bit_size(intr->src[1]) / 8, false);
}

static bool
emit_atomic_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *ptr = deref_to_gep(ctx, nir_src_as_deref(intr->src[0]));
   if (!ptr)
      return false;

   const struct dxil_value *value = get_src(ctx, &intr->src[1], 0, nir_type_uint);
   if (!value)
      return false;

   enum dxil_rmw_op dxil_op = nir_atomic_to_dxil_rmw(nir_intrinsic_atomic_op(intr));
   const struct dxil_value *retval = dxil_emit_atomicrmw(&ctx->mod, value, ptr, dxil_op, false,
                                                         DXIL_ATOMIC_ORDERING_ACQREL,
                                                         DXIL_SYNC_SCOPE_CROSSTHREAD);
   if (!retval)
      return false;

   store_def(ctx, &intr->def, 0, retval);
   return true;
}

static bool
emit_atomic_deref_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *ptr = deref_to_gep(ctx, nir_src_as_deref(intr->src[0]));
   if (!ptr)
      return false;

   const struct dxil_value *cmp = get_src(ctx, &intr->src[1], 0, nir_type_uint);
   const struct dxil_value *value = get_src(ctx, &intr->src[2], 0, nir_type_uint);
   if (!cmp || !value)
      return false;

   const struct dxil_value *retval = dxil_emit_cmpxchg(&ctx->mod, cmp, value, ptr, false,
                                                       DXIL_ATOMIC_ORDERING_ACQREL,
                                                       DXIL_SYNC_SCOPE_CROSSTHREAD);
   if (!retval)
      return false;

   store_def(ctx, &intr->def, 0, retval);
   return true;
}

static bool
emit_discard_if_with_value(struct ntd_context *ctx, const struct dxil_value *value)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DISCARD);
   if (!opcode)
      return false;

   const struct dxil_value *args[] = {
      opcode,
      value
   };

   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.discard", DXIL_NONE);
   if (!func)
      return false;

   return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_discard_if(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *value = get_src(ctx, &intr->src[0], 0, nir_type_bool);
   if (!value)
      return false;

   return emit_discard_if_with_value(ctx, value);
}

static bool
emit_discard(struct ntd_context *ctx)
{
   const struct dxil_value *value = dxil_module_get_int1_const(&ctx->mod, true);
   return emit_discard_if_with_value(ctx, value);
}

static bool
emit_emit_vertex(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_EMIT_STREAM);
   const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));
   if (!opcode || !stream_id)
      return false;

   const struct dxil_value *args[] = {
      opcode,
      stream_id
   };

   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.emitStream", DXIL_NONE);
   if (!func)
      return false;

   return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_end_primitive(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CUT_STREAM);
   const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));
   if (!opcode || !stream_id)
      return false;

   const struct dxil_value *args[] = {
      opcode,
      stream_id
   };

   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cutStream", DXIL_NONE);
   if (!func)
      return false;

   return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
}

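/* Image stores: coordinates unused by the image dimension stay undef, and
 * value components beyond the source vector are padded with undef to fill
 * out the 4-component DXIL store. Buffer images go through bufferStore,
 * everything else through textureStore.
 */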
static bool
emit_image_store(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_store ?
      create_image_handle(ctx, intr) :
      get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
   if (!handle)
      return false;

   bool is_array = false;
   if (intr->intrinsic == nir_intrinsic_image_deref_store)
      is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
   else
      is_array = nir_intrinsic_image_array(intr);

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
   enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_store ?
      glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
      nir_intrinsic_image_dim(intr);
   unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
   if (is_array)
      ++num_coords;

   assert(num_coords <= nir_src_num_components(intr->src[1]));
   for (unsigned i = 0; i < num_coords; ++i) {
      coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
      if (!coord[i])
         return false;
   }

   nir_alu_type in_type = nir_intrinsic_src_type(intr);
   enum overload_type overload = get_overload(in_type, 32);

   assert(nir_src_bit_size(intr->src[3]) == 32);
   unsigned num_components = nir_src_num_components(intr->src[3]);
   assert(num_components <= 4);
   const struct dxil_value *value[4];
   for (unsigned i = 0; i < num_components; ++i) {
      value[i] = get_src(ctx, &intr->src[3], i, in_type);
      if (!value[i])
         return false;
   }

   for (int i = num_components; i < 4; ++i)
      value[i] = dxil_module_get_undef(&ctx->mod, dxil_value_get_type(value[0]));

   const struct dxil_value *write_mask =
      dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
   if (!write_mask)
      return false;

   if (image_dim == GLSL_SAMPLER_DIM_BUF) {
      coord[1] = int32_undef;
      return emit_bufferstore_call(ctx, handle, coord, value, write_mask, overload);
   } else
      return emit_texturestore_call(ctx, handle, coord, value, write_mask, overload);
}

static bool
emit_image_load(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_load ?
      create_image_handle(ctx, intr) :
      get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
   if (!handle)
      return false;

   bool is_array = false;
   if (intr->intrinsic == nir_intrinsic_image_deref_load)
      is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
   else
      is_array = nir_intrinsic_image_array(intr);

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
   enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_load ?
      glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
      nir_intrinsic_image_dim(intr);
   unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
   if (is_array)
      ++num_coords;

   assert(num_coords <= nir_src_num_components(intr->src[1]));
   for (unsigned i = 0; i < num_coords; ++i) {
      coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
      if (!coord[i])
         return false;
   }

   nir_alu_type out_type = nir_intrinsic_dest_type(intr);
   enum overload_type overload = get_overload(out_type, 32);

   const struct dxil_value *load_result;
   if (image_dim == GLSL_SAMPLER_DIM_BUF) {
      coord[1] = int32_undef;
      load_result = emit_bufferload_call(ctx, handle, coord, overload);
   } else
      load_result = emit_textureload_call(ctx, handle, coord, overload);

   if (!load_result)
      return false;

   assert(intr->def.bit_size == 32);
   unsigned num_components = intr->def.num_components;
   assert(num_components <= 4);
   for (unsigned i = 0; i < num_components; ++i) {
      const struct dxil_value *component = dxil_emit_extractval(&ctx->mod, load_result, i);
      if (!component)
         return false;
      store_def(ctx, &intr->def, i, component);
   }

   if (util_format_get_nr_components(nir_intrinsic_format(intr)) > 1)
      ctx->mod.feats.typed_uav_load_additional_formats = true;

   return true;
}

static bool
emit_image_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_atomic ?
      create_image_handle(ctx, intr) :
      get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
   if (!handle)
      return false;

   bool is_array = false;
   if (intr->intrinsic == nir_intrinsic_image_deref_atomic)
      is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
   else
      is_array = nir_intrinsic_image_array(intr);

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
   enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_atomic ?
      glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
      nir_intrinsic_image_dim(intr);
   unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
   if (is_array)
      ++num_coords;

   assert(num_coords <= nir_src_num_components(intr->src[1]));
   for (unsigned i = 0; i < num_coords; ++i) {
      coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
      if (!coord[i])
         return false;
   }

   nir_atomic_op nir_op = nir_intrinsic_atomic_op(intr);
   enum dxil_atomic_op dxil_op = nir_atomic_to_dxil_atomic(nir_op);
   nir_alu_type type = nir_atomic_op_type(nir_op);
   const struct dxil_value *value = get_src(ctx, &intr->src[3], 0, type);
   if (!value)
      return false;

   const struct dxil_value *retval =
      emit_atomic_binop(ctx, handle, dxil_op, coord, value);

   if (!retval)
      return false;

   store_def(ctx, &intr->def, 0, retval);
   return true;
}

static bool
emit_image_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_atomic_swap ?
      create_image_handle(ctx, intr) :
      get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
   if (!handle)
      return false;

   bool is_array = false;
   if (intr->intrinsic == nir_intrinsic_image_deref_atomic_swap)
      is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
   else
      is_array = nir_intrinsic_image_array(intr);

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
   enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ?
      glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
      nir_intrinsic_image_dim(intr);
   unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
   if (is_array)
      ++num_coords;

   assert(num_coords <= nir_src_num_components(intr->src[1]));
   for (unsigned i = 0; i < num_coords; ++i) {
      coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
      if (!coord[i])
         return false;
   }

   const struct dxil_value *cmpval = get_src(ctx, &intr->src[3], 0, nir_type_uint);
   const struct dxil_value *newval = get_src(ctx, &intr->src[4], 0, nir_type_uint);
   if (!cmpval || !newval)
      return false;

   const struct dxil_value *retval =
      emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval);

   if (!retval)
      return false;

   store_def(ctx, &intr->def, 0, retval);
   return true;
}

struct texop_parameters {
   const struct dxil_value *tex;
   const struct dxil_value *sampler;
   const struct dxil_value *bias, *lod_or_sample, *min_lod;
   const struct dxil_value *coord[4], *offset[3], *dx[3], *dy[3];
   const struct dxil_value *cmp;
   enum overload_type overload;
};

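/* Wraps dx.op.getDimensions; the result is an aggregate holding the size in
 * each dimension, from which callers extract the components they need.
 */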
static const struct dxil_value *
emit_texture_size(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.getDimensions", DXIL_NONE);
   if (!func)
      return NULL;

   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_SIZE),
      params->tex,
      params->lod_or_sample
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_image_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_size ?
      create_image_handle(ctx, intr) :
      get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
   if (!handle)
      return false;

   enum glsl_sampler_dim sampler_dim = intr->intrinsic == nir_intrinsic_image_deref_size ?
      glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
      nir_intrinsic_image_dim(intr);
   const struct dxil_value *lod = sampler_dim == GLSL_SAMPLER_DIM_BUF ?
      dxil_module_get_undef(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32)) :
      get_src(ctx, &intr->src[1], 0, nir_type_uint);
   if (!lod)
      return false;

   struct texop_parameters params = {
      .tex = handle,
      .lod_or_sample = lod
   };
   const struct dxil_value *dimensions = emit_texture_size(ctx, &params);
   if (!dimensions)
      return false;

   for (unsigned i = 0; i < intr->def.num_components; ++i) {
      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, i);
      store_def(ctx, &intr->def, i, retval);
   }

   return true;
}

static bool
emit_get_ssbo_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV;
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
      if (var && var->data.access & ACCESS_NON_WRITEABLE)
         class = DXIL_RESOURCE_CLASS_SRV;
   }

   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], class, DXIL_RESOURCE_KIND_RAW_BUFFER);
   if (!handle)
      return false;

   struct texop_parameters params = {
      .tex = handle,
      .lod_or_sample = dxil_module_get_undef(
         &ctx->mod, dxil_module_get_int_type(&ctx->mod, 32))
   };

   const struct dxil_value *dimensions = emit_texture_size(ctx, &params);
   if (!dimensions)
      return false;

   const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, 0);
   store_def(ctx, &intr->def, 0, retval);

   return true;
}

static bool
emit_ssbo_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   nir_atomic_op nir_op = nir_intrinsic_atomic_op(intr);
   enum dxil_atomic_op dxil_op = nir_atomic_to_dxil_atomic(nir_op);
   nir_alu_type type = nir_atomic_op_type(nir_op);
   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[1], 0, nir_type_uint);
   const struct dxil_value *value =
      get_src(ctx, &intr->src[2], 0, type);

   if (!value || !handle || !offset)
      return false;

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[3] = {
      offset, int32_undef, int32_undef
   };

   const struct dxil_value *retval =
      emit_atomic_binop(ctx, handle, dxil_op, coord, value);

   if (!retval)
      return false;

   store_def(ctx, &intr->def, 0, retval);
   return true;
}

static bool
emit_ssbo_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[1], 0, nir_type_uint);
   const struct dxil_value *cmpval =
      get_src(ctx, &intr->src[2], 0, nir_type_int);
   const struct dxil_value *newval =
      get_src(ctx, &intr->src[3], 0, nir_type_int);

   if (!cmpval || !newval || !handle || !offset)
      return false;

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[3] = {
      offset, int32_undef, int32_undef
   };

   const struct dxil_value *retval =
      emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval);

   if (!retval)
      return false;

   store_def(ctx, &intr->def, 0, retval);
   return true;
}

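/* vulkan_resource_index produces an (index, 0) pair: a constant array index
 * is folded into the base binding up front, while a dynamic index is added
 * to the binding with an i32 ADD. Component 1 is written as a constant 0 and
 * simply carried through by emit_load_vulkan_descriptor below.
 */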
static bool
emit_vulkan_resource_index(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   unsigned int binding = nir_intrinsic_binding(intr);

   bool const_index = nir_src_is_const(intr->src[0]);
   if (const_index) {
      binding += nir_src_as_const_value(intr->src[0])->u32;
   }

   const struct dxil_value *index_value = dxil_module_get_int32_const(&ctx->mod, binding);
   if (!index_value)
      return false;

   if (!const_index) {
      const struct dxil_value *offset = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
      if (!offset)
         return false;

      index_value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, index_value, offset, 0);
      if (!index_value)
         return false;
   }

   store_def(ctx, &intr->def, 0, index_value);
   store_def(ctx, &intr->def, 1, dxil_module_get_int32_const(&ctx->mod, 0));
   return true;
}

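/* Two ways to materialize a descriptor handle: if the source is a plain
 * vulkan_resource_index, a handle is created dynamically from the
 * (set, binding) pair; otherwise the source is treated as an index into the
 * descriptor heap, which yields an unannotated handle that must be annotated
 * with the buffer's resource properties before use.
 */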
static bool
emit_load_vulkan_descriptor(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   nir_intrinsic_instr *index = nir_src_as_intrinsic(intr->src[0]);
   const struct dxil_value *handle = NULL;

   enum dxil_resource_class resource_class;
   enum dxil_resource_kind resource_kind;
   switch (nir_intrinsic_desc_type(intr)) {
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
      resource_class = DXIL_RESOURCE_CLASS_CBV;
      resource_kind = DXIL_RESOURCE_KIND_CBUFFER;
      break;
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
      resource_class = DXIL_RESOURCE_CLASS_UAV;
      resource_kind = DXIL_RESOURCE_KIND_RAW_BUFFER;
      break;
   default:
      unreachable("unknown descriptor type");
      return false;
   }

   if (index && index->intrinsic == nir_intrinsic_vulkan_resource_index) {
      unsigned binding = nir_intrinsic_binding(index);
      unsigned space = nir_intrinsic_desc_set(index);

      /* The descriptor_set field for variables is only 5 bits. We shouldn't have intrinsics trying to go beyond that. */
      assert(space < 32);

      nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
      if (resource_class == DXIL_RESOURCE_CLASS_UAV &&
          (var->data.access & ACCESS_NON_WRITEABLE))
         resource_class = DXIL_RESOURCE_CLASS_SRV;

      const struct dxil_value *index_value = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
      if (!index_value)
         return false;

      handle = emit_createhandle_call_dynamic(ctx, resource_class, space, binding, index_value, false);
   } else {
      const struct dxil_value *heap_index_value = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
      if (!heap_index_value)
         return false;
      const struct dxil_value *unannotated_handle = emit_createhandle_heap(ctx, heap_index_value, false, true);
      const struct dxil_value *res_props = dxil_module_get_buffer_res_props_const(&ctx->mod, resource_class, resource_kind);
      if (!unannotated_handle || !res_props)
         return false;
      handle = emit_annotate_handle(ctx, unannotated_handle, res_props);
   }

   store_ssa_def(ctx, &intr->def, 0, handle);
   store_def(ctx, &intr->def, 1, get_src(ctx, &intr->src[0], 1, nir_type_uint32));

   return true;
}

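/* Sample positions are queried through dx.op.renderTargetGetSamplePosition,
 * which returns them in D3D's [-0.5, 0.5] convention; 0.5 is added to each
 * component to express them in GL's [0, 1] convention.
 */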
static bool
emit_load_sample_pos_from_id(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.renderTargetGetSamplePosition", DXIL_NONE);
   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_RENDER_TARGET_GET_SAMPLE_POSITION);
   if (!opcode)
      return false;

   const struct dxil_value *args[] = {
      opcode,
      get_src(ctx, &intr->src[0], 0, nir_type_uint32),
   };
   if (!args[1])
      return false;

   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!v)
      return false;

   for (unsigned i = 0; i < 2; ++i) {
      /* GL coords go from 0 -> 1, D3D from -0.5 -> 0.5 */
      const struct dxil_value *coord = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
         dxil_emit_extractval(&ctx->mod, v, i),
         dxil_module_get_float_const(&ctx->mod, 0.5f), 0);
      store_def(ctx, &intr->def, i, coord);
   }
   return true;
}

static bool
emit_load_sample_id(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   assert(ctx->mod.info.has_per_sample_input ||
          intr->intrinsic == nir_intrinsic_load_sample_id_no_per_sample);

   if (ctx->mod.info.has_per_sample_input)
      return emit_load_unary_external_function(ctx, intr, "dx.op.sampleIndex",
                                               DXIL_INTR_SAMPLE_INDEX, nir_type_int);

   store_def(ctx, &intr->def, 0, dxil_module_get_int32_const(&ctx->mod, 0));
   return true;
}

static bool
emit_read_first_invocation(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   ctx->mod.feats.wave_ops = 1;
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveReadLaneFirst",
                                                    get_overload(nir_type_uint, intr->def.bit_size));
   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_READ_LANE_FIRST),
      get_src(ctx, intr->src, 0, nir_type_uint),
   };
   if (!func || !args[0] || !args[1])
      return false;

   const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!ret)
      return false;
   store_def(ctx, &intr->def, 0, ret);
   return true;
}

static bool
emit_read_invocation(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   ctx->mod.feats.wave_ops = 1;
   bool quad = intr->intrinsic == nir_intrinsic_quad_broadcast;
   const struct dxil_func *func = dxil_get_function(&ctx->mod, quad ? "dx.op.quadReadLaneAt" : "dx.op.waveReadLaneAt",
                                                    get_overload(nir_type_uint, intr->def.bit_size));
   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, quad ? DXIL_INTR_QUAD_READ_LANE_AT : DXIL_INTR_WAVE_READ_LANE_AT),
      get_src(ctx, &intr->src[0], 0, nir_type_uint),
      get_src(ctx, &intr->src[1], 0, nir_type_uint),
   };
   if (!func || !args[0] || !args[1] || !args[2])
      return false;

   const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!ret)
      return false;
   store_def(ctx, &intr->def, 0, ret);
   return true;
}

static bool
emit_vote_eq(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   ctx->mod.feats.wave_ops = 1;
   nir_alu_type alu_type = intr->intrinsic == nir_intrinsic_vote_ieq ? nir_type_int : nir_type_float;
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveActiveAllEqual",
                                                    get_overload(alu_type, intr->src[0].ssa->bit_size));
   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_ACTIVE_ALL_EQUAL),
      get_src(ctx, intr->src, 0, alu_type),
   };
   if (!func || !args[0] || !args[1])
      return false;

   const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!ret)
      return false;
   store_def(ctx, &intr->def, 0, ret);
   return true;
}

static bool
emit_vote(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   ctx->mod.feats.wave_ops = 1;
   bool any = intr->intrinsic == nir_intrinsic_vote_any;
   const struct dxil_func *func = dxil_get_function(&ctx->mod,
                                                    any ? "dx.op.waveAnyTrue" : "dx.op.waveAllTrue",
                                                    DXIL_NONE);
   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, any ? DXIL_INTR_WAVE_ANY_TRUE : DXIL_INTR_WAVE_ALL_TRUE),
      get_src(ctx, intr->src, 0, nir_type_bool),
   };
   if (!func || !args[0] || !args[1])
      return false;

   const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!ret)
      return false;
   store_def(ctx, &intr->def, 0, ret);
   return true;
}

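/* dx.op.waveActiveBallot returns a four-component i32 struct covering up to
 * 128 lanes; each component is extracted and stored to the def separately.
 */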
static bool
emit_ballot(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   ctx->mod.feats.wave_ops = 1;
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveActiveBallot", DXIL_NONE);
   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_ACTIVE_BALLOT),
      get_src(ctx, intr->src, 0, nir_type_bool),
   };
   if (!func || !args[0] || !args[1])
      return false;

   const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!ret)
      return false;
   for (uint32_t i = 0; i < 4; ++i)
      store_def(ctx, &intr->def, i, dxil_emit_extractval(&ctx->mod, ret, i));
   return true;
}

static bool
emit_quad_op(struct ntd_context *ctx, nir_intrinsic_instr *intr, enum dxil_quad_op_kind op)
{
   ctx->mod.feats.wave_ops = 1;
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.quadOp",
                                                    get_overload(nir_type_uint, intr->def.bit_size));
   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_QUAD_OP),
      get_src(ctx, intr->src, 0, nir_type_uint),
      dxil_module_get_int8_const(&ctx->mod, op),
   };
   if (!func || !args[0] || !args[1] || !args[2])
      return false;

   const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!ret)
      return false;
   store_def(ctx, &intr->def, 0, ret);
   return true;
}

static enum dxil_wave_bit_op_kind
get_reduce_bit_op(nir_op op)
{
   switch (op) {
   case nir_op_ior: return DXIL_WAVE_BIT_OP_OR;
   case nir_op_ixor: return DXIL_WAVE_BIT_OP_XOR;
   case nir_op_iand: return DXIL_WAVE_BIT_OP_AND;
   default:
      unreachable("Invalid bit op");
   }
}

static bool
emit_reduce_bitwise(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   enum dxil_wave_bit_op_kind wave_bit_op = get_reduce_bit_op(nir_intrinsic_reduction_op(intr));
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveActiveBit",
                                                    get_overload(nir_type_uint, intr->def.bit_size));
   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_ACTIVE_BIT),
      get_src(ctx, intr->src, 0, nir_type_uint),
      dxil_module_get_int8_const(&ctx->mod, wave_bit_op),
   };
   if (!func || !args[0] || !args[1] || !args[2])
      return false;

   const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!ret)
      return false;
   store_def(ctx, &intr->def, 0, ret);
   return true;
}

static enum dxil_wave_op_kind
get_reduce_op(nir_op op)
{
   switch (op) {
   case nir_op_iadd:
   case nir_op_fadd:
      return DXIL_WAVE_OP_SUM;
   case nir_op_imul:
   case nir_op_fmul:
      return DXIL_WAVE_OP_PRODUCT;
   case nir_op_imax:
   case nir_op_umax:
   case nir_op_fmax:
      return DXIL_WAVE_OP_MAX;
   case nir_op_imin:
   case nir_op_umin:
   case nir_op_fmin:
      return DXIL_WAVE_OP_MIN;
   default:
      unreachable("Unexpected reduction op");
   }
}

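/* Reductions and exclusive scans share one entry point. Bitwise ops are
 * routed to dx.op.waveActiveBit (which has no prefix form), everything else
 * to dx.op.waveActiveOp / dx.op.wavePrefixOp with an explicit signedness
 * flag, since the wave op kind alone doesn't distinguish imax from umax.
 */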
static bool
emit_reduce(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   ctx->mod.feats.wave_ops = 1;
   bool is_prefix = intr->intrinsic == nir_intrinsic_exclusive_scan;
   nir_op reduction_op = (nir_op)nir_intrinsic_reduction_op(intr);
   switch (reduction_op) {
   case nir_op_ior:
   case nir_op_ixor:
   case nir_op_iand:
      assert(!is_prefix);
      return emit_reduce_bitwise(ctx, intr);
   default:
      break;
   }
   nir_alu_type alu_type = nir_op_infos[reduction_op].input_types[0];
   enum dxil_wave_op_kind wave_op = get_reduce_op(reduction_op);
   const struct dxil_func *func = dxil_get_function(&ctx->mod, is_prefix ? "dx.op.wavePrefixOp" : "dx.op.waveActiveOp",
                                                    get_overload(alu_type, intr->def.bit_size));
   bool is_unsigned = alu_type == nir_type_uint;
   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, is_prefix ? DXIL_INTR_WAVE_PREFIX_OP : DXIL_INTR_WAVE_ACTIVE_OP),
      get_src(ctx, intr->src, 0, alu_type),
      dxil_module_get_int8_const(&ctx->mod, wave_op),
      dxil_module_get_int8_const(&ctx->mod, is_unsigned),
   };
   if (!func || !args[0] || !args[1] || !args[2] || !args[3])
      return false;

   const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!ret)
      return false;
   store_def(ctx, &intr->def, 0, ret);
   return true;
}

static bool
emit_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   switch (intr->intrinsic) {
   case nir_intrinsic_load_global_invocation_id:
      return emit_load_global_invocation_id(ctx, intr);
   case nir_intrinsic_load_local_invocation_id:
      return emit_load_local_invocation_id(ctx, intr);
   case nir_intrinsic_load_local_invocation_index:
      return emit_load_local_invocation_index(ctx, intr);
   case nir_intrinsic_load_workgroup_id:
      return emit_load_local_workgroup_id(ctx, intr);
   case nir_intrinsic_load_ssbo:
      return emit_load_ssbo(ctx, intr);
   case nir_intrinsic_store_ssbo:
      return emit_store_ssbo(ctx, intr);
   case nir_intrinsic_load_deref:
      return emit_load_deref(ctx, intr);
   case nir_intrinsic_store_deref:
      return emit_store_deref(ctx, intr);
   case nir_intrinsic_deref_atomic:
      return emit_atomic_deref(ctx, intr);
   case nir_intrinsic_deref_atomic_swap:
      return emit_atomic_deref_swap(ctx, intr);
   case nir_intrinsic_load_ubo_vec4:
      return emit_load_ubo_vec4(ctx, intr);
   case nir_intrinsic_load_primitive_id:
      return emit_load_unary_external_function(ctx, intr, "dx.op.primitiveID",
                                               DXIL_INTR_PRIMITIVE_ID, nir_type_int);
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_id_no_per_sample:
      return emit_load_sample_id(ctx, intr);
   case nir_intrinsic_load_invocation_id:
      switch (ctx->mod.shader_kind) {
      case DXIL_HULL_SHADER:
         return emit_load_unary_external_function(ctx, intr, "dx.op.outputControlPointID",
                                                  DXIL_INTR_OUTPUT_CONTROL_POINT_ID, nir_type_int);
      case DXIL_GEOMETRY_SHADER:
         return emit_load_unary_external_function(ctx, intr, "dx.op.gsInstanceID",
                                                  DXIL_INTR_GS_INSTANCE_ID, nir_type_int);
      default:
         unreachable("Unexpected shader kind for invocation ID");
      }
   case nir_intrinsic_load_view_index:
      ctx->mod.feats.view_id = true;
      return emit_load_unary_external_function(ctx, intr, "dx.op.viewID",
                                               DXIL_INTR_VIEW_ID, nir_type_int);
   case nir_intrinsic_load_sample_mask_in:
      return emit_load_sample_mask_in(ctx, intr);
   case nir_intrinsic_load_tess_coord:
      return emit_load_tess_coord(ctx, intr);
   case nir_intrinsic_terminate_if:
   case nir_intrinsic_demote_if:
      return emit_discard_if(ctx, intr);
   case nir_intrinsic_terminate:
   case nir_intrinsic_demote:
      return emit_discard(ctx);
   case nir_intrinsic_emit_vertex:
      return emit_emit_vertex(ctx, intr);
   case nir_intrinsic_end_primitive:
      return emit_end_primitive(ctx, intr);
   case nir_intrinsic_barrier:
      return emit_barrier(ctx, intr);
   case nir_intrinsic_ssbo_atomic:
      return emit_ssbo_atomic(ctx, intr);
   case nir_intrinsic_ssbo_atomic_swap:
      return emit_ssbo_atomic_comp_swap(ctx, intr);
   case nir_intrinsic_image_deref_atomic:
   case nir_intrinsic_image_atomic:
   case nir_intrinsic_bindless_image_atomic:
      return emit_image_atomic(ctx, intr);
   case nir_intrinsic_image_deref_atomic_swap:
   case nir_intrinsic_image_atomic_swap:
   case nir_intrinsic_bindless_image_atomic_swap:
      return emit_image_atomic_comp_swap(ctx, intr);
   case nir_intrinsic_image_store:
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_bindless_image_store:
      return emit_image_store(ctx, intr);
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_bindless_image_load:
      return emit_image_load(ctx, intr);
   case nir_intrinsic_image_size:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_bindless_image_size:
      return emit_image_size(ctx, intr);
   case nir_intrinsic_get_ssbo_size:
      return emit_get_ssbo_size(ctx, intr);
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_output:
      return emit_load_input_via_intrinsic(ctx, intr);
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output:
      return emit_store_output_via_intrinsic(ctx, intr);

   case nir_intrinsic_load_barycentric_at_offset:
   case nir_intrinsic_load_barycentric_at_sample:
   case nir_intrinsic_load_barycentric_centroid:
   case nir_intrinsic_load_barycentric_pixel:
      /* Emit nothing, we only support these as inputs to load_interpolated_input */
      return true;
   case nir_intrinsic_load_interpolated_input:
      return emit_load_interpolated_input(ctx, intr);

   case nir_intrinsic_vulkan_resource_index:
      return emit_vulkan_resource_index(ctx, intr);
   case nir_intrinsic_load_vulkan_descriptor:
      return emit_load_vulkan_descriptor(ctx, intr);

   case nir_intrinsic_load_sample_pos_from_id:
      return emit_load_sample_pos_from_id(ctx, intr);

   case nir_intrinsic_is_helper_invocation:
      return emit_load_unary_external_function(
         ctx, intr, "dx.op.isHelperLane", DXIL_INTR_IS_HELPER_LANE, nir_type_int);
   case nir_intrinsic_elect:
      ctx->mod.feats.wave_ops = 1;
      return emit_load_unary_external_function(
         ctx, intr, "dx.op.waveIsFirstLane", DXIL_INTR_WAVE_IS_FIRST_LANE, nir_type_invalid);
   case nir_intrinsic_load_subgroup_size:
      ctx->mod.feats.wave_ops = 1;
      return emit_load_unary_external_function(
         ctx, intr, "dx.op.waveGetLaneCount", DXIL_INTR_WAVE_GET_LANE_COUNT, nir_type_invalid);
   case nir_intrinsic_load_subgroup_invocation:
      ctx->mod.feats.wave_ops = 1;
      return emit_load_unary_external_function(
         ctx, intr, "dx.op.waveGetLaneIndex", DXIL_INTR_WAVE_GET_LANE_INDEX, nir_type_invalid);

   case nir_intrinsic_vote_feq:
   case nir_intrinsic_vote_ieq:
      return emit_vote_eq(ctx, intr);
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_all:
      return emit_vote(ctx, intr);

   case nir_intrinsic_ballot:
      return emit_ballot(ctx, intr);

   case nir_intrinsic_read_first_invocation:
      return emit_read_first_invocation(ctx, intr);
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_shuffle:
   case nir_intrinsic_quad_broadcast:
      return emit_read_invocation(ctx, intr);

   case nir_intrinsic_quad_swap_horizontal:
      return emit_quad_op(ctx, intr, QUAD_READ_ACROSS_X);
   case nir_intrinsic_quad_swap_vertical:
      return emit_quad_op(ctx, intr, QUAD_READ_ACROSS_Y);
   case nir_intrinsic_quad_swap_diagonal:
      return emit_quad_op(ctx, intr, QUAD_READ_ACROSS_DIAGONAL);

   case nir_intrinsic_reduce:
   case nir_intrinsic_exclusive_scan:
      return emit_reduce(ctx, intr);

   case nir_intrinsic_ddx:
   case nir_intrinsic_ddx_coarse: return emit_derivative(ctx, intr, DXIL_INTR_DDX_COARSE);
   case nir_intrinsic_ddx_fine: return emit_derivative(ctx, intr, DXIL_INTR_DDX_FINE);
   case nir_intrinsic_ddy:
   case nir_intrinsic_ddy_coarse: return emit_derivative(ctx, intr, DXIL_INTR_DDY_COARSE);
   case nir_intrinsic_ddy_fine: return emit_derivative(ctx, intr, DXIL_INTR_DDY_FINE);

   case nir_intrinsic_load_first_vertex:
      ctx->mod.feats.extended_command_info = true;
      return emit_load_unary_external_function(ctx, intr, "dx.op.startVertexLocation",
                                               DXIL_INTR_START_VERTEX_LOCATION, nir_type_int);
   case nir_intrinsic_load_base_instance:
      ctx->mod.feats.extended_command_info = true;
      return emit_load_unary_external_function(ctx, intr, "dx.op.startInstanceLocation",
                                               DXIL_INTR_START_INSTANCE_LOCATION, nir_type_int);

   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_workgroup_size:
   default:
      log_nir_instr_unsupported(
         ctx->logger, "Unimplemented intrinsic instruction", &intr->instr);
      return false;
   }
}

static const struct dxil_type *
dxil_type_for_const(struct ntd_context *ctx, nir_def *def)
{
   if (BITSET_TEST(ctx->int_types, def->index) ||
       !BITSET_TEST(ctx->float_types, def->index))
      return dxil_module_get_int_type(&ctx->mod, def->bit_size);
   return dxil_module_get_float_type(&ctx->mod, def->bit_size);
}

static bool
emit_load_const(struct ntd_context *ctx, nir_load_const_instr *load_const)
{
   for (uint32_t i = 0; i < load_const->def.num_components; ++i) {
      const struct dxil_type *type = dxil_type_for_const(ctx, &load_const->def);
      store_ssa_def(ctx, &load_const->def, i, get_value_for_const(&ctx->mod, &load_const->value[i], type));
   }
   return true;
}

static bool
emit_deref(struct ntd_context *ctx, nir_deref_instr *instr)
{
   /* There are two possible reasons we might be walking through derefs:
    * 1. Computing an index to be used for a texture/sampler/image binding, which
    *    can only do array indexing and should compute the indices along the way with
    *    array-of-array sizes.
    * 2. Storing an index to be used in a GEP for access to a variable.
    */
   nir_variable *var = nir_deref_instr_get_variable(instr);
   assert(var);

   bool is_aoa_size =
      glsl_type_is_sampler(glsl_without_array(var->type)) ||
      glsl_type_is_image(glsl_without_array(var->type)) ||
      glsl_type_is_texture(glsl_without_array(var->type));

   if (!is_aoa_size) {
      /* Just store the values, we'll use these to build a GEP in the load or store */
      switch (instr->deref_type) {
      case nir_deref_type_var:
         store_def(ctx, &instr->def, 0, dxil_module_get_int_const(&ctx->mod, 0, instr->def.bit_size));
         return true;
      case nir_deref_type_array:
         store_def(ctx, &instr->def, 0, get_src(ctx, &instr->arr.index, 0, nir_type_int));
         return true;
      case nir_deref_type_struct:
         store_def(ctx, &instr->def, 0, dxil_module_get_int_const(&ctx->mod, instr->strct.index, 32));
         return true;
      default:
         unreachable("Other deref types not supported");
      }
   }

   /* In the CL environment, there's nothing to emit. Any references to
    * derefs will emit the necessary logic to handle scratch/shared GEP addressing
    */
   if (ctx->opts->environment == DXIL_ENVIRONMENT_CL)
      return true;

   const struct glsl_type *type = instr->type;
   const struct dxil_value *binding;
   unsigned binding_val = ctx->opts->environment == DXIL_ENVIRONMENT_GL ?
      var->data.driver_location : var->data.binding;

   if (instr->deref_type == nir_deref_type_var) {
      binding = dxil_module_get_int32_const(&ctx->mod, binding_val);
   } else {
      const struct dxil_value *base = get_src(ctx, &instr->parent, 0, nir_type_uint32);
      const struct dxil_value *offset = get_src(ctx, &instr->arr.index, 0, nir_type_uint32);
      if (!base || !offset)
         return false;

      if (glsl_type_is_array(instr->type)) {
         offset = dxil_emit_binop(&ctx->mod, DXIL_BINOP_MUL, offset,
                                  dxil_module_get_int32_const(&ctx->mod, glsl_get_aoa_size(instr->type)), 0);
         if (!offset)
            return false;
      }
      binding = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, base, offset, 0);
   }

   if (!binding)
      return false;

   /* Haven't finished chasing the deref chain yet, just store the value */
   if (glsl_type_is_array(type)) {
      store_def(ctx, &instr->def, 0, binding);
      return true;
   }

   assert(glsl_type_is_sampler(type) || glsl_type_is_image(type) || glsl_type_is_texture(type));
   enum dxil_resource_class res_class;
   if (glsl_type_is_image(type))
      res_class = DXIL_RESOURCE_CLASS_UAV;
   else if (glsl_type_is_sampler(type))
      res_class = DXIL_RESOURCE_CLASS_SAMPLER;
   else
      res_class = DXIL_RESOURCE_CLASS_SRV;

   unsigned descriptor_set = ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN ?
      var->data.descriptor_set : (glsl_type_is_image(type) ? 1 : 0);
   const struct dxil_value *handle = emit_createhandle_call_dynamic(ctx, res_class,
      descriptor_set, binding_val, binding, false);
   if (!handle)
      return false;

   store_ssa_def(ctx, &instr->def, 0, handle);
   return true;
}

static bool
emit_cond_branch(struct ntd_context *ctx, const struct dxil_value *cond,
                 int true_block, int false_block)
{
   assert(cond);
   assert(true_block >= 0);
   assert(false_block >= 0);
   return dxil_emit_branch(&ctx->mod, cond, true_block, false_block);
}

static bool
emit_branch(struct ntd_context *ctx, int block)
{
   assert(block >= 0);
   return dxil_emit_branch(&ctx->mod, NULL, block, -1);
}

static bool
emit_jump(struct ntd_context *ctx, nir_jump_instr *instr)
{
   switch (instr->type) {
   case nir_jump_break:
   case nir_jump_continue:
      assert(instr->instr.block->successors[0]);
      assert(!instr->instr.block->successors[1]);
      return emit_branch(ctx, instr->instr.block->successors[0]->index);

   default:
      unreachable("Unsupported jump type\n");
   }
}

struct phi_block {
   unsigned num_components;
   struct dxil_instr *comp[NIR_MAX_VEC_COMPONENTS];
};

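/* Phis are emitted in two passes: emit_phi() creates an empty DXIL phi per
 * component (its sources may not have been emitted yet) and records it in
 * ctx->phis; fixup_phi() later walks that table and attaches the incoming
 * value/block pairs once every block has been emitted.
 */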
static bool
emit_phi(struct ntd_context *ctx, nir_phi_instr *instr)
{
   const struct dxil_type *type = NULL;
   nir_foreach_phi_src(src, instr) {
      /* All sources have the same type, just use the first one */
      type = dxil_value_get_type(ctx->defs[src->src.ssa->index].chans[0]);
      break;
   }

   struct phi_block *vphi = ralloc(ctx->phis, struct phi_block);
   vphi->num_components = instr->def.num_components;

   for (unsigned i = 0; i < vphi->num_components; ++i) {
      struct dxil_instr *phi = vphi->comp[i] = dxil_emit_phi(&ctx->mod, type);
      if (!phi)
         return false;
      store_ssa_def(ctx, &instr->def, i, dxil_instr_get_return_value(phi));
   }
   _mesa_hash_table_insert(ctx->phis, instr, vphi);
   return true;
}

static bool
fixup_phi(struct ntd_context *ctx, nir_phi_instr *instr,
          struct phi_block *vphi)
{
   const struct dxil_value *values[16];
   unsigned blocks[16];
   for (unsigned i = 0; i < vphi->num_components; ++i) {
      size_t num_incoming = 0;
      nir_foreach_phi_src(src, instr) {
         const struct dxil_value *val = get_src_ssa(ctx, src->src.ssa, i);
         values[num_incoming] = val;
         blocks[num_incoming] = src->pred->index;
         ++num_incoming;
         if (num_incoming == ARRAY_SIZE(values)) {
            if (!dxil_phi_add_incoming(vphi->comp[i], values, blocks,
                                       num_incoming))
               return false;
            num_incoming = 0;
         }
      }
      if (num_incoming > 0 && !dxil_phi_add_incoming(vphi->comp[i], values,
                                                     blocks, num_incoming))
         return false;
   }
   return true;
}

static unsigned
get_n_src(struct ntd_context *ctx, const struct dxil_value **values,
          unsigned max_components, nir_tex_src *src, nir_alu_type type)
{
   unsigned num_components = nir_src_num_components(src->src);
   unsigned i = 0;

   assert(num_components <= max_components);

   for (i = 0; i < num_components; ++i) {
      values[i] = get_src(ctx, &src->src, i, type);
      if (!values[i])
         return 0;
   }

   return num_components;
}

#define PAD_SRC(ctx, array, components, undef) \
   for (unsigned i = components; i < ARRAY_SIZE(array); ++i) { \
      array[i] = undef; \
   }

static const struct dxil_value *
emit_sample(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sample", params->overload);
   if (!func)
      return NULL;

   const struct dxil_value *args[11] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE),
      params->tex, params->sampler,
      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
      params->offset[0], params->offset[1], params->offset[2],
      params->min_lod
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_sample_bias(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleBias", params->overload);
   if (!func)
      return NULL;

   assert(params->bias != NULL);

   const struct dxil_value *args[12] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_BIAS),
      params->tex, params->sampler,
      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
      params->offset[0], params->offset[1], params->offset[2],
      params->bias, params->min_lod
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_sample_level(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleLevel", params->overload);
   if (!func)
      return NULL;

   assert(params->lod_or_sample != NULL);

   const struct dxil_value *args[11] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_LEVEL),
      params->tex, params->sampler,
      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
      params->offset[0], params->offset[1], params->offset[2],
      params->lod_or_sample
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_sample_cmp(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func;
   enum dxil_intr opcode;

   func = dxil_get_function(&ctx->mod, "dx.op.sampleCmp", DXIL_F32);
   opcode = DXIL_INTR_SAMPLE_CMP;

   if (!func)
      return NULL;

   const struct dxil_value *args[12] = {
      dxil_module_get_int32_const(&ctx->mod, opcode),
      params->tex, params->sampler,
      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
      params->offset[0], params->offset[1], params->offset[2],
      params->cmp, params->min_lod
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_sample_cmp_level_zero(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func;
   enum dxil_intr opcode;

   func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpLevelZero", DXIL_F32);
   opcode = DXIL_INTR_SAMPLE_CMP_LVL_ZERO;

   if (!func)
      return NULL;

   const struct dxil_value *args[11] = {
      dxil_module_get_int32_const(&ctx->mod, opcode),
      params->tex, params->sampler,
      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
      params->offset[0], params->offset[1], params->offset[2],
      params->cmp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_sample_cmp_level(struct ntd_context *ctx, struct texop_parameters *params)
{
   ctx->mod.feats.advanced_texture_ops = true;
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpLevel", params->overload);
   if (!func)
      return NULL;

   assert(params->lod_or_sample != NULL);

   const struct dxil_value *args[12] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_CMP_LEVEL),
      params->tex, params->sampler,
      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
      params->offset[0], params->offset[1], params->offset[2],
      params->cmp, params->lod_or_sample
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_sample_cmp_bias(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpBias", params->overload);
   if (!func)
      return NULL;

   assert(params->bias != NULL);
   ctx->mod.feats.sample_cmp_bias_gradient = 1;

   const struct dxil_value *args[13] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_CMP_BIAS),
      params->tex, params->sampler,
      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
      params->offset[0], params->offset[1], params->offset[2],
      params->cmp, params->bias, params->min_lod
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_sample_grad(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleGrad", params->overload);
   if (!func)
      return NULL;

   const struct dxil_value *args[17] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_GRAD),
      params->tex, params->sampler,
      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
      params->offset[0], params->offset[1], params->offset[2],
      params->dx[0], params->dx[1], params->dx[2],
      params->dy[0], params->dy[1], params->dy[2],
      params->min_lod
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_sample_cmp_grad(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpGrad", params->overload);
   if (!func)
      return NULL;

   ctx->mod.feats.sample_cmp_bias_gradient = 1;

   const struct dxil_value *args[18] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_CMP_GRAD),
      params->tex, params->sampler,
      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
      params->offset[0], params->offset[1], params->offset[2],
      params->cmp,
      params->dx[0], params->dx[1], params->dx[2],
      params->dy[0], params->dy[1], params->dy[2],
      params->min_lod
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_texel_fetch(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", params->overload);
   if (!func)
      return NULL;

   if (!params->lod_or_sample)
      params->lod_or_sample = dxil_module_get_undef(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32));

   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOAD),
      params->tex,
      params->lod_or_sample, params->coord[0], params->coord[1], params->coord[2],
      params->offset[0], params->offset[1], params->offset[2]
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_texture_lod(struct ntd_context *ctx, struct texop_parameters *params, bool clamped)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.calculateLOD", DXIL_F32);
   if (!func)
      return NULL;

   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOD),
      params->tex,
      params->sampler,
      params->coord[0],
      params->coord[1],
      params->coord[2],
      dxil_module_get_int1_const(&ctx->mod, clamped ? 1 : 0)
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_texture_gather(struct ntd_context *ctx, struct texop_parameters *params, unsigned component)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod,
      params->cmp ? "dx.op.textureGatherCmp" : "dx.op.textureGather", params->overload);
   if (!func)
      return NULL;

   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, params->cmp ?
         DXIL_INTR_TEXTURE_GATHER_CMP : DXIL_INTR_TEXTURE_GATHER),
      params->tex,
      params->sampler,
      params->coord[0],
      params->coord[1],
      params->coord[2],
      params->coord[3],
      params->offset[0],
      params->offset[1],
      dxil_module_get_int32_const(&ctx->mod, component),
      params->cmp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args) - (params->cmp ? 0 : 1));
}

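/* Texture ops: gather every nir_tex_src into texop_parameters first, then
 * dispatch on instr->op. Unused coordinate/offset/gradient slots are padded
 * with undef via PAD_SRC to match the fixed argument counts of the DXIL
 * sample/load intrinsics above.
 */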
static bool
emit_tex(struct ntd_context *ctx, nir_tex_instr *instr)
{
   struct texop_parameters params;
   memset(&params, 0, sizeof(struct texop_parameters));
   if (ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN) {
      params.tex = ctx->srv_handles[instr->texture_index];
      params.sampler = ctx->sampler_handles[instr->sampler_index];
   }

   const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);
   const struct dxil_type *float_type = dxil_module_get_float_type(&ctx->mod, 32);
   const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);
   const struct dxil_value *float_undef = dxil_module_get_undef(&ctx->mod, float_type);

   unsigned coord_components = 0, offset_components = 0, dx_components = 0, dy_components = 0;
   params.overload = get_overload(instr->dest_type, 32);

   bool lod_is_zero = false;
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      nir_alu_type type = nir_tex_instr_src_type(instr, i);

      switch (instr->src[i].src_type) {
      case nir_tex_src_coord:
         coord_components = get_n_src(ctx, params.coord, ARRAY_SIZE(params.coord),
                                      &instr->src[i], type);
         if (!coord_components)
            return false;
         break;

      case nir_tex_src_offset:
         offset_components = get_n_src(ctx, params.offset, ARRAY_SIZE(params.offset),
                                       &instr->src[i], nir_type_int);
         if (!offset_components)
            return false;

         /* Dynamic offsets were only allowed with gather, until "advanced texture ops" in SM 6.7 */
         if (!nir_src_is_const(instr->src[i].src) && instr->op != nir_texop_tg4)
            ctx->mod.feats.advanced_texture_ops = true;
         break;

      case nir_tex_src_bias:
         assert(instr->op == nir_texop_txb);
         assert(nir_src_num_components(instr->src[i].src) == 1);
         params.bias = get_src(ctx, &instr->src[i].src, 0, nir_type_float);
         if (!params.bias)
            return false;
         break;

      case nir_tex_src_lod:
         assert(nir_src_num_components(instr->src[i].src) == 1);
         if (instr->op == nir_texop_txf_ms) {
            assert(nir_src_as_int(instr->src[i].src) == 0);
            break;
         }

         /* Buffers don't have a LOD */
         if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF)
            params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, type);
         else
            params.lod_or_sample = int_undef;
         if (!params.lod_or_sample)
            return false;

         if (nir_src_is_const(instr->src[i].src) && nir_src_as_float(instr->src[i].src) == 0.0f)
            lod_is_zero = true;
         break;

      case nir_tex_src_min_lod:
         assert(nir_src_num_components(instr->src[i].src) == 1);
         params.min_lod = get_src(ctx, &instr->src[i].src, 0, type);
         if (!params.min_lod)
            return false;
         break;

      case nir_tex_src_comparator:
         assert(nir_src_num_components(instr->src[i].src) == 1);
         params.cmp = get_src(ctx, &instr->src[i].src, 0, nir_type_float);
         if (!params.cmp)
            return false;
         break;

      case nir_tex_src_ddx:
         dx_components = get_n_src(ctx, params.dx, ARRAY_SIZE(params.dx),
                                   &instr->src[i], nir_type_float);
         if (!dx_components)
            return false;
         break;

      case nir_tex_src_ddy:
         dy_components = get_n_src(ctx, params.dy, ARRAY_SIZE(params.dy),
                                   &instr->src[i], nir_type_float);
         if (!dy_components)
            return false;
         break;

      case nir_tex_src_ms_index:
         params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, nir_type_int);
         if (!params.lod_or_sample)
            return false;
         break;

      case nir_tex_src_texture_deref:
         assert(ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN);
         params.tex = get_src_ssa(ctx, instr->src[i].src.ssa, 0);
         break;

      case nir_tex_src_sampler_deref:
         assert(ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN);
         params.sampler = get_src_ssa(ctx, instr->src[i].src.ssa, 0);
         break;

      case nir_tex_src_texture_offset:
         params.tex = emit_createhandle_call_dynamic(ctx, DXIL_RESOURCE_CLASS_SRV,
            0, instr->texture_index,
            dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
               get_src(ctx, &instr->src[i].src, 0, nir_type_uint),
               dxil_module_get_int32_const(&ctx->mod, instr->texture_index), 0),
            instr->texture_non_uniform);
         break;

      case nir_tex_src_sampler_offset:
         if (nir_tex_instr_need_sampler(instr)) {
            params.sampler = emit_createhandle_call_dynamic(ctx, DXIL_RESOURCE_CLASS_SAMPLER,
               0, instr->sampler_index,
               dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
                  get_src(ctx, &instr->src[i].src, 0, nir_type_uint),
                  dxil_module_get_int32_const(&ctx->mod, instr->sampler_index), 0),
               instr->sampler_non_uniform);
         }
         break;

      case nir_tex_src_texture_handle:
         params.tex = create_srv_handle(ctx, instr, &instr->src[i].src);
         break;

      case nir_tex_src_sampler_handle:
         if (nir_tex_instr_need_sampler(instr))
            params.sampler = create_sampler_handle(ctx, instr->is_shadow, &instr->src[i].src);
         break;

      case nir_tex_src_projector:
         unreachable("Texture projector should have been lowered");

      default:
         fprintf(stderr, "texture source: %d\n", instr->src[i].src_type);
         unreachable("unknown texture source");
      }
   }

   assert(params.tex != NULL);
   assert(instr->op == nir_texop_txf ||
          instr->op == nir_texop_txf_ms ||
          nir_tex_instr_is_query(instr) ||
          params.sampler != NULL);

   PAD_SRC(ctx, params.coord, coord_components, float_undef);
   PAD_SRC(ctx, params.offset, offset_components, int_undef);
   if (!params.min_lod)
      params.min_lod = float_undef;

   const struct dxil_value *sample = NULL;
   switch (instr->op) {
   case nir_texop_txb:
      if (params.cmp != NULL && ctx->mod.minor_version >= 8)
         sample = emit_sample_cmp_bias(ctx, &params);
      else
         sample = emit_sample_bias(ctx, &params);
      break;

   case nir_texop_tex:
      if (params.cmp != NULL) {
         sample = emit_sample_cmp(ctx, &params);
         break;
      } else if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER) {
         sample = emit_sample(ctx, &params);
         break;
      }
      params.lod_or_sample = dxil_module_get_float_const(&ctx->mod, 0);
      lod_is_zero = true;
      FALLTHROUGH;
   case nir_texop_txl:
      if (lod_is_zero && params.cmp != NULL && ctx->mod.minor_version < 7) {
         /* Prior to SM 6.7, if the level is constant 0.0, ignore the LOD argument,
          * so level-less DXIL instructions are used. This is needed to avoid emitting
          * dx.op.sampleCmpLevel, which would not be available.
          */
         sample = emit_sample_cmp_level_zero(ctx, &params);
      } else {
         if (params.cmp != NULL)
            sample = emit_sample_cmp_level(ctx, &params);
         else
            sample = emit_sample_level(ctx, &params);
      }
      break;

   case nir_texop_txd:
      PAD_SRC(ctx, params.dx, dx_components, float_undef);
      PAD_SRC(ctx, params.dy, dy_components, float_undef);
      if (params.cmp != NULL && ctx->mod.minor_version >= 8)
         sample = emit_sample_cmp_grad(ctx, &params);
      else
         sample = emit_sample_grad(ctx, &params);
      break;

   case nir_texop_txf:
   case nir_texop_txf_ms:
      if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
         params.coord[1] = int_undef;
         sample = emit_bufferload_call(ctx, params.tex, params.coord, params.overload);
      } else {
         PAD_SRC(ctx, params.coord, coord_components, int_undef);
         sample = emit_texel_fetch(ctx, &params);
      }
      break;

   case nir_texop_txs:
      sample = emit_texture_size(ctx, &params);
      break;

   case nir_texop_tg4:
      sample = emit_texture_gather(ctx, &params, instr->component);
      break;

   case nir_texop_lod:
      sample = emit_texture_lod(ctx, &params, true);
      store_def(ctx, &instr->def, 0, sample);
      sample = emit_texture_lod(ctx, &params, false);
      store_def(ctx, &instr->def, 1, sample);
      return true;

   case nir_texop_query_levels: {
      params.lod_or_sample = dxil_module_get_int_const(&ctx->mod, 0, 32);
      sample = emit_texture_size(ctx, &params);
      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, 3);
      store_def(ctx, &instr->def, 0, retval);
      return true;
   }

   case nir_texop_texture_samples: {
      params.lod_or_sample = int_undef;
      sample = emit_texture_size(ctx, &params);
      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, 3);
      store_def(ctx, &instr->def, 0, retval);
      return true;
   }

   default:
      fprintf(stderr, "texture op: %d\n", instr->op);
      unreachable("unknown texture op");
   }

   if (!sample)
      return false;

   for (unsigned i = 0; i < instr->def.num_components; ++i) {
      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, i);
      store_def(ctx, &instr->def, i, retval);
   }

   return true;
}

static bool
emit_undefined(struct ntd_context *ctx, nir_undef_instr *undef)
{
   for (unsigned i = 0; i < undef->def.num_components; ++i)
      store_ssa_def(ctx, &undef->def, i, dxil_module_get_int32_const(&ctx->mod, 0));
   return true;
}

static bool
emit_instr(struct ntd_context *ctx, struct nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return emit_alu(ctx, nir_instr_as_alu(instr));
   case nir_instr_type_intrinsic:
      return emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
   case nir_instr_type_load_const:
      return emit_load_const(ctx, nir_instr_as_load_const(instr));
   case nir_instr_type_deref:
      return emit_deref(ctx, nir_instr_as_deref(instr));
   case nir_instr_type_jump:
      return emit_jump(ctx, nir_instr_as_jump(instr));
   case nir_instr_type_phi:
      return emit_phi(ctx, nir_instr_as_phi(instr));
   case nir_instr_type_tex:
      return emit_tex(ctx, nir_instr_as_tex(instr));
   case nir_instr_type_undef:
      return emit_undefined(ctx, nir_instr_as_undef(instr));
   default:
      log_nir_instr_unsupported(ctx->logger, "Unimplemented instruction type",
                                instr);
      return false;
   }
}


static bool
emit_block(struct ntd_context *ctx, struct nir_block *block)
{
   assert(block->index < ctx->mod.cur_emitting_func->num_basic_block_ids);
   ctx->mod.cur_emitting_func->basic_block_ids[block->index] = ctx->mod.cur_emitting_func->curr_block;

   nir_foreach_instr(instr, block) {
      TRACE_CONVERSION(instr);

      if (!emit_instr(ctx, instr)) {
         return false;
      }
   }
   return true;
}

static bool
emit_cf_list(struct ntd_context *ctx, struct exec_list *list);

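/* Control flow is emitted against NIR's block indices: a conditional branch
 * targets the first then-block and either the first else-block or, when the
 * else list is empty, the merge block that follows the last then-block.
 */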
5769 static bool
emit_if(struct ntd_context * ctx,struct nir_if * if_stmt)5770 emit_if(struct ntd_context *ctx, struct nir_if *if_stmt)
5771 {
5772 assert(nir_src_num_components(if_stmt->condition) == 1);
5773 const struct dxil_value *cond = get_src(ctx, &if_stmt->condition, 0,
5774 nir_type_bool);
5775 if (!cond)
5776 return false;
5777
5778 /* prepare blocks */
5779 nir_block *then_block = nir_if_first_then_block(if_stmt);
5780 assert(nir_if_last_then_block(if_stmt)->successors[0]);
5781 assert(!nir_if_last_then_block(if_stmt)->successors[1]);
5782 int then_succ = nir_if_last_then_block(if_stmt)->successors[0]->index;
5783
5784 nir_block *else_block = NULL;
5785 int else_succ = -1;
5786 if (!exec_list_is_empty(&if_stmt->else_list)) {
5787 else_block = nir_if_first_else_block(if_stmt);
5788 assert(nir_if_last_else_block(if_stmt)->successors[0]);
5789 assert(!nir_if_last_else_block(if_stmt)->successors[1]);
5790 else_succ = nir_if_last_else_block(if_stmt)->successors[0]->index;
5791 }
5792
5793 if (!emit_cond_branch(ctx, cond, then_block->index,
5794 else_block ? else_block->index : then_succ))
5795 return false;
5796
5797 /* handle then-block */
5798 if (!emit_cf_list(ctx, &if_stmt->then_list) ||
5799 (!nir_block_ends_in_jump(nir_if_last_then_block(if_stmt)) &&
5800 !emit_branch(ctx, then_succ)))
5801 return false;
5802
5803 if (else_block) {
5804 /* handle else-block */
5805 if (!emit_cf_list(ctx, &if_stmt->else_list) ||
5806 (!nir_block_ends_in_jump(nir_if_last_else_block(if_stmt)) &&
5807 !emit_branch(ctx, else_succ)))
5808 return false;
5809 }
5810
5811 return true;
5812 }
5813
5814 static bool
emit_loop(struct ntd_context * ctx,nir_loop * loop)5815 emit_loop(struct ntd_context *ctx, nir_loop *loop)
5816 {
5817 assert(!nir_loop_has_continue_construct(loop));
5818 nir_block *first_block = nir_loop_first_block(loop);
5819 nir_block *last_block = nir_loop_last_block(loop);
5820
5821 assert(last_block->successors[0]);
5822 assert(!last_block->successors[1]);
5823
5824 if (!emit_branch(ctx, first_block->index))
5825 return false;
5826
5827 if (!emit_cf_list(ctx, &loop->body))
5828 return false;
5829
5830 /* If the loop's last block doesn't explicitly jump somewhere, then there's
5831 * an implicit continue that should take it back to the first loop block
5832 */
5833 nir_instr *last_instr = nir_block_last_instr(last_block);
5834 if ((!last_instr || last_instr->type != nir_instr_type_jump) &&
5835 !emit_branch(ctx, first_block->index))
5836 return false;
5837
5838 return true;
5839 }
5840
5841 static bool
emit_cf_list(struct ntd_context * ctx,struct exec_list * list)5842 emit_cf_list(struct ntd_context *ctx, struct exec_list *list)
5843 {
5844 foreach_list_typed(nir_cf_node, node, node, list) {
5845 switch (node->type) {
5846 case nir_cf_node_block:
5847 if (!emit_block(ctx, nir_cf_node_as_block(node)))
5848 return false;
5849 break;
5850
5851 case nir_cf_node_if:
5852 if (!emit_if(ctx, nir_cf_node_as_if(node)))
5853 return false;
5854 break;
5855
5856 case nir_cf_node_loop:
5857 if (!emit_loop(ctx, nir_cf_node_as_loop(node)))
5858 return false;
5859 break;
5860
5861 default:
5862 unreachable("unsupported cf-list node");
5863 break;
5864 }
5865 }
5866 return true;
5867 }
5868
5869 static void
insert_sorted_by_binding(struct exec_list * var_list,nir_variable * new_var)5870 insert_sorted_by_binding(struct exec_list *var_list, nir_variable *new_var)
5871 {
5872 nir_foreach_variable_in_list(var, var_list) {
5873 if (var->data.binding > new_var->data.binding) {
5874 exec_node_insert_node_before(&var->node, &new_var->node);
5875 return;
5876 }
5877 }
5878 exec_list_push_tail(var_list, &new_var->node);
5879 }
5880
5881
5882 static void
sort_uniforms_by_binding_and_remove_structs(nir_shader * s)5883 sort_uniforms_by_binding_and_remove_structs(nir_shader *s)
5884 {
5885 struct exec_list new_list;
5886 exec_list_make_empty(&new_list);
5887
5888 nir_foreach_variable_with_modes_safe(var, s, nir_var_uniform) {
5889 exec_node_remove(&var->node);
5890 const struct glsl_type *type = glsl_without_array(var->type);
5891 if (!glsl_type_is_struct(type))
5892 insert_sorted_by_binding(&new_list, var);
5893 }
5894 exec_list_append(&s->variables, &new_list);
5895 }
5896
5897 static bool
emit_cbvs(struct ntd_context * ctx)5898 emit_cbvs(struct ntd_context *ctx)
5899 {
5900 if (ctx->opts->environment != DXIL_ENVIRONMENT_GL) {
5901 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ubo) {
5902 if (!emit_ubo_var(ctx, var))
5903 return false;
5904 }
5905 } else {
5906 if (ctx->shader->info.num_ubos) {
5907 const unsigned ubo_size = 16384 /*4096 vec4's*/;
5908 uint array_base = ctx->shader->info.first_ubo_is_default_ubo ? 1 : 0;
5909 bool has_ubo0 = ctx->shader->num_uniforms > 0 && ctx->shader->info.first_ubo_is_default_ubo;
5910 bool has_state_vars = ctx->opts->last_ubo_is_not_arrayed;
5911 unsigned ubo1_array_size = ctx->shader->info.num_ubos - array_base -
5912 (has_state_vars ? 1 : 0);
5913
         if (has_ubo0 &&
             !emit_cbv(ctx, 0, 0, ubo_size, 1, "__ubo_uniforms"))
            return false;
         if (ubo1_array_size &&
             !emit_cbv(ctx, array_base, 0, ubo_size, ubo1_array_size, "__ubos"))
            return false;
         if (has_state_vars &&
             !emit_cbv(ctx, ctx->shader->info.num_ubos - 1, 0, ubo_size, 1, "__ubo_state_vars"))
            return false;
      }
   }

   return true;
}

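/* Give each function_temp variable its own 16-byte-aligned alloca and
 * record the resulting pointers, indexed by driver_location, for later
 * lookup. */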
static bool
emit_scratch(struct ntd_context *ctx, nir_function_impl *impl)
{
   uint32_t index = 0;
   nir_foreach_function_temp_variable(var, impl)
      var->data.driver_location = index++;

   if (ctx->scratchvars)
      ralloc_free((void *)ctx->scratchvars);

   ctx->scratchvars = ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, index);

   nir_foreach_function_temp_variable(var, impl) {
      const struct dxil_type *type = get_type_for_glsl_type(&ctx->mod, var->type);
      const struct dxil_value *length = dxil_module_get_int32_const(&ctx->mod, 1);
      const struct dxil_value *ptr = dxil_emit_alloca(&ctx->mod, type, length, 16);
      if (!ptr)
         return false;

      ctx->scratchvars[var->data.driver_location] = ptr;
   }

   return true;
}

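/* Emit one NIR function as a DXIL function definition: set up its fp32
 * denorm attributes and SSA def/type tables, then emit the body and fix up
 * the phis once every block has been emitted. */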
static bool
emit_function(struct ntd_context *ctx, nir_function *func, nir_function_impl *impl)
{
   assert(func->num_params == 0);
   nir_metadata_require(impl, nir_metadata_block_index);

   const char *attr_keys[2] = { NULL };
   const char *attr_values[2] = { NULL };
   if (ctx->shader->info.float_controls_execution_mode &
       (FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32 | FLOAT_CONTROLS_DENORM_PRESERVE_FP32))
      attr_keys[0] = "fp32-denorm-mode";
   if (ctx->shader->info.float_controls_execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32)
      attr_values[0] = "ftz";
   else if (ctx->shader->info.float_controls_execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP32)
      attr_values[0] = "preserve";

   const struct dxil_type *void_type = dxil_module_get_void_type(&ctx->mod);
   const struct dxil_type *func_type = dxil_module_add_function_type(&ctx->mod, void_type, NULL, 0);
   struct dxil_func_def *func_def = dxil_add_function_def(&ctx->mod, func->name, func_type, impl->num_blocks, attr_keys, attr_values);
   if (!func_def)
      return false;

   if (func->is_entrypoint)
      ctx->main_func_def = func_def;
   else if (func == ctx->tess_ctrl_patch_constant_func)
      ctx->tess_ctrl_patch_constant_func_def = func_def;

   ctx->defs = rzalloc_array(ctx->ralloc_ctx, struct dxil_def, impl->ssa_alloc);
   ctx->float_types = rzalloc_array(ctx->ralloc_ctx, BITSET_WORD, BITSET_WORDS(impl->ssa_alloc));
   ctx->int_types = rzalloc_array(ctx->ralloc_ctx, BITSET_WORD, BITSET_WORDS(impl->ssa_alloc));
   if (!ctx->defs || !ctx->float_types || !ctx->int_types)
      return false;
   ctx->num_defs = impl->ssa_alloc;

   ctx->phis = _mesa_pointer_hash_table_create(ctx->ralloc_ctx);
   if (!ctx->phis)
      return false;

   nir_gather_types(impl, ctx->float_types, ctx->int_types);

   if (!emit_scratch(ctx, impl))
      return false;

   if (!emit_static_indexing_handles(ctx))
      return false;

   if (!emit_cf_list(ctx, &impl->body))
      return false;

   hash_table_foreach(ctx->phis, entry) {
      if (!fixup_phi(ctx, (nir_phi_instr *)entry->key,
                     (struct phi_block *)entry->data))
         return false;
   }

   if (!dxil_emit_ret_void(&ctx->mod))
      return false;

   ralloc_free(ctx->defs);
   ctx->defs = NULL;
   _mesa_hash_table_destroy(ctx->phis, NULL);
   return true;
}

static bool
emit_module(struct ntd_context *ctx, const struct nir_to_dxil_options *opts)
{
   /* The validator forces us to emit resources in a specific order:
    * CBVs, Samplers, SRVs, UAVs. While we're at it, also remove stale
    * struct uniforms; they have been lowered but might not have been
    * removed yet. */
   sort_uniforms_by_binding_and_remove_structs(ctx->shader);

   /* CBVs */
   if (!emit_cbvs(ctx))
      return false;

   /* Samplers */
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
      unsigned count = glsl_type_get_sampler_count(var->type);
      assert(count == 0 || glsl_type_is_bare_sampler(glsl_without_array(var->type)));
      if (count > 0 && !emit_sampler(ctx, var, count))
         return false;
   }

   /* SRVs */
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
      unsigned count = glsl_type_get_texture_count(var->type);
      assert(count == 0 || glsl_type_is_texture(glsl_without_array(var->type)));
      if (count > 0 && !emit_srv(ctx, var, count))
         return false;
   }

   /* Handle read-only SSBOs as SRVs */
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {
         if ((var->data.access & ACCESS_NON_WRITEABLE) != 0) {
            unsigned count = 1;
            if (glsl_type_is_array(var->type))
               count = glsl_get_length(var->type);
            if (!emit_srv(ctx, var, count))
               return false;
         }
      }
   }

   if (!emit_shared_vars(ctx))
      return false;
   if (!emit_global_consts(ctx))
      return false;

   /* UAVs */
   if (ctx->shader->info.stage == MESA_SHADER_KERNEL) {
      if (!emit_globals(ctx, opts->num_kernel_globals))
         return false;

   } else if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      /* Handle read/write SSBOs as UAVs */
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {
         if ((var->data.access & ACCESS_NON_WRITEABLE) == 0) {
            unsigned count = 1;
            if (glsl_type_is_array(var->type))
               count = glsl_get_length(var->type);
            if (!emit_uav(ctx, var->data.binding, var->data.descriptor_set,
                          count, DXIL_COMP_TYPE_INVALID, 1,
                          DXIL_RESOURCE_KIND_RAW_BUFFER, var->data.access, var->name))
               return false;
         }
      }
   } else {
      for (unsigned i = 0; i < ctx->shader->info.num_ssbos; ++i) {
         char name[64];
         snprintf(name, sizeof(name), "__ssbo%d", i);
         if (!emit_uav(ctx, i, 0, 1, DXIL_COMP_TYPE_INVALID, 1,
                       DXIL_RESOURCE_KIND_RAW_BUFFER, 0, name))
            return false;
      }
      /* To work around a WARP bug, bind these descriptors a second time in descriptor
       * space 2. Space 0 will be used for static indexing, while space 2 will be used
       * for dynamic indexing. Space 0 will be individual SSBOs in the DXIL shader, while
       * space 2 will be a single array.
       */
      if (ctx->shader->info.num_ssbos &&
          !emit_uav(ctx, 0, 2, ctx->shader->info.num_ssbos, DXIL_COMP_TYPE_INVALID, 1,
                    DXIL_RESOURCE_KIND_RAW_BUFFER, 0, "__ssbo_dynamic"))
         return false;
   }

   nir_foreach_image_variable(var, ctx->shader) {
      if (!emit_uav_var(ctx, var, glsl_type_get_image_count(var->type)))
         return false;
   }

   ctx->mod.info.has_per_sample_input =
      BITSET_TEST(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
      ctx->shader->info.fs.uses_sample_shading ||
      ctx->shader->info.fs.uses_sample_qualifier;
   if (!ctx->mod.info.has_per_sample_input && ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in | nir_var_system_value) {
         if (var->data.sample) {
            ctx->mod.info.has_per_sample_input = true;
            break;
         }
      }
   }

   /* From the Vulkan spec 1.3.238, section 15.8:
    *    When Sample Shading is enabled, the x and y components of FragCoord
    *    reflect the location of one of the samples corresponding to the
    *    shader invocation.
    *
    * In other words, if the fragment shader is executing per-sample, then
    * the position variable should always be per-sample.
    *
    * Also:
    *    The Centroid interpolation decoration is ignored, but allowed, on
    *    FragCoord.
    */
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      nir_variable *pos_var = nir_find_variable_with_location(ctx->shader, nir_var_shader_in, VARYING_SLOT_POS);
      if (pos_var) {
         if (ctx->mod.info.has_per_sample_input)
            pos_var->data.sample = true;
         pos_var->data.centroid = false;
      }
   }

   unsigned input_clip_size = ctx->mod.shader_kind == DXIL_PIXEL_SHADER ?
      ctx->shader->info.clip_distance_array_size : ctx->opts->input_clip_size;
   preprocess_signatures(&ctx->mod, ctx->shader, input_clip_size);

   nir_foreach_function_with_impl(func, impl, ctx->shader) {
      if (!emit_function(ctx, func, impl))
         return false;
   }

   if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_out) {
         if (var->data.location == FRAG_RESULT_STENCIL) {
            ctx->mod.feats.stencil_ref = true;
         }
      }
   } else if (ctx->shader->info.stage == MESA_SHADER_VERTEX ||
              ctx->shader->info.stage == MESA_SHADER_TESS_EVAL) {
      if (ctx->shader->info.outputs_written &
          (VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER))
         ctx->mod.feats.array_layer_from_vs_or_ds = true;
   } else if (ctx->shader->info.stage == MESA_SHADER_GEOMETRY ||
              ctx->shader->info.stage == MESA_SHADER_TESS_CTRL) {
      if (ctx->shader->info.inputs_read &
          (VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER))
         ctx->mod.feats.array_layer_from_vs_or_ds = true;
   }

   if (ctx->mod.feats.native_low_precision && ctx->mod.minor_version < 2) {
      ctx->logger->log(ctx->logger->priv,
                       "Shader uses 16-bit types, which require shader model 6.2, but 6.2 is unsupported\n");
      return false;
   }

   return emit_metadata(ctx) &&
          dxil_emit_module(&ctx->mod);
}

static unsigned int
get_dxil_shader_kind(struct nir_shader *s)
{
   switch (s->info.stage) {
   case MESA_SHADER_VERTEX:
      return DXIL_VERTEX_SHADER;
   case MESA_SHADER_TESS_CTRL:
      return DXIL_HULL_SHADER;
   case MESA_SHADER_TESS_EVAL:
      return DXIL_DOMAIN_SHADER;
   case MESA_SHADER_GEOMETRY:
      return DXIL_GEOMETRY_SHADER;
   case MESA_SHADER_FRAGMENT:
      return DXIL_PIXEL_SHADER;
   case MESA_SHADER_KERNEL:
   case MESA_SHADER_COMPUTE:
      return DXIL_COMPUTE_SHADER;
   default:
      unreachable("unknown shader stage in nir_to_dxil");
      return DXIL_COMPUTE_SHADER;
   }
}

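/* nir_lower_bit_size callback: return the bit size an ALU op should be
 * widened to, or 0 to leave it alone. DXIL doesn't support 8-bit
 * arithmetic, and 16-bit support is optional, so ops with sub-minimum
 * (non-bool) operands get widened to 16 or 32 bits. */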
static unsigned
lower_bit_size_callback(const nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_alu)
      return 0;
   nir_alu_instr *alu = nir_instr_as_alu(instr);

   if (nir_op_infos[alu->op].is_conversion)
      return 0;

   if (nir_op_is_vec_or_mov(alu->op))
      return 0;

   unsigned num_inputs = nir_op_infos[alu->op].num_inputs;
   const struct nir_to_dxil_options *opts = (const struct nir_to_dxil_options *)data;
   unsigned min_bit_size = opts->lower_int16 ? 32 : 16;

   unsigned ret = 0;
   for (unsigned i = 0; i < num_inputs; i++) {
      unsigned bit_size = nir_src_bit_size(alu->src[i].src);
      if (bit_size != 1 && bit_size < min_bit_size)
         ret = min_bit_size;
   }

   return ret;
}

static bool
vectorize_filter(
   unsigned align_mul,
   unsigned align_offset,
   unsigned bit_size,
   unsigned num_components,
   nir_intrinsic_instr *low, nir_intrinsic_instr *high,
   void *data)
{
   return util_is_power_of_two_nonzero(num_components);
}

struct lower_mem_bit_sizes_data {
   const nir_shader_compiler_options *nir_options;
   const struct nir_to_dxil_options *dxil_options;
};

static nir_mem_access_size_align
lower_mem_access_bit_sizes_cb(nir_intrinsic_op intrin,
                              uint8_t bytes,
                              uint8_t bit_size_in,
                              uint32_t align_mul,
                              uint32_t align_offset,
                              bool offset_is_const,
                              const void *cb_data)
{
   const struct lower_mem_bit_sizes_data *data = cb_data;
   unsigned max_bit_size = 32;
   unsigned min_bit_size = data->dxil_options->lower_int16 ? 32 : 16;
   unsigned closest_bit_size = MAX2(min_bit_size, MIN2(max_bit_size, bit_size_in));
   if (intrin == nir_intrinsic_load_ubo) {
      /* UBO loads can be done at whatever (supported) bit size, but require
       * 16-byte alignment and can load up to 16 bytes per instruction.
       * However, this pass requires loading 16 bytes of data to get 16-byte
       * alignment. We're going to run lower_ubo_vec4, which can deal with
       * unaligned vec4s, so for this pass let's just deal with bit size and
       * total size restrictions. */
      return (nir_mem_access_size_align) {
         .align = closest_bit_size / 8,
         .bit_size = closest_bit_size,
         .num_components = DIV_ROUND_UP(MIN2(bytes, 16) * 8, closest_bit_size),
      };
   }

   assert(intrin == nir_intrinsic_load_ssbo || intrin == nir_intrinsic_store_ssbo);
   uint32_t align = nir_combined_align(align_mul, align_offset);
   if (align < min_bit_size / 8) {
      /* Unaligned load/store: use the minimum bit size, up to 4 components */
      unsigned ideal_num_components = intrin == nir_intrinsic_load_ssbo ?
         DIV_ROUND_UP(bytes * 8, min_bit_size) :
         (32 / min_bit_size);
      return (nir_mem_access_size_align) {
         .align = min_bit_size / 8,
         .bit_size = min_bit_size,
         .num_components = MIN2(4, ideal_num_components),
      };
   }

   /* Increase/decrease the bit size to get closer to the requested byte size/alignment */
   unsigned bit_size = closest_bit_size;
   unsigned target = MIN2(bytes, align);
   while (target < bit_size / 8 && bit_size > min_bit_size)
      bit_size /= 2;
   while (target > bit_size / 8 * 4 && bit_size < max_bit_size)
      bit_size *= 2;
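   /* For instance, a 4-byte SSBO load with only 2-byte alignment and a
    * 16-bit minimum starts at a 32-bit closest size, shrinks once to 16
    * bits (target 2 < 4 bytes), and is emitted as two 16-bit components. */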

   /* This is the best we can do */
   unsigned num_components = intrin == nir_intrinsic_load_ssbo ?
      DIV_ROUND_UP(bytes * 8, bit_size) :
      MAX2(1, (bytes * 8 / bit_size));
   return (nir_mem_access_size_align) {
      .align = bit_size / 8,
      .bit_size = bit_size,
      .num_components = MIN2(4, num_components),
   };
}

static void
optimize_nir(struct nir_shader *s, const struct nir_to_dxil_options *opts)
{
   bool progress;
   do {
      progress = false;
      NIR_PASS_V(s, nir_lower_vars_to_ssa);
      NIR_PASS(progress, s, nir_lower_indirect_derefs, nir_var_function_temp, 4);
      NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL);
      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, nir_opt_copy_prop_vars);
      NIR_PASS(progress, s, nir_lower_bit_size, lower_bit_size_callback, (void *)opts);
      NIR_PASS(progress, s, dxil_nir_lower_8bit_conv);
      if (opts->lower_int16)
         NIR_PASS(progress, s, dxil_nir_lower_16bit_conv);
      NIR_PASS(progress, s, nir_opt_remove_phis);
      NIR_PASS(progress, s, nir_opt_dce);
      NIR_PASS(progress, s, nir_opt_if,
               nir_opt_if_optimize_phi_true_false | nir_opt_if_avoid_64bit_phis);
      NIR_PASS(progress, s, nir_opt_dead_cf);
      NIR_PASS(progress, s, nir_opt_cse);
      NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, dxil_nir_algebraic);
      if (s->options->lower_int64_options)
         NIR_PASS(progress, s, nir_lower_int64);
      NIR_PASS(progress, s, nir_lower_alu);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_undef);
      NIR_PASS(progress, s, nir_opt_deref);
      NIR_PASS(progress, s, dxil_nir_lower_upcast_phis, opts->lower_int16 ? 32 : 16);
      NIR_PASS(progress, s, nir_lower_64bit_phis);
      NIR_PASS(progress, s, nir_lower_phis_to_scalar, true);
      NIR_PASS(progress, s, nir_opt_loop_unroll);
      NIR_PASS(progress, s, nir_lower_pack);
      NIR_PASS(progress, s, dxil_nir_remove_oob_array_accesses);
      NIR_PASS_V(s, nir_lower_system_values);
   } while (progress);

   do {
      progress = false;
      NIR_PASS(progress, s, nir_opt_algebraic_late);
   } while (progress);

   NIR_PASS_V(s, nir_lower_undef_to_zero);
}

static void
dxil_fill_validation_state(struct ntd_context *ctx,
                           struct dxil_validation_state *state)
{
   unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
      sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
   state->num_resources = ctx->resources.size / resource_element_size;
   state->resources.v0 = (struct dxil_resource_v0 *)ctx->resources.data;
   if (ctx->shader->info.subgroup_size >= SUBGROUP_SIZE_REQUIRE_4) {
      state->state.psv1.psv0.max_expected_wave_lane_count = ctx->shader->info.subgroup_size;
      state->state.psv1.psv0.min_expected_wave_lane_count = ctx->shader->info.subgroup_size;
   } else {
      state->state.psv1.psv0.max_expected_wave_lane_count = UINT_MAX;
   }
   state->state.psv1.shader_stage = (uint8_t)ctx->mod.shader_kind;
   state->state.psv1.uses_view_id = (uint8_t)ctx->mod.feats.view_id;
   state->state.psv1.sig_input_elements = (uint8_t)ctx->mod.num_sig_inputs;
   state->state.psv1.sig_output_elements = (uint8_t)ctx->mod.num_sig_outputs;
   state->state.psv1.sig_patch_const_or_prim_elements = (uint8_t)ctx->mod.num_sig_patch_consts;

   switch (ctx->mod.shader_kind) {
   case DXIL_VERTEX_SHADER:
      state->state.psv1.psv0.vs.output_position_present = ctx->mod.info.has_out_position;
      break;
   case DXIL_PIXEL_SHADER:
      /* TODO: handle depth outputs */
      state->state.psv1.psv0.ps.depth_output = ctx->mod.info.has_out_depth;
      state->state.psv1.psv0.ps.sample_frequency =
         ctx->mod.info.has_per_sample_input;
      break;
   case DXIL_COMPUTE_SHADER:
      state->state.num_threads_x = MAX2(ctx->shader->info.workgroup_size[0], 1);
      state->state.num_threads_y = MAX2(ctx->shader->info.workgroup_size[1], 1);
      state->state.num_threads_z = MAX2(ctx->shader->info.workgroup_size[2], 1);
      break;
   case DXIL_GEOMETRY_SHADER:
      state->state.psv1.max_vertex_count = ctx->shader->info.gs.vertices_out;
      state->state.psv1.psv0.gs.input_primitive = dxil_get_input_primitive(ctx->shader->info.gs.input_primitive);
      state->state.psv1.psv0.gs.output_toplology = dxil_get_primitive_topology(ctx->shader->info.gs.output_primitive);
      state->state.psv1.psv0.gs.output_stream_mask = MAX2(ctx->shader->info.gs.active_stream_mask, 1);
      state->state.psv1.psv0.gs.output_position_present = ctx->mod.info.has_out_position;
      break;
   case DXIL_HULL_SHADER:
      state->state.psv1.psv0.hs.input_control_point_count = ctx->tess_input_control_point_count;
      state->state.psv1.psv0.hs.output_control_point_count = ctx->shader->info.tess.tcs_vertices_out;
      state->state.psv1.psv0.hs.tessellator_domain = get_tessellator_domain(ctx->shader->info.tess._primitive_mode);
      state->state.psv1.psv0.hs.tessellator_output_primitive = get_tessellator_output_primitive(&ctx->shader->info);
      state->state.psv1.sig_patch_const_or_prim_vectors = ctx->mod.num_psv_patch_consts;
      break;
   case DXIL_DOMAIN_SHADER:
      state->state.psv1.psv0.ds.input_control_point_count = ctx->shader->info.tess.tcs_vertices_out;
      state->state.psv1.psv0.ds.tessellator_domain = get_tessellator_domain(ctx->shader->info.tess._primitive_mode);
      state->state.psv1.psv0.ds.output_position_present = ctx->mod.info.has_out_position;
      state->state.psv1.sig_patch_const_or_prim_vectors = ctx->mod.num_psv_patch_consts;
      break;
   default:
      assert(0 && "Shader type not (yet) supported");
   }
}

static nir_variable *
add_sysvalue(struct ntd_context *ctx,
             uint8_t value, char *name,
             int driver_location)
{
   nir_variable *var = rzalloc(ctx->shader, nir_variable);
   if (!var)
      return NULL;
   var->data.driver_location = driver_location;
   var->data.location = value;
   var->type = glsl_uint_type();
   var->name = name;
   var->data.mode = nir_var_system_value;
   var->data.interpolation = INTERP_MODE_FLAT;
   return var;
}

static bool
append_input_or_sysvalue(struct ntd_context *ctx,
                         int input_loc, int sv_slot,
                         char *name, int driver_location)
{
   if (input_loc >= 0) {
      /* Check whether an input variable is available that corresponds
       * to the sysvalue */
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
         if (var->data.location == input_loc) {
            ctx->system_value[sv_slot] = var;
            return true;
         }
      }
   }

   ctx->system_value[sv_slot] = add_sysvalue(ctx, sv_slot, name, driver_location);
   if (!ctx->system_value[sv_slot])
      return false;

   nir_shader_add_variable(ctx->shader, ctx->system_value[sv_slot]);
   return true;
}

struct sysvalue_name {
   gl_system_value value;
   int slot;
   char *name;
   gl_shader_stage only_in_shader;
} possible_sysvalues[] = {
   {SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, -1, "SV_VertexID", MESA_SHADER_NONE},
   {SYSTEM_VALUE_INSTANCE_ID, -1, "SV_InstanceID", MESA_SHADER_NONE},
   {SYSTEM_VALUE_FRONT_FACE, VARYING_SLOT_FACE, "SV_IsFrontFace", MESA_SHADER_NONE},
   {SYSTEM_VALUE_PRIMITIVE_ID, VARYING_SLOT_PRIMITIVE_ID, "SV_PrimitiveID", MESA_SHADER_GEOMETRY},
   {SYSTEM_VALUE_SAMPLE_ID, -1, "SV_SampleIndex", MESA_SHADER_NONE},
};

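/* Make sure every system value the shader reads is backed by a variable:
 * reuse a matching shader input when one exists, otherwise synthesize a
 * new system-value variable at the end of the driver_location range. */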
static bool
allocate_sysvalues(struct ntd_context *ctx)
{
   unsigned driver_location = 0;
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in)
      driver_location = MAX2(driver_location, var->data.driver_location + 1);
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_system_value)
      driver_location = MAX2(driver_location, var->data.driver_location + 1);

   if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT &&
       !BITSET_TEST(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID)) {
      bool need_sample_id = ctx->shader->info.fs.uses_sample_shading;

      /* "var->data.sample = true" sometimes just means "I want per-sample
       * shading", which explains why we can end up with vars having flat
       * interpolation with the per-sample bit set. If there are only such
       * variables, we need to tell DXIL that we read SV_SampleIndex to
       * make DXIL validation happy.
       */
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
         bool var_can_be_sample_rate = !var->data.centroid && var->data.interpolation != INTERP_MODE_FLAT;
         /* If there's an input that will actually force sample-rate shading, then we don't
          * need SV_SampleIndex. */
         if (var->data.sample && var_can_be_sample_rate) {
            need_sample_id = false;
            break;
         }
         /* If there's an input that wants to be sample-rate, but can't be, then we might
          * need SV_SampleIndex. */
         if (var->data.sample && !var_can_be_sample_rate)
            need_sample_id = true;
      }

      if (need_sample_id)
         BITSET_SET(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID);
   }

   for (unsigned i = 0; i < ARRAY_SIZE(possible_sysvalues); ++i) {
      struct sysvalue_name *info = &possible_sysvalues[i];
      if (info->only_in_shader != MESA_SHADER_NONE &&
          info->only_in_shader != ctx->shader->info.stage)
         continue;
      if (BITSET_TEST(ctx->shader->info.system_values_read, info->value)) {
         if (!append_input_or_sysvalue(ctx, info->slot,
                                       info->value, info->name,
                                       driver_location++))
            return false;
      }
   }
   return true;
}

static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
   return glsl_count_attribute_slots(type, false);
}

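/* Shader model and validator versions are packed as (major << 16) | minor,
 * matching the shifts and masks used below. */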
static const unsigned dxil_validator_min_capable_version = DXIL_VALIDATOR_1_4;
static const unsigned dxil_validator_max_capable_version = DXIL_VALIDATOR_1_8;
static const unsigned dxil_min_shader_model = SHADER_MODEL_6_0;
static const unsigned dxil_max_shader_model = SHADER_MODEL_6_8;

bool
nir_to_dxil(struct nir_shader *s, const struct nir_to_dxil_options *opts,
            const struct dxil_logger *logger, struct blob *blob)
{
   assert(opts);
   bool retval = true;
   debug_dxil = (int)debug_get_option_debug_dxil();
   blob_init(blob);

   if (opts->shader_model_max < dxil_min_shader_model) {
      debug_printf("D3D12: cannot support emitting shader models lower than %d.%d\n",
                   dxil_min_shader_model >> 16,
                   dxil_min_shader_model & 0xffff);
      return false;
   }

   if (opts->shader_model_max > dxil_max_shader_model) {
      debug_printf("D3D12: cannot support emitting shader models higher than %d.%d\n",
                   dxil_max_shader_model >> 16,
                   dxil_max_shader_model & 0xffff);
      return false;
   }

   if (opts->validator_version_max != NO_DXIL_VALIDATION &&
       opts->validator_version_max < dxil_validator_min_capable_version) {
      debug_printf("D3D12: Invalid validator version %d.%d, must be 1.4 or greater\n",
                   opts->validator_version_max >> 16,
                   opts->validator_version_max & 0xffff);
      return false;
   }

   /* If there is no validation, write the blob as if it were going to be
    * validated by the newest validator we understand. Same if the validator
    * is newer than we know how to write for.
    */
   uint32_t validator_version =
      opts->validator_version_max == NO_DXIL_VALIDATION ||
      opts->validator_version_max > dxil_validator_max_capable_version ?
      dxil_validator_max_capable_version : opts->validator_version_max;

   struct ntd_context *ctx = calloc(1, sizeof(*ctx));
   if (!ctx)
      return false;

   ctx->opts = opts;
   ctx->shader = s;
   ctx->logger = logger ? logger : &default_logger;

   ctx->ralloc_ctx = ralloc_context(NULL);
   if (!ctx->ralloc_ctx) {
      retval = false;
      goto out;
   }

   util_dynarray_init(&ctx->srv_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->uav_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->cbv_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->sampler_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->resources, ctx->ralloc_ctx);
   dxil_module_init(&ctx->mod, ctx->ralloc_ctx);
   ctx->mod.shader_kind = get_dxil_shader_kind(s);
   ctx->mod.major_version = 6;
   /* Use the highest shader model that's supported and can be validated */
   ctx->mod.minor_version =
      MIN2(opts->shader_model_max & 0xffff, validator_version & 0xffff);
   ctx->mod.major_validator = validator_version >> 16;
   ctx->mod.minor_validator = validator_version & 0xffff;

   if (s->info.stage <= MESA_SHADER_FRAGMENT) {
      uint64_t in_mask =
         s->info.stage == MESA_SHADER_VERTEX ?
         0 : (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER);
      uint64_t out_mask =
         s->info.stage == MESA_SHADER_FRAGMENT ?
         ((1ull << FRAG_RESULT_STENCIL) | (1ull << FRAG_RESULT_SAMPLE_MASK)) :
         (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER);

      NIR_PASS_V(s, dxil_nir_fix_io_uint_type, in_mask, out_mask);
   }

   NIR_PASS_V(s, dxil_nir_lower_fquantize2f16);
   NIR_PASS_V(s, nir_lower_frexp);
   NIR_PASS_V(s, nir_lower_flrp, 16 | 32 | 64, true);
   NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size_vec4, nir_lower_io_lower_64bit_to_32);
   NIR_PASS_V(s, dxil_nir_ensure_position_writes);
   NIR_PASS_V(s, dxil_nir_lower_system_values);
   NIR_PASS_V(s, nir_lower_io_to_scalar, nir_var_shader_in | nir_var_system_value | nir_var_shader_out, NULL, NULL);

   /* Do a round of optimization to try to vectorize loads/stores. Otherwise the addresses used for loads
    * might be too opaque for the pass to see that they're next to each other. */
   optimize_nir(s, opts);

   /* Vectorize UBO/SSBO accesses aggressively. This can help increase alignment to enable us to do better
    * chunking of loads and stores after lowering bit sizes. Ignore load/store size limitations here; we'll
    * address them with lower_mem_access_bit_sizes. */
   nir_load_store_vectorize_options vectorize_opts = {
      .callback = vectorize_filter,
      .modes = nir_var_mem_ubo | nir_var_mem_ssbo,
   };
   NIR_PASS_V(s, nir_opt_load_store_vectorize, &vectorize_opts);

   /* Now that they're bloated to the max, address bit size restrictions and overall size limitations for
    * a single load/store op. */
   struct lower_mem_bit_sizes_data mem_size_data = { s->options, opts };
   nir_lower_mem_access_bit_sizes_options mem_size_options = {
      .modes = nir_var_mem_ubo | nir_var_mem_ssbo,
      .callback = lower_mem_access_bit_sizes_cb,
      .may_lower_unaligned_stores_to_atomics = true,
      .cb_data = &mem_size_data
   };
   NIR_PASS_V(s, nir_lower_mem_access_bit_sizes, &mem_size_options);

   /* Lastly, convert byte-addressed UBO loads to vec4-addressed ones. This pass can also deal with
    * selecting sub-components from the load and with loads that straddle a vec4 boundary. */
   NIR_PASS_V(s, nir_lower_ubo_vec4);

   if (opts->shader_model_max < SHADER_MODEL_6_6) {
      /* In a later pass, load_helper_invocation will be lowered to a sample-mask-based fallback,
       * so both load_helper_invocation and is_helper_invocation will be emulated eventually.
       */
      NIR_PASS_V(s, nir_lower_is_helper_invocation);
   }

   if (ctx->mod.shader_kind == DXIL_HULL_SHADER)
      NIR_PASS_V(s, dxil_nir_split_tess_ctrl, &ctx->tess_ctrl_patch_constant_func);

   if (ctx->mod.shader_kind == DXIL_HULL_SHADER ||
       ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) {
      /* Make sure any derefs are gone after lower_io before updating tess level vars */
      NIR_PASS_V(s, nir_opt_dce);
      NIR_PASS_V(s, dxil_nir_fixup_tess_level_for_domain);
   }

   optimize_nir(s, opts);

   NIR_PASS_V(s, nir_remove_dead_variables,
              nir_var_function_temp | nir_var_mem_constant | nir_var_mem_shared, NULL);

   if (!allocate_sysvalues(ctx)) {
      /* Release the context and module instead of leaking them on failure */
      retval = false;
      goto out;
   }

   NIR_PASS_V(s, dxil_nir_lower_sysval_to_load_input, ctx->system_value);
   NIR_PASS_V(s, nir_opt_dce);

   /* This needs to be after any copy prop is done to prevent these movs from being erased */
   NIR_PASS_V(s, dxil_nir_move_consts);
   NIR_PASS_V(s, nir_opt_dce);

   NIR_PASS_V(s, dxil_nir_guess_image_formats);

   if (debug_dxil & DXIL_DEBUG_VERBOSE)
      nir_print_shader(s, stderr);

   if (!emit_module(ctx, opts)) {
      debug_printf("D3D12: emit_module failed\n");
      retval = false;
      goto out;
   }

   if (debug_dxil & DXIL_DEBUG_DUMP_MODULE) {
      struct dxil_dumper *dumper = dxil_dump_create();
      dxil_dump_module(dumper, &ctx->mod);
      fprintf(stderr, "\n");
      dxil_dump_buf_to_file(dumper, stderr);
      fprintf(stderr, "\n\n");
      dxil_dump_free(dumper);
   }

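   /* Assemble the final DXIL container: feature flags, the input/output
    * (and, for tessellation stages, patch constant) signatures, the state
    * validation blob, and finally the bitcode module itself. */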
   struct dxil_container container;
   dxil_container_init(&container);
   /* Native low precision disables min-precision */
   if (ctx->mod.feats.native_low_precision)
      ctx->mod.feats.min_precision = false;
   if (!dxil_container_add_features(&container, &ctx->mod.feats)) {
      debug_printf("D3D12: dxil_container_add_features failed\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_add_io_signature(&container,
                                        DXIL_ISG1,
                                        ctx->mod.num_sig_inputs,
                                        ctx->mod.inputs,
                                        ctx->mod.minor_validator >= 7)) {
      debug_printf("D3D12: failed to write input signature\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_add_io_signature(&container,
                                        DXIL_OSG1,
                                        ctx->mod.num_sig_outputs,
                                        ctx->mod.outputs,
                                        ctx->mod.minor_validator >= 7)) {
      debug_printf("D3D12: failed to write output signature\n");
      retval = false;
      goto out;
   }

   if ((ctx->mod.shader_kind == DXIL_HULL_SHADER ||
        ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) &&
       !dxil_container_add_io_signature(&container,
                                        DXIL_PSG1,
                                        ctx->mod.num_sig_patch_consts,
                                        ctx->mod.patch_consts,
                                        ctx->mod.minor_validator >= 7)) {
      debug_printf("D3D12: failed to write patch constant signature\n");
      retval = false;
      goto out;
   }

   struct dxil_validation_state validation_state;
   memset(&validation_state, 0, sizeof(validation_state));
   dxil_fill_validation_state(ctx, &validation_state);

   if (!dxil_container_add_state_validation(&container, &ctx->mod,
                                            &validation_state)) {
      debug_printf("D3D12: failed to write state validation\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_add_module(&container, &ctx->mod)) {
      debug_printf("D3D12: failed to write module\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_write(&container, blob)) {
      debug_printf("D3D12: dxil_container_write failed\n");
      retval = false;
      goto out;
   }
   dxil_container_finish(&container);

   if (debug_dxil & DXIL_DEBUG_DUMP_BLOB) {
      static int shader_id = 0;
      char buffer[64];
      snprintf(buffer, sizeof(buffer), "shader_%s_%d.blob",
               get_shader_kind_str(ctx->mod.shader_kind), shader_id++);
      debug_printf("Trying to write blob to %s\n", buffer);
      FILE *f = fopen(buffer, "wb");
      if (f) {
         fwrite(blob->data, 1, blob->size, f);
         fclose(f);
      }
   }

out:
   dxil_module_release(&ctx->mod);
   ralloc_free(ctx->ralloc_ctx);
   free(ctx);
   return retval;
}