1 /*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7 /* The compiler middle-end architecture: Explaining (non-)monolithic shaders
8 * -------------------------------------------------------------------------
9 *
10 * Typically, there is one-to-one correspondence between API and HW shaders,
11 * that is, for every API shader, there is exactly one shader binary in
12 * the driver.
13 *
14 * The problem with that is that we also have to emulate some API states
15 * (e.g. alpha-test, and many others) in shaders too. The two obvious ways
16 * to deal with it are:
17 * - each shader has multiple variants for each combination of emulated states,
18 * and the variants are compiled on demand, possibly relying on a shader
19 * cache for good performance
20 * - patch shaders at the binary level
21 *
22 * This driver uses something completely different. The emulated states are
23 * usually implemented at the beginning or end of shaders. Therefore, we can
24 * split the shader into 3 parts:
25 * - prolog part (shader code dependent on states)
26 * - main part (the API shader)
27 * - epilog part (shader code dependent on states)
28 *
29 * Each part is compiled as a separate shader and the final binaries are
30 * concatenated. This type of shader is called non-monolithic, because it
31 * consists of multiple independent binaries. Creating a new shader variant
32 * is therefore only a concatenation of shader parts (binaries) and doesn't
33 * involve any compilation. The main shader parts are the only parts that are
34 * compiled when applications create shader objects. The prolog and epilog
35 * parts are compiled on the first use and saved, so that their binaries can
36 * be reused by many other shaders.
37 *
38 * One of the roles of the prolog part is to compute vertex buffer addresses
39 * for vertex shaders. A few of the roles of the epilog part are color buffer
40 * format conversions in pixel shaders that we have to do manually, and write
41 * tessellation factors in tessellation control shaders. The prolog and epilog
42 * have many other important responsibilities in various shader stages.
43 * They don't just "emulate legacy stuff".
44 *
45 * Monolithic shaders are shaders where the parts are combined before LLVM
46 * compilation, and the whole thing is compiled and optimized as one unit with
47 * one binary on the output. The result is the same as the non-monolithic
48 * shader, but the final code can be better, because LLVM can optimize across
49 * all shader parts. Monolithic shaders aren't usually used except for these
50 * special cases:
51 *
52 * 1) Some rarely-used states require modification of the main shader part
53 * itself, and in such cases, only the monolithic shader variant is
54 * compiled, and that's always done on the first use.
55 *
56 * 2) When we do cross-stage optimizations for separate shader objects and
57 * e.g. eliminate unused shader varyings, the resulting optimized shader
58 * variants are always compiled as monolithic shaders, and always
59 * asynchronously (i.e. not stalling ongoing rendering). We call them
60 * "optimized monolithic" shaders. The important property here is that
61 * the non-monolithic unoptimized shader variant is always available for use
62 * when the asynchronous compilation of the optimized shader is not done
63 * yet.
64 *
65 * Starting with GFX9 chips, some shader stages are merged, and the number of
66 * shader parts per shader increased. The complete new list of shader parts is:
67 * - 1st shader: prolog part
68 * - 1st shader: main part
69 * - 2nd shader: main part
70 * - 2nd shader: epilog part
71 */
72
73 /* How linking shader inputs and outputs between vertex, tessellation, and
74 * geometry shaders works.
75 *
76 * Inputs and outputs between shaders are stored in a buffer. This buffer
77 * lives in LDS (typical case for tessellation), but it can also live
78 * in memory (ESGS). Each input or output has a fixed location within a vertex.
79 * The highest used input or output determines the stride between vertices.
80 *
81 * Since GS and tessellation are only possible in the OpenGL core profile,
82 * only these semantics are valid for per-vertex data:
83 *
84 * Name Location
85 *
86 * POSITION 0
87 * VAR0..31 1..32
88 * CLIP_DIST0..1 49..50
89 * PSIZ 51
90 *
 * For example, a shader only writing VAR0 (location 1) has the output stride of 2.
92 *
93 * Only these semantics are valid for per-patch data:
94 *
95 * Name Location
96 *
97 * TESSOUTER 0
98 * TESSINNER 1
99 * PATCH0..29 2..31
100 *
101 * That's how independent shaders agree on input and output locations.
102 * The si_shader_io_get_unique_index function assigns the locations.
103 *
104 * For tessellation, other required information for calculating the input and
105 * output addresses like the vertex stride, the patch stride, and the offsets
106 * where per-vertex and per-patch data start, is passed to the shader via
107 * user data SGPRs. The offsets and strides are calculated at draw time and
108 * aren't available at compile time.
109 */
110
111 #ifndef SI_SHADER_H
112 #define SI_SHADER_H
113
114 #include "shader_info.h"
115 #include "ac_binary.h"
116 #include "ac_gpu_info.h"
117 #include "util/mesa-blake3.h"
118 #include "util/u_live_shader_cache.h"
119 #include "util/u_queue.h"
120 #include "si_pm4.h"
121
122 #ifdef __cplusplus
123 extern "C" {
124 #endif
125
126 struct nir_shader;
127 struct nir_instr;
128
/* Size limits and masks used by the shader structures in this header. */
#define SI_NUM_INTERP 32           /* sizes si_shader_binary_info::ps_inputs[] */
#define SI_MAX_ATTRIBS 16          /* sizes si_shader_key_ge::mono.vs_fix_fetch[] */
#define SI_MAX_VS_OUTPUTS 40
#define SI_USER_CLIP_PLANE_MASK 0x3F /* the 6 low bits */

/* NOTE(review): presumably a driver-private interpolation mode numbered right after the
 * generic INTERP_MODE_* values - confirm against the users of this macro.
 */
#define INTERP_MODE_COLOR INTERP_MODE_COUNT

/* Precomputed PS input control values. Both use OFFSET(0x20) and differ only in DEFAULT_VAL.
 * NOTE(review): the 0000/0001 suffix presumably names the default (x, y, z, w) value selected
 * by DEFAULT_VAL - confirm against the register documentation.
 */
#define SI_PS_INPUT_CNTL_0000 (S_028644_OFFSET(0x20) | S_028644_DEFAULT_VAL(0))
#define SI_PS_INPUT_CNTL_0001 (S_028644_OFFSET(0x20) | S_028644_DEFAULT_VAL(3))
#define SI_PS_INPUT_CNTL_UNUSED SI_PS_INPUT_CNTL_0000
/* D3D9 behaviour for COLOR0 requires 0001. GL is undefined. */
#define SI_PS_INPUT_CNTL_UNUSED_COLOR0 SI_PS_INPUT_CNTL_0001

/* Vector argument flags: bit 0 marks a color argument, bits 1..3 hold a 3-bit color
 * component field that is packed by SI_VECTOR_ARG_COLOR_COMPONENT and unpacked by
 * SI_GET_VECTOR_ARG_COLOR_COMPONENT.
 */
#define SI_VECTOR_ARG_IS_COLOR BITFIELD_BIT(0)
#define SI_VECTOR_ARG_COLOR_COMPONENT(x) (((x) & 0x7) << 1)
#define SI_GET_VECTOR_ARG_COLOR_COMPONENT(x) (((x) >> 1) & 0x7)
145
/* SGPR user data indices.
 *
 * The enumerators form several overlapping numbering chains: each shader stage restarts
 * its own list from one of the shared *_NUM_* anchors, so different stages reuse the same
 * index values for different purposes. The numeric values noted below follow directly from
 * the enumerator order.
 */
enum
{
   SI_SGPR_INTERNAL_BINDINGS,
   SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
   SI_SGPR_CONST_AND_SHADER_BUFFERS, /* or just a constant buffer 0 pointer */
   SI_SGPR_SAMPLERS_AND_IMAGES,
   SI_NUM_RESOURCE_SGPRS, /* = 4 */

   /* API VS, TES without GS, GS copy shader */
   SI_SGPR_VS_STATE_BITS = SI_NUM_RESOURCE_SGPRS,
   SI_NUM_VS_STATE_RESOURCE_SGPRS, /* = 5 */

   /* all VS variants */
   SI_SGPR_BASE_VERTEX = SI_NUM_VS_STATE_RESOURCE_SGPRS,
   SI_SGPR_DRAWID,
   SI_SGPR_START_INSTANCE,
   SI_VS_NUM_USER_SGPR, /* = 8 */

   /* Aliases SI_SGPR_CONST_AND_SHADER_BUFFERS (index 2). */
   SI_SGPR_VS_BLIT_DATA = SI_SGPR_CONST_AND_SHADER_BUFFERS,

   /* TES */
   SI_SGPR_TES_OFFCHIP_LAYOUT = SI_NUM_VS_STATE_RESOURCE_SGPRS,
   SI_SGPR_TES_OFFCHIP_ADDR,
   SI_TES_NUM_USER_SGPR, /* = 7 */

   /* GFX6-8: TCS only */
   GFX6_SGPR_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS,
   GFX6_SGPR_TCS_OFFCHIP_ADDR,
   GFX6_SGPR_TCS_IN_LAYOUT,
   GFX6_TCS_NUM_USER_SGPR, /* = 7 */

   /* GFX9: Merged LS-HS (VS-TCS) only. */
   GFX9_SGPR_TCS_OFFCHIP_LAYOUT = SI_VS_NUM_USER_SGPR,
   GFX9_SGPR_TCS_OFFCHIP_ADDR,
   GFX9_TCS_NUM_USER_SGPR, /* = 10 */

   /* GS limits */
   GFX6_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS,
   SI_GSCOPY_NUM_USER_SGPR = SI_NUM_VS_STATE_RESOURCE_SGPRS,

   /* Starts after both the VS and the TES user SGPR lists (the larger of the two counts,
    * assuming MAX2 returns the greater argument).
    */
   GFX9_SGPR_SMALL_PRIM_CULL_INFO = MAX2(SI_VS_NUM_USER_SGPR, SI_TES_NUM_USER_SGPR),
   GFX9_SGPR_ATTRIBUTE_RING_ADDR,
   GFX9_GS_NUM_USER_SGPR,

   /* PS only */
   SI_SGPR_ALPHA_REF = SI_NUM_RESOURCE_SGPRS,
   SI_PS_NUM_USER_SGPR, /* = 5 */

   /* The value has to be 12, because the hw requires that descriptors
    * are aligned to 4 SGPRs.
    */
   SI_SGPR_VS_VB_DESCRIPTOR_FIRST = 12,
};
200
/* Indices of the LLVM function parameters. */
enum
{
   /* Number of leading resource parameters; stage-specific ones start here. */
   SI_NUM_RESOURCE_PARAMS = 4,

   /* Parameters that exist only in pixel shaders. */
   SI_PARAM_ALPHA_REF = SI_NUM_RESOURCE_PARAMS,
   SI_PARAM_PRIM_MASK,
   SI_PARAM_PERSP_SAMPLE,
   SI_PARAM_PERSP_CENTER,
   SI_PARAM_PERSP_CENTROID,
   SI_PARAM_PERSP_PULL_MODEL,
   SI_PARAM_LINEAR_SAMPLE,
   SI_PARAM_LINEAR_CENTER,
   SI_PARAM_LINEAR_CENTROID,
   SI_PARAM_LINE_STIPPLE_TEX,
   SI_PARAM_POS_X_FLOAT,
   SI_PARAM_POS_Y_FLOAT,
   SI_PARAM_POS_Z_FLOAT,
   SI_PARAM_POS_W_FLOAT,
   SI_PARAM_FRONT_FACE,
   SI_PARAM_ANCILLARY,
   SI_PARAM_SAMPLE_COVERAGE,
   SI_PARAM_POS_FIXED_PT,

   /* One past the last named parameter, plus 8 more for COLOR[0..1]. */
   SI_NUM_PARAMS = SI_PARAM_POS_FIXED_PT + 1 + 8,
};
228
/* Bit layout of the 32-bit vs_state_bits value. Every field is described by a
 * <FIELD>__SHIFT / <FIELD>__MASK pair (the mask is unshifted) so that it can be used with
 * ENCODE_FIELD, CLEAR_FIELD, SET_FIELD and GET_FIELD.
 */

/* These fields are only set in current_vs_state (except INDEXED) in si_context, and they are
 * accessible in the shader via vs_state_bits in VS, TES, and GS.
 */
#define VS_STATE_CLAMP_VERTEX_COLOR__SHIFT 0 /* bit 0 */
#define VS_STATE_CLAMP_VERTEX_COLOR__MASK 0x1 /* Shared by VS and GS */
#define VS_STATE_INDEXED__SHIFT 1 /* bit 1 */
#define VS_STATE_INDEXED__MASK 0x1 /* Shared by VS and GS */

/* These fields are only set in current_gs_state in si_context, and they are accessible
 * in the shader via vs_state_bits in legacy GS, the GS copy shader, and any NGG shader.
 */
/* bit gap */
/* The number of ES outputs is derived from the last output index of SI_UNIQUE_SLOT_* + 1, which
 * can be 55 at most. The ESGS vertex stride in dwords is: NUM_ES_OUTPUTS * 4 + 1
 * Only used by GFX9+ to compute LDS addresses of GS inputs.
 */
#define GS_STATE_NUM_ES_OUTPUTS__SHIFT 13 /* bits 13..18 */
#define GS_STATE_NUM_ES_OUTPUTS__MASK 0x3f
/* Small prim filter precision = num_samples / quant_mode, which can only be equal to 1/2^n
 * where n is between 4 and 12. Knowing that, we only need to store 4 bits of the FP32 exponent.
 * Set it like this: value = (fui(num_samples / quant_mode) >> 23) & 0xf;
 * Expand to FP32 like this: ((0x70 | value) << 23);
 * With 0x70 = 112, we get 2^(112 + value - 127) = 2^(value - 15), which is always a negative
 * exponent and it's equal to 1/2^(15 - value).
 */
#define GS_STATE_SMALL_PRIM_PRECISION_NO_AA__SHIFT 19 /* bits 19..22 */
#define GS_STATE_SMALL_PRIM_PRECISION_NO_AA__MASK 0xf
#define GS_STATE_SMALL_PRIM_PRECISION__SHIFT 23 /* bits 23..26 */
#define GS_STATE_SMALL_PRIM_PRECISION__MASK 0xf
#define GS_STATE_STREAMOUT_QUERY_ENABLED__SHIFT 27 /* bit 27 */
#define GS_STATE_STREAMOUT_QUERY_ENABLED__MASK 0x1
#define GS_STATE_PROVOKING_VTX_FIRST__SHIFT 28 /* bit 28 */
#define GS_STATE_PROVOKING_VTX_FIRST__MASK 0x1
#define GS_STATE_OUTPRIM__SHIFT 29 /* bits 29..30 */
#define GS_STATE_OUTPRIM__MASK 0x3
#define GS_STATE_PIPELINE_STATS_EMU__SHIFT 31 /* bit 31 */
#define GS_STATE_PIPELINE_STATS_EMU__MASK 0x1
266
/* Helpers for the <FIELD>__SHIFT / <FIELD>__MASK pairs. "field" is the common prefix of
 * the pair, e.g. GS_STATE_OUTPRIM.
 */

/* Returns "value" masked to the field width and moved to the field position. */
#define ENCODE_FIELD(field, value) (((unsigned)(value) & field##__MASK) << field##__SHIFT)
/* Returns a mask with all bits set except the bits of the field. */
#define CLEAR_FIELD(field) (~((unsigned)field##__MASK << field##__SHIFT))

/* This is called by functions that change states.
 *
 * Replaces the field in "var" with "value". "value" is evaluated exactly once (after
 * conversion to unsigned), so an argument with side effects behaves the same with and
 * without NDEBUG. Debug builds assert that the value fits in the field; release builds
 * mask it silently. "var" is still expanded more than once and must be a plain lvalue.
 */
#define SET_FIELD(var, field, value) do { \
   const unsigned set_field_value_ = (unsigned)(value); \
   assert(set_field_value_ == (set_field_value_ & field##__MASK)); \
   (var) &= CLEAR_FIELD(field); \
   (var) |= ENCODE_FIELD(field, set_field_value_); \
} while (0)

/* This is called during shader compilation and returns LLVMValueRef.
 * The field width passed to si_unpack_param is the number of bits set in the mask.
 */
#define GET_FIELD(ctx, field) si_unpack_param((ctx), (ctx)->args->vs_state_bits, field##__SHIFT, \
                                             util_bitcount(field##__MASK))
280
/* SGPR counts for the VS blit shader variants; the names indicate which data
 * (position, color, texcoord) each variant receives.
 */
enum
{
   /* These represent the number of SGPRs the shader uses. */
   SI_VS_BLIT_SGPRS_POS = 3,
   SI_VS_BLIT_SGPRS_POS_COLOR = 7,
   SI_VS_BLIT_SGPRS_POS_TEXCOORD = 9,

   /* Upper bound: the largest variant above (9) plus one. */
   MAX_SI_VS_BLIT_SGPRS = 10, /* +1 for the attribute ring address */
};
290
/* NGG culling options, stored in si_shader_key_ge::opt.ngg_culling (13 bits):
 * bits 0..4 are single-bit flags and bits 5..12 hold an 8-bit clip plane enable mask.
 */
#define SI_NGG_CULL_TRIANGLES (1 << 0) /* this implies W, view.xy, and small prim culling */
#define SI_NGG_CULL_BACK_FACE (1 << 1) /* back faces */
#define SI_NGG_CULL_FRONT_FACE (1 << 2) /* front faces */
#define SI_NGG_CULL_LINES (1 << 3) /* the primitive type is lines */
#define SI_NGG_CULL_SMALL_LINES_DIAMOND_EXIT (1 << 4) /* cull small lines according to the diamond exit rule */
/* Pack / unpack the 8-bit clip plane enable mask at bits 5..12. */
#define SI_NGG_CULL_CLIP_PLANE_ENABLE(enable) (((enable) & 0xff) << 5)
#define SI_NGG_CULL_GET_CLIP_PLANE_ENABLE(x) (((x) >> 5) & 0xff)
298
/* One entry of the si_shader_profiles table.
 * NOTE(review): presumably "blake3" identifies the shader the entry applies to and
 * "options" is a bitmask of SI_PROFILE_* - confirm against the table definition.
 */
struct si_shader_profile {
   uint32_t blake3[BLAKE3_OUT_LEN32];
   uint32_t options;
};
303
/* The profile table is defined elsewhere; since the array is declared with unknown
 * bound here, its length has to be queried through si_get_num_shader_profiles().
 */
extern struct si_shader_profile si_shader_profiles[];
unsigned si_get_num_shader_profiles(void);
306
/* SI_PROFILE_* option flags (see si_shader_info::options). Bit 2 is unused. */
#define SI_PROFILE_WAVE32 (1 << 0)
#define SI_PROFILE_GFX10_WAVE64 (1 << 1)
/* bit gap */
#define SI_PROFILE_VS_NO_BINNING (1 << 3)
#define SI_PROFILE_GFX9_GFX10_PS_NO_BINNING (1 << 4)
#define SI_PROFILE_CLAMP_DIV_BY_ZERO (1 << 5)
#define SI_PROFILE_NO_OPT_UNIFORM_VARYINGS (1 << 6)
314
/* Kinds of shader debug output that can be requested. */
enum si_shader_dump_type {
   SI_DUMP_SHADER_KEY,
   SI_DUMP_INIT_NIR,     /* initial input NIR when shaders are created (before lowering) */
   SI_DUMP_NIR,          /* final NIR after lowering when shader variants are created */
   SI_DUMP_INIT_LLVM_IR, /* initial LLVM IR before optimizations */
   SI_DUMP_LLVM_IR,      /* final LLVM IR */
   SI_DUMP_INIT_ACO_IR,  /* initial ACO IR before optimizations */
   SI_DUMP_ACO_IR,       /* final ACO IR */
   SI_DUMP_ASM,          /* final asm shaders */
   SI_DUMP_STATS,        /* print statistics as shader-db */
   SI_DUMP_ALWAYS,
};
327
/* Unique IO slot numbers. Every slot value is spelled out so that the layout can be read
 * directly; slots 47..48 are only reachable as 16-bit varyings.
 */
enum {
   SI_UNIQUE_SLOT_POS = 0,

   /* Some shader stages size their input/output storage (LDS, tess and GS rings) by the
    * highest used IO index, so VARn directly follows POSITION to keep that size as small
    * as possible.
    */
   SI_UNIQUE_SLOT_VAR0 = 1, /* 0..31 */

   /* 16-bit GLES varyings come after the 32-bit ones. They are mutually exclusive with the
    * legacy desktop GL varyings below and therefore share their indices.
    */
   SI_UNIQUE_SLOT_VAR0_16BIT = 33, /* 0..15 */

   /* Legacy GL-only varyings, aliasing the GLES-only 16-bit varyings. */
   SI_UNIQUE_SLOT_FOGC = 33,
   SI_UNIQUE_SLOT_COL0 = 34,
   SI_UNIQUE_SLOT_COL1 = 35,
   SI_UNIQUE_SLOT_BFC0 = 36,
   SI_UNIQUE_SLOT_BFC1 = 37,
   SI_UNIQUE_SLOT_TEX0 = 38,
   SI_UNIQUE_SLOT_TEX1 = 39,
   SI_UNIQUE_SLOT_TEX2 = 40,
   SI_UNIQUE_SLOT_TEX3 = 41,
   SI_UNIQUE_SLOT_TEX4 = 42,
   SI_UNIQUE_SLOT_TEX5 = 43,
   SI_UNIQUE_SLOT_TEX6 = 44,
   SI_UNIQUE_SLOT_TEX7 = 45,
   SI_UNIQUE_SLOT_CLIP_VERTEX = 46,

   /* Varyings that exist in both GLES and desktop GL start at 49, past the 16-bit range. */
   SI_UNIQUE_SLOT_CLIP_DIST0 = 49,
   SI_UNIQUE_SLOT_CLIP_DIST1 = 50,
   SI_UNIQUE_SLOT_PSIZ = 51,
   /* The remaining slots can't be written by LS, HS, and ES. */
   SI_UNIQUE_SLOT_LAYER = 52,
   SI_UNIQUE_SLOT_VIEWPORT = 53,
   SI_UNIQUE_SLOT_PRIMITIVE_ID = 54,
};
368
/**
 * For VS shader keys, describe any fixups required for vertex fetch.
 *
 * \ref log_size, \ref format, and the number of channels are interpreted as
 * by \ref ac_build_opencoded_load_format.
 *
 * Note: all bits 0 (size = 1 byte, num channels = 1, format = float) is an
 * impossible format and indicates that no fixup is needed (just use
 * buffer_load_format_xyzw).
 *
 * The four bit-fields add up to 8 bits; \ref bits gives access to all of them
 * as a single byte (e.g. to test for "no fixup" with bits == 0).
 */
union si_vs_fix_fetch {
   struct {
      uint8_t log_size : 2;        /* log2 of the bytes per channel: 1, 2, 4, or 8 bytes */
      uint8_t num_channels_m1 : 2; /* number of channels minus 1 */
      uint8_t format : 3;          /* AC_FETCH_FORMAT_xxx */
      uint8_t reverse : 1;         /* reverse XYZ channels */
   } u;
   uint8_t bits;
};
388
389 struct si_shader;
390
/* State of the context creating the shader object.
 * A copy is embedded in both si_shader_selector and si_shader.
 */
struct si_compiler_ctx_state {
   /* Should only be used by si_init_shader_selector_async and
    * si_build_shader_variant if thread_index == -1 (non-threaded). */
   struct ac_llvm_compiler *compiler;

   /* Used if thread_index == -1 or if debug.async is true. */
   struct util_debug_callback debug;

   /* Used for creating the log string for gallium/ddebug. */
   bool is_debug_context;
};
403
/* Type of a PS color output. The four values fit in 2 bits;
 * si_shader_info::output_color_types stores one such bit pair per output.
 */
enum si_color_output_type {
   SI_TYPE_ANY32,
   SI_TYPE_FLOAT16,
   SI_TYPE_INT16,
   SI_TYPE_UINT16,
};
410
/* Description of one shader input, packed into 4 bytes. Used for
 * si_shader_info::input[] and si_shader_binary_info::ps_inputs[].
 */
union si_input_info {
   struct {
      uint8_t semantic;
      uint8_t interpolate;
      uint8_t fp16_lo_hi_valid;
      uint8_t usage_mask;
   };
   uint32_t _unused; /* this just forces 4-byte alignment */
};
420
421 struct si_shader_info {
422 shader_info base;
423
424 uint32_t options; /* bitmask of SI_PROFILE_* */
425
426 uint8_t num_inputs;
427 uint8_t num_outputs;
428 union si_input_info input[PIPE_MAX_SHADER_INPUTS];
429 uint8_t output_semantic[PIPE_MAX_SHADER_OUTPUTS];
430 uint8_t output_usagemask[PIPE_MAX_SHADER_OUTPUTS];
431 uint8_t output_readmask[PIPE_MAX_SHADER_OUTPUTS];
432 uint8_t output_streams[PIPE_MAX_SHADER_OUTPUTS];
433 uint8_t output_type[PIPE_MAX_SHADER_OUTPUTS]; /* enum nir_alu_type */
434
435 uint8_t num_vs_inputs;
436 uint8_t num_vbos_in_user_sgprs;
437 uint8_t num_stream_output_components[4];
438 uint16_t enabled_streamout_buffer_mask;
439
440 uint64_t inputs_read; /* "get_unique_index" bits */
441 uint64_t tcs_vgpr_only_inputs; /* TCS inputs that are only in VGPRs, not LDS. */
442
443 uint64_t outputs_written_before_tes_gs; /* "get_unique_index" bits */
444 uint64_t outputs_written_before_ps; /* "get_unique_index" bits */
445 uint32_t patch_outputs_written; /* "get_unique_index_patch" bits */
446
447 uint8_t clipdist_mask;
448 uint8_t culldist_mask;
449
450 uint16_t esgs_vertex_stride;
451 uint16_t gsvs_vertex_size;
452 uint8_t gs_input_verts_per_prim;
453 unsigned max_gsvs_emit_size;
454
455 /* Set 0xf or 0x0 (4 bits) per each written output.
456 * ANDed with spi_shader_col_format.
457 */
458 unsigned colors_written_4bit;
459
460 int constbuf0_num_slots;
461 uint num_memory_stores;
462 uint8_t color_attr_index[2];
463 uint8_t color_interpolate[2];
464 uint8_t color_interpolate_loc[2];
465 uint8_t colors_read; /**< which color components are read by the FS */
466 uint8_t colors_written;
467 uint16_t output_color_types; /**< Each bit pair is enum si_color_output_type */
468 bool color0_writes_all_cbufs; /**< gl_FragColor */
469 bool reads_samplemask; /**< does fragment shader read sample mask? */
470 bool reads_tess_factors; /**< If TES reads TESSINNER or TESSOUTER */
471 bool writes_z; /**< does fragment shader write Z value? */
472 bool writes_stencil; /**< does fragment shader write stencil value? */
473 bool writes_samplemask; /**< does fragment shader write sample mask? */
474 bool writes_edgeflag; /**< vertex shader outputs edgeflag */
475 bool uses_interp_color;
476 bool uses_persp_center_color;
477 bool uses_persp_centroid_color;
478 bool uses_persp_sample_color;
479 bool uses_persp_center;
480 bool uses_persp_centroid;
481 bool uses_persp_sample;
482 bool uses_linear_center;
483 bool uses_linear_centroid;
484 bool uses_linear_sample;
485 bool uses_interp_at_sample;
486 bool uses_instanceid;
487 bool uses_base_vertex;
488 bool uses_base_instance;
489 bool uses_drawid;
490 bool uses_primid;
491 bool uses_frontface;
492 bool uses_invocationid;
493 bool uses_thread_id[3];
494 bool uses_block_id[3];
495 bool uses_variable_block_size;
496 bool uses_grid_size;
497 bool uses_tg_size;
498 bool uses_atomic_ordered_add;
499 bool writes_position;
500 bool writes_psize;
501 bool writes_clipvertex;
502 bool writes_primid;
503 bool writes_viewport_index;
504 bool writes_layer;
505 bool uses_bindless_samplers;
506 bool uses_bindless_images;
507 bool uses_indirect_descriptor;
508 bool has_divergent_loop;
509 bool uses_sampleid;
510 bool uses_layer_id;
511 bool has_non_uniform_tex_access;
512
513 bool uses_vmem_sampler_or_bvh;
514 bool uses_vmem_load_other; /* all other VMEM loads and atomics with return */
515
516 /** Whether all codepaths write tess factors in all invocations. */
517 bool tessfactors_are_def_in_all_invocs;
518
519 /* A flag to check if vrs2x2 can be enabled to reduce number of
520 * fragment shader invocations if flat shading.
521 */
522 bool allow_flat_shading;
523
524 /* Optimization: if the texture bound to this texunit has been cleared to 1,
525 * then the draw can be skipped (see si_draw_vbo_skip_noop). Initially the
526 * value is 0xff (undetermined) and can be later changed to 0 (= false) or
527 * texunit + 1.
528 */
529 uint8_t writes_1_if_tex_is_1;
530
531 /* frag coord and sample pos per component read mask. */
532 uint8_t reads_frag_coord_mask;
533 uint8_t reads_sample_pos_mask;
534 };
535
/* A shader selector is a gallium CSO and contains shader variants and
 * binaries for one NIR program. This can be shared by multiple contexts.
 */
struct si_shader_selector {
   struct util_live_shader base;
   struct si_screen *screen;
   struct util_queue_fence ready;
   struct si_compiler_ctx_state compiler_ctx_state;
   gl_shader_stage stage;

   /* Shader variant list.
    * NOTE(review): presumably "mutex" guards keys/variants and the two counters, and
    * keys[i] is the key of variants[i] - confirm against the variant lookup code.
    */
   simple_mtx_t mutex;
   union si_shader_key *keys;
   unsigned variants_count;
   unsigned variants_max_count;
   struct si_shader **variants;

   /* The compiled NIR shader without a prolog and/or epilog (not
    * uploaded to a buffer object).
    *
    * [0] for wave32, [1] for wave64.
    */
   struct si_shader *main_shader_part[2];
   struct si_shader *main_shader_part_ls[2];     /* as_ls is set in the key */
   struct si_shader *main_shader_part_es;        /* as_es && !as_ngg in the key */
   struct si_shader *main_shader_part_ngg[2];    /* !as_es && as_ngg in the key */
   struct si_shader *main_shader_part_ngg_es[2]; /* as_es && as_ngg in the key */

   struct nir_shader *nir;
   void *nir_binary;
   unsigned nir_size;

   struct si_shader_info info;

   enum pipe_shader_type pipe_shader_type;
   uint8_t const_and_shader_buf_descriptors_index;
   uint8_t sampler_and_images_descriptors_index;
   uint8_t cs_shaderbufs_sgpr_index;
   uint8_t cs_num_shaderbufs_in_user_sgprs;
   uint8_t cs_images_sgpr_index;
   uint8_t cs_images_num_sgprs;
   uint8_t cs_num_images_in_user_sgprs;
   unsigned ngg_cull_vert_threshold; /* UINT32_MAX = disabled */
   enum mesa_prim rast_prim;

   /* GS parameters. */
   bool tess_turns_off_ngg;

   /* bitmasks of used descriptor slots */
   uint64_t active_const_and_shader_buffers;
   uint64_t active_samplers_and_images;
};
587
588 /* Valid shader configurations:
589 *
590 * API shaders VS | TCS | TES | GS |pass| PS
591 * are compiled as: | | | |thru|
592 * | | | | |
593 * Only VS & PS: VS | | | | | PS
594 * GFX6 - with GS: ES | | | GS | VS | PS
595 * - with tess: LS | HS | VS | | | PS
596 * - with both: LS | HS | ES | GS | VS | PS
597 * GFX9 - with GS: -> | | | GS | VS | PS
598 * - with tess: -> | HS | VS | | | PS
599 * - with both: -> | HS | -> | GS | VS | PS
600 * | | | | |
601 * NGG - VS & PS: GS | | | | | PS
602 * (GFX10+) - with GS: -> | | | GS | | PS
603 * - with tess: -> | HS | GS | | | PS
604 * - with both: -> | HS | -> | GS | | PS
605 *
606 * -> = merged with the next stage
607 */
608
609 /* Use the byte alignment for all following structure members for optimal
610 * shader key memory footprint.
611 */
612 #pragma pack(push, 1)
613
/* Common PS bits between the shader key and the prolog key.
 *
 * 12 bits in total. This struct is declared under #pragma pack(1) and is embedded in
 * si_shader_key_ps and si_shader_part_key, so the member order and widths must not change.
 */
struct si_ps_prolog_bits {
   unsigned color_two_side : 1;
   unsigned flatshade_colors : 1;
   unsigned poly_stipple : 1;
   unsigned force_persp_sample_interp : 1;
   unsigned force_linear_sample_interp : 1;
   unsigned force_persp_center_interp : 1;
   unsigned force_linear_center_interp : 1;
   unsigned bc_optimize_for_persp : 1;
   unsigned bc_optimize_for_linear : 1;
   unsigned samplemask_log_ps_iter : 3;
};
627
/* Common PS bits between the shader key and the epilog key.
 *
 * Declared under #pragma pack(1) and embedded in si_shader_key_ps and
 * si_shader_part_key, so the member order and widths must not change.
 */
struct si_ps_epilog_bits {
   unsigned spi_shader_col_format;
   unsigned color_is_int8 : 8;
   unsigned color_is_int10 : 8;
   unsigned last_cbuf : 3;
   unsigned alpha_func : 3;
   unsigned alpha_to_one : 1;
   unsigned alpha_to_coverage_via_mrtz : 1; /* gfx11+ */
   unsigned clamp_color : 1;
   unsigned dual_src_blend_swizzle : 1; /* gfx11+ */
   unsigned rbplus_depth_only_opt : 1;
   unsigned kill_samplemask : 1;
};
642
/* Key describing one separately compiled shader part: either a PS prolog or a PS epilog.
 * Declared under #pragma pack(1).
 */
union si_shader_part_key {
   struct {
      struct si_ps_prolog_bits states;
      unsigned use_aco : 1;
      unsigned wave32 : 1;
      unsigned num_input_sgprs : 6;
      /* Color interpolation and two-side color selection. */
      unsigned colors_read : 8;       /* color input components read */
      unsigned num_interp_inputs : 5; /* BCOLOR is at this location */
      unsigned num_fragcoord_components : 3;
      unsigned wqm : 1;
      char color_attr_index[2];
      signed char color_interp_vgpr_index[2]; /* -1 == constant */
   } ps_prolog;
   struct {
      struct si_ps_epilog_bits states;
      unsigned use_aco : 1;
      unsigned wave32 : 1;
      unsigned uses_discard : 1;
      unsigned colors_written : 8;
      unsigned color_types : 16;
      unsigned writes_z : 1;
      unsigned writes_stencil : 1;
      unsigned writes_samplemask : 1;
   } ps_epilog;
};
669
/* The shader key for geometry stages (VS, TCS, TES, GS)
 *
 * Declared under #pragma pack(1). The key is split into three groups: "part" (prolog and
 * epilog flags), "mono" (flags for monolithic compilation only) and "opt" (optimization
 * flags for asynchronous compilation only).
 */
struct si_shader_key_ge {
   /* Prolog and epilog flags. */
   union {
      struct {
         struct si_shader_selector *ls; /* for merged LS-HS */
      } tcs; /* tessellation control shader */
      struct {
         struct si_shader_selector *es; /* for merged ES-GS */
      } gs;
   } part;

   /* These three are initially set according to the NEXT_SHADER property,
    * or guessed if the property doesn't seem correct.
    */
   unsigned as_es : 1;  /* whether it's a shader before GS */
   unsigned as_ls : 1;  /* whether it's VS before TCS */
   unsigned as_ngg : 1; /* whether it's the last GE stage and NGG is enabled,
                           also set for the stage right before GS */

   /* Flags for monolithic compilation only. */
   struct {
      /* - If neither "is_one" nor "is_fetched" has a bit set, the instance
       *   divisor is 0.
       * - If "is_one" has a bit set, the instance divisor is 1.
       * - If "is_fetched" has a bit set, the instance divisor will be loaded
       *   from the constant buffer.
       */
      uint16_t instance_divisor_is_one;     /* bitmask of inputs */
      uint16_t instance_divisor_is_fetched; /* bitmask of inputs */

      /* Whether fetch should be opencoded according to vs_fix_fetch.
       * Otherwise, if vs_fix_fetch is non-zero, buffer_load_format_xyzw
       * with minimal fixups is used. */
      uint16_t vs_fetch_opencode;
      union si_vs_fix_fetch vs_fix_fetch[SI_MAX_ATTRIBS];

      /* The two flags share one bit because they apply to different stages. */
      union {
         /* When PS needs PrimID and GS is disabled. */
         unsigned vs_export_prim_id : 1;    /* VS and TES only */
         unsigned gs_tri_strip_adj_fix : 1; /* GS only */
      } u;

      /* Gfx12: When no streamout buffers are bound, streamout must be disabled. */
      unsigned remove_streamout : 1;
   } mono;

   /* Optimization flags for asynchronous compilation only. */
   struct {
      /* For HW VS (it can be VS, TES, GS) */
      uint64_t kill_outputs; /* "get_unique_index" bits */
      unsigned kill_clip_distances : 8;
      unsigned kill_pointsize : 1;
      unsigned kill_layer : 1;
      unsigned remove_streamout : 1;

      /* For NGG VS and TES. */
      unsigned ngg_culling : 13; /* SI_NGG_CULL_* */


      /* For shaders where monolithic variants have better code.
       *
       * This is a flag that has no effect on code generation,
       * but forces monolithic shaders to be used as soon as
       * possible, because it's in the "opt" group.
       */
      unsigned prefer_mono : 1;

      /* VS and TCS have the same number of patch vertices. */
      unsigned same_patch_vertices : 1;

      /* For TCS. */
      unsigned tes_prim_mode : 3;
      unsigned tes_reads_tess_factors : 1;

      unsigned inline_uniforms : 1;

      /* This must be kept last to limit the number of variants
       * depending only on the uniform values.
       */
      uint32_t inlined_uniform_values[MAX_INLINABLE_UNIFORMS];
   } opt;
};
753
754 struct si_shader_key_ps {
755 struct {
756 /* Prolog and epilog flags. */
757 struct si_ps_prolog_bits prolog;
758 struct si_ps_epilog_bits epilog;
759 } part;
760
761 /* Flags for monolithic compilation only. */
762 struct {
763 unsigned poly_line_smoothing : 1;
764 unsigned point_smoothing : 1;
765 unsigned interpolate_at_sample_force_center : 1;
766 unsigned fbfetch_msaa : 1;
767 unsigned fbfetch_is_1D : 1;
768 unsigned fbfetch_layered : 1;
769 } mono;
770
771 /* Optimization flags for asynchronous compilation only. */
772 struct {
773 /* For shaders where monolithic variants have better code.
774 *
775 * This is a flag that has no effect on code generation,
776 * but forces monolithic shaders to be used as soon as
777 * possible, because it's in the "opt" group.
778 */
779 unsigned prefer_mono : 1;
780 unsigned inline_uniforms:1;
781
782 /* This eliminates the FRONT_FACE input VGPR as well as shader code using it. */
783 int force_front_face_input : 2; /* 0 = gl_FrontFacing, 1 = true, -1 = false */
784
785 /* This must be kept last to limit the number of variants
786 * depending only on the uniform values.
787 */
788 uint32_t inlined_uniform_values[MAX_INLINABLE_UNIFORMS];
789 } opt;
790 };
791
/* Shader variant key: the geometry-stage layout or the pixel-shader layout.
 * Stored in si_shader::key and in the si_shader_selector::keys array.
 */
union si_shader_key {
   struct si_shader_key_ge ge; /* geometry engine shaders */
   struct si_shader_key_ps ps;
};
796
797 /* Restore the pack alignment to default. */
798 #pragma pack(pop)
799
/* GCN-specific shader info.
 * Stored per compiled shader in si_shader::info.
 */
struct si_shader_binary_info {
   /* Per-varying-slot tables, indexed by varying slot (NUM_TOTAL_VARYING_SLOTS entries). */
   uint8_t vs_output_param_offset[NUM_TOTAL_VARYING_SLOTS];
   uint32_t vs_output_ps_input_cntl[NUM_TOTAL_VARYING_SLOTS];
   union si_input_info ps_inputs[SI_NUM_INTERP];
   uint8_t num_ps_inputs;
   uint8_t ps_colors_read;
   uint8_t num_input_sgprs;
   uint8_t num_input_vgprs;
   bool uses_vmem_load_other; /* all other VMEM loads and atomics with return */
   bool uses_vmem_sampler_or_bvh;
   uint8_t num_fragcoord_components;
   bool uses_instanceid;
   uint8_t nr_pos_exports;
   uint8_t nr_param_exports;
   unsigned private_mem_vgprs;
   unsigned max_simd_waves;
};
818
/* Format of the data referenced by si_shader_binary::code_buffer. */
enum si_shader_binary_type {
   SI_SHADER_BINARY_ELF,
   SI_SHADER_BINARY_RAW,
};
823
/* A compiled shader binary together with its optional debug strings. */
struct si_shader_binary {
   enum si_shader_binary_type type;

   /* Depends on binary type, either ELF or raw buffer. */
   const char *code_buffer;
   size_t code_size;
   uint32_t exec_size;

   /* NOTE(review): presumably a copy of the code as uploaded for the GPU - confirm
    * against the upload code.
    */
   char *uploaded_code;
   size_t uploaded_code_size;

   char *llvm_ir_string;

   const char *disasm_string;
   size_t disasm_size;

   const unsigned *symbols;
   unsigned num_symbols;
};
843
/* GS subgroup parameters; one instance is stored in si_shader::gs_info. */
struct gfx9_gs_info {
   unsigned es_verts_per_subgroup;
   unsigned gs_prims_per_subgroup;
   unsigned gs_inst_prims_in_subgroup;
   unsigned max_prims_per_subgroup;
   unsigned esgs_ring_size; /* in bytes */
};
851
/* State of one shader: its key, its binary, the shader config and the
 * register values precomputed for it.
 *
 * The first member is the si_pm4_state "base class", so it must stay first.
 * Do not reorder members without checking every user of this layout.
 */
struct si_shader {
   struct si_pm4_state pm4; /* base class */
   struct si_compiler_ctx_state compiler_ctx_state;

   struct si_shader_selector *selector;
   struct si_shader_selector *previous_stage_sel; /* for refcounting */
   struct si_shader *next_shader; /* Only used during compilation of LS and ES when merged. */

   /* Prolog/epilog parts of a non-monolithic shader (see the comment at the
    * top of this file for the prolog / main part / epilog split).
    */
   struct si_shader_part *prolog;
   struct si_shader *previous_stage; /* for GFX9 */
   struct si_shader_part *epilog;
   struct si_shader *gs_copy_shader;

   struct si_resource *bo;
   /* gpu_address should be bo->gpu_address except if SQTT is
    * in use.
    */
   uint64_t gpu_address;
   /* Only used on GFX6-10 where the scratch address must be inserted into the shader binary.
    * This is the scratch address that the current shader binary contains.
    */
   uint64_t scratch_va;
   union si_shader_key key;
   /* NOTE(review): presumably signalled once compilation of this shader has
    * finished -- confirm against the users of util_queue_fence.
    */
   struct util_queue_fence ready;
   bool compilation_failed;
   bool is_monolithic;
   bool is_optimized;
   bool is_binary_shared;
   bool is_gs_copy_shader;
   uint8_t wave_size;
   unsigned complete_shader_binary_size;

   /* The following data is all that's needed for binary shaders. */
   struct si_shader_binary binary;
   struct ac_shader_config config;
   struct si_shader_binary_info info;

   /* SI_SGPR_VS_STATE_BITS */
   bool uses_vs_state_provoking_vertex;
   bool uses_gs_state_outprim;

   bool uses_base_instance;

   /* Shader key + LLVM IR + disassembly + statistics.
    * Generated for debug contexts only.
    */
   char *shader_log;
   size_t shader_log_size;

   struct gfx9_gs_info gs_info;

   /* Precomputed register values.
    *
    * The gs/ngg/vs/ps members share storage (anonymous union), so only one
    * of them holds meaningful data for a given shader.
    * NOTE(review): which member is valid is presumably determined by the
    * hardware stage the shader runs as -- confirm at the write sites.
    */
   union {
      struct {
         unsigned vgt_gsvs_ring_offset_1;
         unsigned vgt_gsvs_ring_offset_2;
         unsigned vgt_gsvs_ring_offset_3;
         unsigned vgt_gsvs_ring_itemsize;
         unsigned vgt_gs_max_vert_out;
         unsigned vgt_gs_vert_itemsize;
         unsigned vgt_gs_vert_itemsize_1;
         unsigned vgt_gs_vert_itemsize_2;
         unsigned vgt_gs_vert_itemsize_3;
         unsigned vgt_gs_instance_cnt;
         unsigned vgt_gs_onchip_cntl;
         unsigned vgt_gs_max_prims_per_subgroup;
         unsigned vgt_esgs_ring_itemsize;
         unsigned spi_shader_pgm_rsrc3_gs;
         unsigned spi_shader_pgm_rsrc4_gs;
      } gs;

      struct {
         /* Computed by gfx10_ngg_calculate_subgroup_info. */
         uint16_t ngg_emit_size; /* in dwords */
         uint16_t hw_max_esverts;
         uint16_t max_gsprims;
         uint16_t max_out_verts;
         bool max_vert_out_per_gs_instance;
         /* Register values. */
         unsigned ge_max_output_per_subgroup;
         unsigned ge_ngg_subgrp_cntl;
         unsigned vgt_primitiveid_en;
         unsigned vgt_gs_onchip_cntl;
         unsigned vgt_gs_instance_cnt;
         unsigned esgs_vertex_stride;
         unsigned spi_vs_out_config;
         unsigned spi_shader_pos_format;
         unsigned pa_cl_vte_cntl;
         unsigned vgt_gs_max_vert_out; /* for API GS */
         unsigned ge_pc_alloc;         /* uconfig register */
         unsigned spi_shader_pgm_rsrc3_gs;
         unsigned spi_shader_pgm_rsrc4_gs;
         unsigned vgt_shader_stages_en;
      } ngg;

      struct {
         unsigned vgt_gs_mode;
         unsigned vgt_primitiveid_en;
         unsigned vgt_reuse_off;
         unsigned spi_vs_out_config;
         unsigned spi_shader_pos_format;
         unsigned pa_cl_vte_cntl;
         unsigned ge_pc_alloc; /* uconfig register */
      } vs;

      struct {
         unsigned spi_ps_input_ena;
         unsigned spi_ps_input_addr;
         unsigned spi_baryc_cntl;
         unsigned spi_ps_in_control;
         unsigned spi_shader_z_format;
         unsigned spi_shader_col_format;
         unsigned cb_shader_mask;
         unsigned db_shader_control;
         unsigned num_interp;
         unsigned spi_gs_out_config_ps;
         unsigned pa_sc_hisz_control;
         bool writes_samplemask;
      } ps;
   };

   /* Precomputed register values.
    * Unlike the union above, these are stored outside of it and therefore do
    * not alias any of the per-stage structs.
    */
   unsigned vgt_tf_param;                /* VGT_TF_PARAM */
   unsigned vgt_vertex_reuse_block_cntl; /* VGT_VERTEX_REUSE_BLOCK_CNTL */
   unsigned pa_cl_vs_out_cntl;
   unsigned ge_cntl;
};
979
/* A separately compiled shader part: a key, the resulting binary and its
 * shader config.
 *
 * struct si_shader references parts of this type through its "prolog" and
 * "epilog" members. "next" links parts into a singly linked list.
 * NOTE(review): presumably the list is the per-screen cache of already
 * compiled parts, looked up by "key" -- confirm at the users.
 */
struct si_shader_part {
   struct si_shader_part *next;
   union si_shader_part_key key;
   struct si_shader_binary binary;
   struct ac_shader_config config;
};
986
/* si_shader.c */
/* Forward declaration; the prototypes below only pass this type by pointer
 * (see si_shader_binary_open).
 */
struct ac_rtld_binary;

void si_update_shader_binary_info(struct si_shader *shader, struct nir_shader *nir);
/* Both functions take the same parameter list and return bool. */
bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
                       struct si_shader *shader, struct util_debug_callback *debug);
bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
                              struct si_shader *shader, struct util_debug_callback *debug);
void si_shader_destroy(struct si_shader *shader);
unsigned si_shader_io_get_unique_index(unsigned semantic);
/* si_shader_binary_upload_at takes the same parameters as
 * si_shader_binary_upload plus a signed 64-bit bo_offset.
 */
int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader,
                            uint64_t scratch_va);
int si_shader_binary_upload_at(struct si_screen *sscreen, struct si_shader *shader,
                               uint64_t scratch_va, int64_t bo_offset);
bool si_can_dump_shader(struct si_screen *sscreen, gl_shader_stage stage,
                        enum si_shader_dump_type dump_type);
/* "f" is a stdio stream, so <stdio.h> must be visible before this point. */
void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
                    struct util_debug_callback *debug, FILE *f, bool check_debug_option);
void si_shader_dump_stats_for_shader_db(struct si_screen *screen, struct si_shader *shader,
                                        struct util_debug_callback *debug);
/* lds_size is passed by pointer, i.e. it is an in/out or out parameter. */
void si_multiwave_lds_size_workaround(struct si_screen *sscreen, unsigned *lds_size);
const char *si_get_shader_name(const struct si_shader *shader);
void si_shader_binary_clean(struct si_shader_binary *binary);
struct nir_shader *si_deserialize_shader(struct si_shader_selector *sel);
unsigned si_get_ps_num_interp(struct si_shader *ps);
bool si_shader_binary_open(struct si_screen *screen, struct si_shader *shader,
                           struct ac_rtld_binary *rtld);
/* The looked-up value is returned through "value"; "data" is an opaque
 * pointer whose meaning is defined by the implementation.
 */
bool si_get_external_symbol(enum amd_gfx_level gfx_level, void *data, const char *name,
                            uint64_t *value);
unsigned si_get_shader_prefetch_size(struct si_shader *shader);
unsigned si_get_shader_binary_size(struct si_screen *screen, struct si_shader *shader);
1018
/* si_shader_info.c */
/* The NIR shader is taken as const; the result is written to "info". */
void si_nir_scan_shader(struct si_screen *sscreen, const struct nir_shader *nir,
                        struct si_shader_info *info);
1022
/* si_shader_nir.c */
void si_lower_mediump_io(struct nir_shader *nir);

/* NOTE(review): the (instr, data) signature looks like a NIR filter callback
 * -- confirm where it is registered.
 */
bool si_alu_to_scalar_packed_math_filter(const struct nir_instr *instr, const void *data);
void si_nir_opts(struct si_screen *sscreen, struct nir_shader *nir, bool first);
void si_nir_late_opts(struct nir_shader *nir);
/* The shader is passed as an untyped pointer ("nirptr"). */
char *si_finalize_nir(struct pipe_screen *screen, void *nirptr);
1030
/* si_state_shaders.cpp */
/* NOTE(review): these are defined in a C++ file; the "#ifdef __cplusplus"
 * closing brace at the end of this header presumably ends an extern "C"
 * block that gives them C linkage -- confirm at the top of the header.
 */
unsigned si_shader_num_alloc_param_exports(struct si_shader *shader);
unsigned si_determine_wave_size(struct si_screen *sscreen, struct si_shader *shader);
/* The result is written to "out". */
void gfx9_get_gs_info(struct si_shader_selector *es, struct si_shader_selector *gs,
                      struct gfx9_gs_info *out);
bool gfx10_is_ngg_passthrough(struct si_shader *shader);
unsigned si_shader_lshs_vertex_stride(struct si_shader *ls);
bool si_should_clear_lds(struct si_screen *sscreen, const struct nir_shader *shader);
1039
1040 /* Inline helpers. */
1041
1042 /* Return the pointer to the main shader part's pointer. */
si_get_main_shader_part(struct si_shader_selector * sel,const union si_shader_key * key,unsigned wave_size)1043 static inline struct si_shader **si_get_main_shader_part(struct si_shader_selector *sel,
1044 const union si_shader_key *key,
1045 unsigned wave_size)
1046 {
1047 assert(wave_size == 32 || wave_size == 64);
1048 unsigned index = wave_size / 32 - 1;
1049
1050 if (sel->stage <= MESA_SHADER_GEOMETRY) {
1051 if (key->ge.as_ls)
1052 return &sel->main_shader_part_ls[index];
1053 if (key->ge.as_es && key->ge.as_ngg)
1054 return &sel->main_shader_part_ngg_es[index];
1055 if (key->ge.as_es) {
1056 /* legacy GS only support wave 64 */
1057 assert(wave_size == 64);
1058 return &sel->main_shader_part_es;
1059 }
1060 if (key->ge.as_ngg)
1061 return &sel->main_shader_part_ngg[index];
1062 }
1063 return &sel->main_shader_part[index];
1064 }
1065
si_shader_uses_bindless_samplers(struct si_shader_selector * selector)1066 static inline bool si_shader_uses_bindless_samplers(struct si_shader_selector *selector)
1067 {
1068 return selector ? selector->info.uses_bindless_samplers : false;
1069 }
1070
si_shader_uses_bindless_images(struct si_shader_selector * selector)1071 static inline bool si_shader_uses_bindless_images(struct si_shader_selector *selector)
1072 {
1073 return selector ? selector->info.uses_bindless_images : false;
1074 }
1075
gfx10_edgeflags_have_effect(struct si_shader * shader)1076 static inline bool gfx10_edgeflags_have_effect(struct si_shader *shader)
1077 {
1078 if (shader->selector->stage == MESA_SHADER_VERTEX &&
1079 !shader->selector->info.base.vs.blit_sgprs_amd &&
1080 !(shader->key.ge.opt.ngg_culling & SI_NGG_CULL_LINES))
1081 return true;
1082
1083 return false;
1084 }
1085
gfx10_ngg_writes_user_edgeflags(struct si_shader * shader)1086 static inline bool gfx10_ngg_writes_user_edgeflags(struct si_shader *shader)
1087 {
1088 return gfx10_edgeflags_have_effect(shader) &&
1089 shader->selector->info.writes_edgeflag;
1090 }
1091
si_shader_uses_streamout(const struct si_shader * shader)1092 static inline bool si_shader_uses_streamout(const struct si_shader *shader)
1093 {
1094 return shader->selector->stage <= MESA_SHADER_GEOMETRY &&
1095 shader->selector->info.enabled_streamout_buffer_mask &&
1096 !shader->key.ge.opt.remove_streamout &&
1097 !shader->key.ge.mono.remove_streamout;
1098 }
1099
si_shader_uses_discard(struct si_shader * shader)1100 static inline bool si_shader_uses_discard(struct si_shader *shader)
1101 {
1102 /* Changes to this should also update ps_modifies_zs. */
1103 return shader->selector->info.base.fs.uses_discard ||
1104 shader->key.ps.part.prolog.poly_stipple ||
1105 shader->key.ps.mono.point_smoothing ||
1106 shader->key.ps.part.epilog.alpha_func != PIPE_FUNC_ALWAYS;
1107 }
1108
1109 #ifdef __cplusplus
1110 }
1111 #endif
1112
1113 #endif
1114