/*
 * Copyright 2023 Alyssa Rosenzweig
 * Copyright 2023 Valve Corporation
 * SPDX-License-Identifier: MIT
 */

#include "shaders/tessellator.h"
#include "geometry.h"

/* Compatible with util/u_math.h */
static inline uint
util_logbase2_ceil(uint n)
{
   if (n <= 1)
      return 0;
   else
      return 32 - clz(n - 1);
}

/* Swap the two non-provoking vertices in odd triangles. This generates a
 * vertex ID list with a consistent winding order.
 *
 * For a given prim and flatshade_first, the map [0, 1, 2] -> [0, 1, 2] is its
 * own inverse. This lets us reuse it for both vertex fetch and transform
 * feedback.
 */
uint
libagx_map_vertex_in_tri_strip(uint prim, uint vert, bool flatshade_first)
{
   unsigned pv = flatshade_first ? 0 : 2;

   bool even = (prim & 1) == 0;
   bool provoking = vert == pv;

   return (provoking || even) ? vert : ((3 - pv) - vert);
}

uint64_t
libagx_xfb_vertex_address(global struct agx_geometry_params *p, uint base_index,
                          uint vert, uint buffer, uint stride,
                          uint output_offset)
{
   uint index = base_index + vert;
   uint xfb_offset = (index * stride) + output_offset;

   return (uintptr_t)(p->xfb_base[buffer]) + xfb_offset;
}

uint
libagx_vertex_id_for_line_loop(uint prim, uint vert, uint num_prims)
{
   /* (0, 1), (1, 2), (2, 0) */
   if (prim == (num_prims - 1) && vert == 1)
      return 0;
   else
      return prim + vert;
}

uint
libagx_vertex_id_for_line_class(enum mesa_prim mode, uint prim, uint vert,
                                uint num_prims)
{
   /* Line list, line strip, or line loop */
   if (mode == MESA_PRIM_LINE_LOOP && prim == (num_prims - 1) && vert == 1)
      return 0;

   if (mode == MESA_PRIM_LINES)
      prim *= 2;

   return prim + vert;
}

uint
libagx_vertex_id_for_tri_fan(uint prim, uint vert, bool flatshade_first)
{
   /* Vulkan spec section 20.1.7 gives (i + 1, i + 2, 0) with a provoking
    * first vertex. OpenGL instead wants (0, i + 1, i + 2) with a provoking
    * last vertex. Piglit clipflat expects us to switch between these orders
    * depending on the provoking vertex, to avoid trivializing the fan.
    *
    * Rotate accordingly.
    */
   if (flatshade_first) {
      vert = (vert == 2) ? 0 : (vert + 1);
   }

   /* The simpler form assuming last is provoking. */
   return (vert == 0) ? 0 : prim + vert;
}

uint
libagx_vertex_id_for_tri_class(enum mesa_prim mode, uint prim, uint vert,
                               bool flatshade_first)
{
   if (flatshade_first && mode == MESA_PRIM_TRIANGLE_FAN) {
      vert = vert + 1;
      vert = (vert == 3) ? 0 : vert;
   }

   if (mode == MESA_PRIM_TRIANGLE_FAN && vert == 0)
      return 0;

   if (mode == MESA_PRIM_TRIANGLES)
      prim *= 3;

   /* Triangle list, triangle strip, or triangle fan */
   if (mode == MESA_PRIM_TRIANGLE_STRIP) {
      unsigned pv = flatshade_first ? 0 : 2;

      bool even = (prim & 1) == 0;
      bool provoking = vert == pv;

      vert = ((provoking || even) ? vert : ((3 - pv) - vert));
   }

   return prim + vert;
}

uint
libagx_vertex_id_for_line_adj_class(enum mesa_prim mode, uint prim, uint vert)
{
   /* Line list adj or line strip adj */
   if (mode == MESA_PRIM_LINES_ADJACENCY)
      prim *= 4;

   return prim + vert;
}
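/*
 * Illustrative sketch only, not used by the driver: expand one triangle
 * strip primitive into the three vertex IDs the fetch path will use, via the
 * winding map above. E.g. with a provoking-last vertex, prims 0, 1, 2 expand
 * to (0, 1, 2), (2, 1, 3), (2, 3, 4).
 */
static inline uint3
libagx_example_tri_strip_ids(uint prim, bool flatshade_first)
{
   return (uint3)(
      prim + libagx_map_vertex_in_tri_strip(prim, 0, flatshade_first),
      prim + libagx_map_vertex_in_tri_strip(prim, 1, flatshade_first),
      prim + libagx_map_vertex_in_tri_strip(prim, 2, flatshade_first));
}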
uint
libagx_vertex_id_for_tri_strip_adj(uint prim, uint vert, uint num_prims,
                                   bool flatshade_first)
{
   /* See Vulkan spec section 20.1.11 "Triangle Strips With Adjacency".
    *
    * There are different cases for first/middle/last/only primitives and for
    * odd/even primitives. Determine which case we're in.
    */
   bool last = prim == (num_prims - 1);
   bool first = prim == 0;
   bool even = (prim & 1) == 0;
   bool even_or_first = even || first;

   /* When the last vertex is provoking, we rotate the primitives
    * accordingly. This seems required for OpenGL.
    */
   if (!flatshade_first && !even_or_first) {
      vert = (vert + 4u) % 6u;
   }

   /* Offsets per the spec. The spec lists 6 cases with 6 offsets. Luckily,
    * there are lots of patterns we can exploit, avoiding a full 6x6 LUT. The
    * negative entries rely on unsigned wraparound: adding them to the
    * primitive base below subtracts.
    *
    * Here we assume the first vertex is provoking, the Vulkan default.
    */
   uint offsets[6] = {
      0,
      first ? 1 : (even ? -2 : 3),
      even_or_first ? 2 : 4,
      last ? 5 : 6,
      even_or_first ? 4 : 2,
      even_or_first ? 3 : -2,
   };

   /* Ensure NIR can see through the local array */
   uint offset = 0;
   for (uint i = 1; i < 6; ++i) {
      if (i == vert)
         offset = offsets[i];
   }

   /* Finally add to the base of the primitive */
   return (prim * 2) + offset;
}

uint
libagx_vertex_id_for_tri_adj_class(enum mesa_prim mode, uint prim, uint vert,
                                   uint nr, bool flatshade_first)
{
   /* Tri adj list or tri adj strip */
   if (mode == MESA_PRIM_TRIANGLE_STRIP_ADJACENCY) {
      return libagx_vertex_id_for_tri_strip_adj(prim, vert, nr,
                                                flatshade_first);
   } else {
      return (6 * prim) + vert;
   }
}

uint
libagx_vertex_id_for_topology(enum mesa_prim mode, bool flatshade_first,
                              uint prim, uint vert, uint num_prims)
{
   switch (mode) {
   case MESA_PRIM_POINTS:
   case MESA_PRIM_LINES:
   case MESA_PRIM_TRIANGLES:
   case MESA_PRIM_LINES_ADJACENCY:
   case MESA_PRIM_TRIANGLES_ADJACENCY:
      /* Regular primitive: every N vertices defines a primitive */
      return (prim * mesa_vertices_per_prim(mode)) + vert;

   case MESA_PRIM_LINE_LOOP:
      return libagx_vertex_id_for_line_loop(prim, vert, num_prims);

   case MESA_PRIM_LINE_STRIP:
   case MESA_PRIM_LINE_STRIP_ADJACENCY:
      /* (i, i + 1) or (i, ..., i + 3) */
      return prim + vert;

   case MESA_PRIM_TRIANGLE_STRIP: {
      /* Order depends on the provoking vertex.
       *
       * First: (0, 1, 2), (1, 3, 2), (2, 3, 4).
       * Last:  (0, 1, 2), (2, 1, 3), (2, 3, 4).
       *
       * Pull the (maybe swapped) vert from the corresponding primitive.
       */
      return prim +
             libagx_map_vertex_in_tri_strip(prim, vert, flatshade_first);
   }

   case MESA_PRIM_TRIANGLE_FAN:
      return libagx_vertex_id_for_tri_fan(prim, vert, flatshade_first);

   case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY:
      return libagx_vertex_id_for_tri_strip_adj(prim, vert, num_prims,
                                                flatshade_first);

   default:
      return 0;
   }
}
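/*
 * Illustrative sketch only, not used by the driver: gather the vertex IDs of
 * one decomposed primitive via the dispatcher above. For example,
 * MESA_PRIM_LINE_LOOP with num_prims = 3 yields (0, 1), (1, 2), (2, 0).
 */
static inline void
libagx_example_prim_ids(enum mesa_prim mode, bool flatshade_first, uint prim,
                        uint num_prims, uint *ids)
{
   for (uint v = 0; v < mesa_vertices_per_prim(mode); ++v) {
      ids[v] = libagx_vertex_id_for_topology(mode, flatshade_first, prim, v,
                                             num_prims);
   }
}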
uint
libagx_load_index_buffer_internal(uintptr_t index_buffer,
                                  uint32_t index_buffer_range_el, uint id,
                                  uint index_size)
{
   bool oob = id >= index_buffer_range_el;

   /* If the load would be out-of-bounds, load the first element, which is
    * assumed valid. If the application index buffer is empty with
    * robustness2, index_buffer will point to a zero sink where only the
    * first element is valid.
    */
   if (oob) {
      id = 0;
   }

   uint el;
   if (index_size == 1) {
      el = ((constant uint8_t *)index_buffer)[id];
   } else if (index_size == 2) {
      el = ((constant uint16_t *)index_buffer)[id];
   } else {
      el = ((constant uint32_t *)index_buffer)[id];
   }

   /* D3D robustness semantics: out-of-bounds reads return zero.
    * TODO: Optimize?
    */
   if (oob) {
      el = 0;
   }

   return el;
}

uint
libagx_load_index_buffer(constant struct agx_ia_state *p, uint id,
                         uint index_size)
{
   return libagx_load_index_buffer_internal(
      p->index_buffer, p->index_buffer_range_el, id, index_size);
}

/*
 * Return the ID of the first thread in the workgroup where cond is true, or
 * 1024 if cond is false across the workgroup.
 */
static uint
first_true_thread_in_workgroup(bool cond, local uint *scratch)
{
   barrier(CLK_LOCAL_MEM_FENCE);
   scratch[get_sub_group_id()] = ballot(cond);
   barrier(CLK_LOCAL_MEM_FENCE);

   /* Each lane tests whether one subgroup's ballot is nonzero; balloting
    * that test yields a mask of subgroups containing a true lane.
    */
   uint first_group = ctz(ballot(scratch[get_sub_group_local_id()]));
   uint off = ctz(first_group < 32 ? scratch[first_group] : 0);
   return (first_group * 32) + off;
}

/*
 * Allocate memory from the heap (thread-safe). Returns the offset into the
 * heap. The allocation is kept 8-byte aligned by rounding up the size.
 */
static inline uint
libagx_atomic_alloc(global struct agx_geometry_state *heap, uint size_B)
{
   return atomic_fetch_add((volatile atomic_uint *)(&heap->heap_bottom),
                           align(size_B, 8));
}

/*
 * When unrolling the index buffer for a draw, we translate the old indirect
 * draws to new indirect draws. This routine allocates the new index buffer
 * and sets up most of the new draw descriptor.
 */
static global void *
setup_unroll_for_draw(global struct agx_restart_unroll_params *p,
                      constant uint *in_draw, uint draw, enum mesa_prim mode,
                      uint index_size_B)
{
   /* Determine an upper bound on the memory required for the index buffer.
    * Restarts only decrease the unrolled index buffer size, so the maximum
    * size is the unrolled size when the input has no restarts.
    */
   uint max_prims = u_decomposed_prims_for_vertices(mode, in_draw[0]);
   uint max_verts = max_prims * mesa_vertices_per_prim(mode);
   uint alloc_size = max_verts * index_size_B;

   /* Allocate the unrolled index buffer. Atomic since multiple threads may
    * be running to handle multidraw in parallel.
    */
   global struct agx_geometry_state *heap = p->heap;
   uint old_heap_bottom_B = libagx_atomic_alloc(p->heap, alloc_size);

   /* Regardless of the input stride, we use tightly packed output draws */
   global uint *out = &p->out_draws[5 * draw];

   /* Setup most of the descriptor. Count will be determined after unroll. */
   out[1] = in_draw[1];                       /* instance count */
   out[2] = old_heap_bottom_B / index_size_B; /* index offset */
   out[3] = in_draw[3];                       /* index bias */
   out[4] = in_draw[4];                       /* base instance */

   /* Return the index buffer we allocated */
   return (global uchar *)heap->heap + old_heap_bottom_B;
}
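/*
 * Worked example of the unroll below (illustrative, provoking-last): given
 * the TRIANGLE_STRIP index stream {0, 1, 2, 3, R, 4, 5, 6} where R is the
 * restart index, the run before the restart decomposes into (0, 1, 2) and
 * (2, 1, 3), and the run after it into (4, 5, 6). The unrolled triangle list
 * is therefore {0, 1, 2, 2, 1, 3, 4, 5, 6} with an output count of 9.
 */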
#define UNROLL(INDEX, suffix)                                                  \
   kernel void libagx_unroll_restart_##suffix(                                 \
      global struct agx_restart_unroll_params *p, enum mesa_prim mode,         \
      uint draw, uint tid)                                                     \
   {                                                                           \
      /* For an indirect multidraw, we are dispatched maxDraws times and       \
       * terminate trailing invocations.                                       \
       */                                                                      \
      if (p->count && draw >= *(p->count))                                     \
         return;                                                               \
                                                                               \
      constant uint *in_draw =                                                 \
         (constant uint *)(p->draws + (draw * p->draw_stride));                \
                                                                               \
      uint count = in_draw[0];                                                 \
                                                                               \
      local uintptr_t out_ptr, in_ptr;                                         \
      if (tid == 0) {                                                          \
         out_ptr = (uintptr_t)setup_unroll_for_draw(p, in_draw, draw, mode,    \
                                                    sizeof(INDEX));            \
                                                                               \
         /* Accessed through local mem because NIR deref is too aggressive */  \
         in_ptr = (uintptr_t)(libagx_index_buffer(                             \
            p->index_buffer, p->index_buffer_size_el, in_draw[2],              \
            sizeof(INDEX), p->zero_sink));                                     \
      }                                                                        \
                                                                               \
      barrier(CLK_LOCAL_MEM_FENCE);                                            \
      global INDEX *out = (global INDEX *)out_ptr;                             \
                                                                               \
      local uint scratch[32];                                                  \
                                                                               \
      uint out_prims = 0;                                                      \
      INDEX restart_idx = p->restart_index;                                    \
      bool flatshade_first = p->flatshade_first;                               \
                                                                               \
      uint needle = 0;                                                         \
      uint per_prim = mesa_vertices_per_prim(mode);                            \
      while (needle < count) {                                                 \
         /* Search for next restart or the end. Lanes load in parallel. */     \
         uint next_restart = needle;                                           \
         for (;;) {                                                            \
            uint idx = next_restart + tid;                                     \
            bool restart =                                                     \
               idx >= count || libagx_load_index_buffer_internal(              \
                                  in_ptr, p->index_buffer_size_el, idx,        \
                                  sizeof(INDEX)) == restart_idx;               \
                                                                               \
            uint next_offs = first_true_thread_in_workgroup(restart, scratch); \
                                                                               \
            next_restart += next_offs;                                         \
            if (next_offs < 1024)                                              \
               break;                                                          \
         }                                                                     \
                                                                               \
         /* Emit up to the next restart. Lanes output in parallel */           \
         uint subcount = next_restart - needle;                                \
         uint subprims = u_decomposed_prims_for_vertices(mode, subcount);      \
         uint out_prims_base = out_prims;                                      \
         for (uint i = tid; i < subprims; i += 1024) {                         \
            for (uint vtx = 0; vtx < per_prim; ++vtx) {                        \
               uint id = libagx_vertex_id_for_topology(mode, flatshade_first,  \
                                                       i, vtx, subprims);      \
               uint offset = needle + id;                                      \
                                                                               \
               out[((out_prims_base + i) * per_prim) + vtx] =                  \
                  libagx_load_index_buffer_internal(                           \
                     in_ptr, p->index_buffer_size_el, offset, sizeof(INDEX));  \
            }                                                                  \
         }                                                                     \
                                                                               \
         out_prims += subprims;                                                \
         needle = next_restart + 1;                                            \
      }                                                                        \
                                                                               \
      if (tid == 0)                                                            \
         p->out_draws[(5 * draw) + 0] = out_prims * per_prim;                  \
   }

UNROLL(uchar, u8)
UNROLL(ushort, u16)
UNROLL(uint, u32)

uint
libagx_setup_xfb_buffer(global struct agx_geometry_params *p, uint i)
{
   global uint *off_ptr = p->xfb_offs_ptrs[i];
   if (!off_ptr)
      return 0;

   uint off = *off_ptr;
   p->xfb_base[i] = p->xfb_base_original[i] + off;
   return off;
}
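/*
 * Worked example for libagx_end_primitive below (illustrative): a GS
 * invocation that emits a 3-vertex strip and then a 2-vertex strip (restart
 * on, all base offsets zero) writes the index buffer
 * {0, 1, 2, -1, 3, 4, -1}: one index per emitted vertex plus one restart
 * index per primitive.
 */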
/*
 * Translate EndPrimitive for LINE_STRIP or TRIANGLE_STRIP output prims into
 * writes into the 32-bit output index buffer. We write the sequence (b, b +
 * 1, b + 2, ..., b + n - 1, -1), where b (base) is the first vertex in the
 * prim, n (count) is the number of vertices in the prim, and -1 is the
 * primitive restart index used to signal the end of the prim.
 *
 * For points, we write index buffers without restart, just as a sideband to
 * pass data into the vertex shader.
 */
void
libagx_end_primitive(global int *index_buffer, uint total_verts,
                     uint verts_in_prim, uint total_prims,
                     uint invocation_vertex_base, uint invocation_prim_base,
                     uint geometry_base, bool restart)
{
   /* Previous verts/prims are from previous invocations plus earlier prims
    * in this invocation. For the intra-invocation counts, we subtract the
    * count for this prim from the inclusive sum NIR gives us.
    */
   uint previous_verts_in_invoc = (total_verts - verts_in_prim);
   uint previous_verts = invocation_vertex_base + previous_verts_in_invoc;
   uint previous_prims =
      restart ? invocation_prim_base + (total_prims - 1) : 0;

   /* The indices are encoded as: (unrolled ID * output vertices) + vertex. */
   uint index_base = geometry_base + previous_verts_in_invoc;

   /* Index buffer contains 1 index for each vertex and 1 for each prim */
   global int *out = &index_buffer[previous_verts + previous_prims];

   /* Write out indices for the strip */
   for (uint i = 0; i < verts_in_prim; ++i) {
      out[i] = index_base + i;
   }

   if (restart)
      out[verts_in_prim] = -1;
}

void
libagx_build_gs_draw(global struct agx_geometry_params *p, uint vertices,
                     uint primitives)
{
   global uint *descriptor = p->indirect_desc;
   global struct agx_geometry_state *state = p->state;

   /* Setup the indirect draw descriptor */
   uint indices = vertices + primitives; /* includes restart indices */

   /* Allocate the index buffer */
   uint index_buffer_offset_B = state->heap_bottom;
   p->output_index_buffer =
      (global uint *)(state->heap + index_buffer_offset_B);
   state->heap_bottom += (indices * 4);

   descriptor[0] = indices;                   /* count */
   descriptor[1] = 1;                         /* instance count */
   descriptor[2] = index_buffer_offset_B / 4; /* start */
   descriptor[3] = 0;                         /* index bias */
   descriptor[4] = 0;                         /* start instance */

   /* Deliberately crash with a recognizable address if the heap overflowed,
    * since we cannot recover here.
    */
   if (state->heap_bottom > state->heap_size) {
      global uint *foo = (global uint *)(uintptr_t)0xdeadbeef;
      *foo = 0x1234;
   }
}
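/*
 * For illustration: libagx_build_gs_draw above with vertices = 4 and
 * primitives = 2 allocates 6 indices and, if the allocation lands at byte
 * offset B in the heap, writes the descriptor {6, 1, B / 4, 0, 0}.
 */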
void
libagx_gs_setup_indirect(global struct agx_gs_setup_indirect_params *gsi,
                         enum mesa_prim mode, uint local_id)
{
   global struct agx_geometry_params *p = gsi->geom;
   global struct agx_ia_state *ia = gsi->ia;

   /* Determine the (primitives, instances) grid size. */
   uint vertex_count = gsi->draw[0];
   uint instance_count = gsi->draw[1];

   ia->verts_per_instance = vertex_count;

   /* Calculate the number of primitives input into the GS */
   uint prim_per_instance =
      u_decomposed_prims_for_vertices(mode, vertex_count);
   p->input_primitives = prim_per_instance * instance_count;

   /* Invoke VS as (vertices, instances); GS as (primitives, instances) */
   p->vs_grid[0] = vertex_count;
   p->vs_grid[1] = instance_count;

   p->gs_grid[0] = prim_per_instance;
   p->gs_grid[1] = instance_count;

   p->primitives_log2 = util_logbase2_ceil(prim_per_instance);

   /* If indexing is enabled, the third word is the offset into the index
    * buffer in elements. Apply that offset now that we have it. For a
    * hardware indirect draw, the hardware would do this for us, but for
    * software input assembly we need to do it ourselves.
    */
   if (gsi->index_size_B) {
      ia->index_buffer =
         libagx_index_buffer(gsi->index_buffer, gsi->index_buffer_range_el,
                             gsi->draw[2], gsi->index_size_B, gsi->zero_sink);

      ia->index_buffer_range_el = libagx_index_buffer_range_el(
         gsi->index_buffer_range_el, gsi->draw[2]);
   }

   /* We need to allocate VS and GS count buffers, do so now */
   global struct agx_geometry_state *state = p->state;

   uint vertex_buffer_size =
      libagx_tcs_in_size(vertex_count * instance_count, gsi->vs_outputs);

   p->count_buffer = (global uint *)(state->heap + state->heap_bottom);
   state->heap_bottom +=
      align(p->input_primitives * p->count_buffer_stride, 16);

   p->input_buffer = (uintptr_t)(state->heap + state->heap_bottom);
   *(gsi->vertex_buffer) = p->input_buffer;
   state->heap_bottom += align(vertex_buffer_size, 4);

   p->input_mask = gsi->vs_outputs;

   /* Deliberately crash with a recognizable address if the heap overflowed,
    * since we cannot recover here.
    */
   if (state->heap_bottom > state->heap_size) {
      global uint *foo = (global uint *)(uintptr_t)0x1deadbeef;
      *foo = 0x1234;
   }
}

/*
 * Returns (work_group_scan_inclusive_add(x), work_group_sum(x)). Implemented
 * manually with subgroup ops and local memory since Mesa doesn't do those
 * lowerings yet.
 */
static uint2
libagx_work_group_scan_inclusive_add(uint x, local uint *scratch)
{
   uint sg_id = get_sub_group_id();

   /* Partial prefix sum of the subgroup */
   uint sg = sub_group_scan_inclusive_add(x);

   /* Reduction (sum) for the subgroup */
   uint sg_sum = sub_group_broadcast(sg, 31);

   /* Write out all the subgroup sums */
   barrier(CLK_LOCAL_MEM_FENCE);
   scratch[sg_id] = sg_sum;
   barrier(CLK_LOCAL_MEM_FENCE);

   /* Read all the subgroup sums. Thread T in subgroup G reads the sum of
    * all threads in subgroup T.
    */
   uint other_sum = scratch[get_sub_group_local_id()];

   /* Exclusive-sum the subgroup sums to get the total before the current
    * subgroup, which can be added to the total for the current subgroup.
    */
   uint other_sums = sub_group_scan_exclusive_add(other_sum);
   uint base = sub_group_broadcast(other_sums, sg_id);
   uint prefix = base + sg;

   /* Reduce the workgroup using the prefix sum we already did */
   uint reduction = sub_group_broadcast(other_sums + other_sum, 31);

   return (uint2)(prefix, reduction);
}
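/*
 * Illustrative sketch only, not part of the library: with every thread of
 * the 1024-wide workgroup contributing x = 1, thread T observes prefix T + 1
 * and all threads observe reduction 1024.
 */
kernel void
libagx_example_scan(global uint *out)
{
   local uint scratch[32];
   uint2 r = libagx_work_group_scan_inclusive_add(1, scratch);

   out[get_local_id(0)] = r[0]; /* = get_local_id(0) + 1 */
}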
kernel void
libagx_prefix_sum(global uint *buffer, uint len, uint words, uint word)
{
   local uint scratch[32];
   uint tid = get_local_id(0);

   /* Main loop: complete workgroups processing 1024 values at once */
   uint i, count = 0;
   uint len_remainder = len % 1024;
   uint len_rounded_down = len - len_remainder;

   for (i = tid; i < len_rounded_down; i += 1024) {
      global uint *ptr = &buffer[(i * words) + word];
      uint value = *ptr;
      uint2 sums = libagx_work_group_scan_inclusive_add(value, scratch);

      *ptr = count + sums[0];
      count += sums[1];
   }

   /* The last iteration is special since we won't have a full workgroup
    * unless the length is divisible by the workgroup size, and we don't
    * advance count.
    */
   global uint *ptr = &buffer[(i * words) + word];
   uint value = (tid < len_remainder) ? *ptr : 0;
   uint scan = libagx_work_group_scan_inclusive_add(value, scratch)[0];

   if (tid < len_remainder) {
      *ptr = count + scan;
   }
}

kernel void
libagx_prefix_sum_tess(global struct libagx_tess_args *p)
{
   libagx_prefix_sum(p->counts, p->nr_patches, 1 /* words */, 0 /* word */);

   /* After prefix summing, we know the total number of indices, so allocate
    * the index buffer now. Elect a single thread for the allocation.
    */
   barrier(CLK_LOCAL_MEM_FENCE);
   if (get_local_id(0) != 0)
      return;

   /* The last element of an inclusive prefix sum is the total sum */
   uint total = p->counts[p->nr_patches - 1];

   /* Allocate 4-byte indices */
   uint32_t elsize_B = sizeof(uint32_t);
   uint32_t size_B = total * elsize_B;
   uint alloc_B = p->heap->heap_bottom;
   p->heap->heap_bottom += size_B;
   p->heap->heap_bottom = align(p->heap->heap_bottom, 8);

   p->index_buffer = (global uint32_t *)(((uintptr_t)p->heap->heap) + alloc_B);

   /* ...and now we can generate the API indexed draw */
   global uint32_t *desc = p->out_draws;

   desc[0] = total;              /* count */
   desc[1] = 1;                  /* instance_count */
   desc[2] = alloc_B / elsize_B; /* start */
   desc[3] = 0;                  /* index_bias */
   desc[4] = 0;                  /* start_instance */
}

uintptr_t
libagx_vertex_output_address(uintptr_t buffer, uint64_t mask, uint vtx,
                             gl_varying_slot location)
{
   return buffer + libagx_tcs_in_offs(vtx, location, mask);
}

uintptr_t
libagx_geometry_input_address(constant struct agx_geometry_params *p, uint vtx,
                              gl_varying_slot location)
{
   return libagx_vertex_output_address(p->input_buffer, p->input_mask, vtx,
                                       location);
}

unsigned
libagx_input_vertices(constant struct agx_ia_state *ia)
{
   return ia->verts_per_instance;
}