xref: /aosp_15_r20/external/mesa3d/src/gallium/frontends/lavapipe/lvp_nir_ray_tracing.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2021 Google
3  * Copyright © 2023 Valve Corporation
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "lvp_nir_ray_tracing.h"
8 #include "lvp_acceleration_structure.h"
9 #include "lvp_private.h"
10 
11 #include "compiler/spirv/spirv.h"
12 
13 #include <float.h>
14 #include <math.h>
15 
16 nir_def *
lvp_mul_vec3_mat(nir_builder * b,nir_def * vec,nir_def * matrix[],bool translation)17 lvp_mul_vec3_mat(nir_builder *b, nir_def *vec, nir_def *matrix[], bool translation)
18 {
19    nir_def *result_components[3] = {
20       nir_channel(b, matrix[0], 3),
21       nir_channel(b, matrix[1], 3),
22       nir_channel(b, matrix[2], 3),
23    };
24    for (unsigned i = 0; i < 3; ++i) {
25       for (unsigned j = 0; j < 3; ++j) {
26          nir_def *v =
27             nir_fmul(b, nir_channels(b, vec, 1 << j), nir_channels(b, matrix[i], 1 << j));
28          result_components[i] = (translation || j) ? nir_fadd(b, result_components[i], v) : v;
29       }
30    }
31    return nir_vec(b, result_components, 3);
32 }
33 
34 void
lvp_load_wto_matrix(nir_builder * b,nir_def * instance_addr,nir_def ** out)35 lvp_load_wto_matrix(nir_builder *b, nir_def *instance_addr, nir_def **out)
36 {
37    unsigned offset = offsetof(struct lvp_bvh_instance_node, wto_matrix);
38    for (unsigned i = 0; i < 3; ++i) {
39       out[i] = nir_build_load_global(b, 4, 32, nir_iadd_imm(b, instance_addr, offset + i * 16));
40    }
41 }
42 
43 nir_def *
lvp_load_vertex_position(nir_builder * b,nir_def * instance_addr,nir_def * primitive_id,uint32_t index)44 lvp_load_vertex_position(nir_builder *b, nir_def *instance_addr, nir_def *primitive_id,
45                          uint32_t index)
46 {
47    nir_def *bvh_addr = nir_build_load_global(
48       b, 1, 64, nir_iadd_imm(b, instance_addr, offsetof(struct lvp_bvh_instance_node, bvh_ptr)));
49 
50    nir_def *leaf_nodes_offset = nir_build_load_global(
51       b, 1, 32, nir_iadd_imm(b, bvh_addr, offsetof(struct lvp_bvh_header, leaf_nodes_offset)));
52 
53    nir_def *offset = nir_imul_imm(b, primitive_id, sizeof(struct lvp_bvh_triangle_node));
54    offset = nir_iadd(b, offset, leaf_nodes_offset);
55    offset = nir_iadd_imm(b, offset, index * 3 * sizeof(float));
56 
57    return nir_build_load_global(b, 3, 32, nir_iadd(b, bvh_addr, nir_u2u64(b, offset)));
58 }
59 
/* Intersect a ray against the two child AABBs of an internal (box) node
 * using the slab method. Returns a uvec2 of child node ids ordered so the
 * nearer child is in component 0; a missed (or inactive) child slot holds
 * 0xffffffff (LVP_BVH_INVALID_NODE).
 */
static nir_def *
lvp_build_intersect_ray_box(nir_builder *b, nir_def *node_addr, nir_def *ray_tmax,
                            nir_def *origin, nir_def *dir, nir_def *inv_dir)
{
   const struct glsl_type *vec2_type = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
   const struct glsl_type *uvec2_type = glsl_vector_type(GLSL_TYPE_UINT, 2);

   /* Per-child entry distance; INFINITY means "missed". */
   nir_variable *distances =
      nir_variable_create(b->shader, nir_var_shader_temp, vec2_type, "distances");
   nir_store_var(b, distances, nir_imm_vec2(b, INFINITY, INFINITY), 0xf);

   nir_variable *child_indices =
      nir_variable_create(b->shader, nir_var_shader_temp, uvec2_type, "child_indices");
   nir_store_var(b, child_indices, nir_imm_ivec2(b, 0xffffffffu, 0xffffffffu), 0xf);

   /* Where a direction component is 0, inv_dir is inf; clamp it to FLT_MAX
    * so the slab multiplications below cannot produce 0 * inf = NaN. */
   inv_dir = nir_bcsel(b, nir_feq_imm(b, dir, 0), nir_imm_float(b, FLT_MAX), inv_dir);

   for (int i = 0; i < 2; i++) {
      const uint32_t child_offset = offsetof(struct lvp_bvh_box_node, children[i]);
      const uint32_t coord_offsets[2] = {
         offsetof(struct lvp_bvh_box_node, bounds[i].min.x),
         offsetof(struct lvp_bvh_box_node, bounds[i].max.x),
      };

      nir_def *child_index =
         nir_build_load_global(b, 1, 32, nir_iadd_imm(b, node_addr, child_offset));

      /* node_coords[0] = AABB min, node_coords[1] = AABB max. */
      nir_def *node_coords[2] = {
         nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[0])),
         nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[1])),
      };

      /* If x of the aabb min is NaN, then this is an inactive aabb.
       * We don't need to care about any other components being NaN as that is UB.
       * https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap36.html#VkAabbPositionsKHR
       */
      nir_def *min_x = nir_channel(b, node_coords[0], 0);
      nir_def *min_x_is_not_nan =
         nir_inot(b, nir_fneu(b, min_x, min_x)); /* NaN != NaN -> true */

      /* Parametric distances to the min/max slab planes on each axis. */
      nir_def *bound0 = nir_fmul(b, nir_fsub(b, node_coords[0], origin), inv_dir);
      nir_def *bound1 = nir_fmul(b, nir_fsub(b, node_coords[1], origin), inv_dir);

      /* Entry distance: max over axes of the per-axis nearer plane. */
      nir_def *tmin =
         nir_fmax(b,
                  nir_fmax(b, nir_fmin(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)),
                           nir_fmin(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))),
                  nir_fmin(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2)));

      /* Exit distance: min over axes of the per-axis farther plane. */
      nir_def *tmax =
         nir_fmin(b,
                  nir_fmin(b, nir_fmax(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)),
                           nir_fmax(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))),
                  nir_fmax(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2)));

      /* Hit if the slab interval overlaps [max(0, tmin), ray_tmax). */
      nir_push_if(b,
                  nir_iand(b, min_x_is_not_nan,
                           nir_iand(b, nir_fge(b, tmax, nir_fmax(b, nir_imm_float(b, 0.0f), tmin)),
                                    nir_flt(b, tmin, ray_tmax))));
      {
         /* Write only lane i of the vec2 variables (writemask 1u << i). */
         nir_def *new_child_indices[2] = {child_index, child_index};
         nir_store_var(b, child_indices, nir_vec(b, new_child_indices, 2), 1u << i);

         nir_def *new_distances[2] = {tmin, tmin};
         nir_store_var(b, distances, nir_vec(b, new_distances, 2), 1u << i);
      }
      nir_pop_if(b, NULL);
   }

   /* Swap the two children if the second one is nearer, so the traversal
    * loop visits the closer subtree first. */
   nir_def *ssa_distances = nir_load_var(b, distances);
   nir_def *ssa_indices = nir_load_var(b, child_indices);
   nir_push_if(b, nir_flt(b, nir_channel(b, ssa_distances, 1), nir_channel(b, ssa_distances, 0)));
   {
      nir_store_var(b, child_indices,
                    nir_vec2(b, nir_channel(b, ssa_indices, 1), nir_channel(b, ssa_indices, 0)),
                    0b11);
   }
   nir_pop_if(b, NULL);

   return nir_load_var(b, child_indices);
}
141 
/* Intersect a ray with a triangle leaf node.
 *
 * Returns a vec4 (t, det, v, w): t is the normalized hit distance
 * (INFINITY on a miss), the sign of det distinguishes front/back facing,
 * and v/w are barycentric coordinates of the hit.
 */
static nir_def *
lvp_build_intersect_ray_tri(nir_builder *b, nir_def *node_addr, nir_def *ray_tmax,
                            nir_def *origin, nir_def *dir, nir_def *inv_dir)
{
   const struct glsl_type *vec4_type = glsl_vector_type(GLSL_TYPE_FLOAT, 4);

   const uint32_t coord_offsets[3] = {
      offsetof(struct lvp_bvh_triangle_node, coords[0]),
      offsetof(struct lvp_bvh_triangle_node, coords[1]),
      offsetof(struct lvp_bvh_triangle_node, coords[2]),
   };

   /* The three triangle vertices. */
   nir_def *node_coords[3] = {
      nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[0])),
      nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[1])),
      nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[2])),
   };

   /* Default result: miss (t = INFINITY). */
   nir_variable *result = nir_variable_create(b->shader, nir_var_shader_temp, vec4_type, "result");
   nir_store_var(b, result, nir_imm_vec4(b, INFINITY, 1.0f, 0.0f, 0.0f), 0xf);

   /* Based on watertight Ray/Triangle intersection from
    * http://jcgt.org/published/0002/01/05/paper.pdf */

   /* Calculate the dimension where the ray direction is largest */
   nir_def *abs_dir = nir_fabs(b, dir);

   nir_def *abs_dirs[3] = {
      nir_channel(b, abs_dir, 0),
      nir_channel(b, abs_dir, 1),
      nir_channel(b, abs_dir, 2),
   };
   /* Find index of greatest value of abs_dir and put that as kz. */
   nir_def *kz = nir_bcsel(
      b, nir_fge(b, abs_dirs[0], abs_dirs[1]),
      nir_bcsel(b, nir_fge(b, abs_dirs[0], abs_dirs[2]), nir_imm_int(b, 0), nir_imm_int(b, 2)),
      nir_bcsel(b, nir_fge(b, abs_dirs[1], abs_dirs[2]), nir_imm_int(b, 1), nir_imm_int(b, 2)));
   /* kx, ky are the remaining two axes in cyclic order. */
   nir_def *kx = nir_imod_imm(b, nir_iadd_imm(b, kz, 1), 3);
   nir_def *ky = nir_imod_imm(b, nir_iadd_imm(b, kx, 1), 3);
   nir_def *k_indices[3] = {kx, ky, kz};
   nir_def *k = nir_vec(b, k_indices, 3);

   /* Swap kx and ky dimensions to preserve winding order */
   unsigned swap_xy_swizzle[4] = {1, 0, 2, 3};
   k = nir_bcsel(b, nir_flt_imm(b, nir_vector_extract(b, dir, kz), 0.0f),
                 nir_swizzle(b, k, swap_xy_swizzle, 3), k);

   kx = nir_channel(b, k, 0);
   ky = nir_channel(b, k, 1);
   kz = nir_channel(b, k, 2);

   /* Calculate shear constants */
   nir_def *sz = nir_frcp(b, nir_vector_extract(b, dir, kz));
   nir_def *sx = nir_fmul(b, nir_vector_extract(b, dir, kx), sz);
   nir_def *sy = nir_fmul(b, nir_vector_extract(b, dir, ky), sz);

   /* Calculate vertices relative to ray origin */
   nir_def *v_a = nir_fsub(b, node_coords[0], origin);
   nir_def *v_b = nir_fsub(b, node_coords[1], origin);
   nir_def *v_c = nir_fsub(b, node_coords[2], origin);

   /* Perform shear and scale */
   nir_def *ax =
      nir_fsub(b, nir_vector_extract(b, v_a, kx), nir_fmul(b, sx, nir_vector_extract(b, v_a, kz)));
   nir_def *ay =
      nir_fsub(b, nir_vector_extract(b, v_a, ky), nir_fmul(b, sy, nir_vector_extract(b, v_a, kz)));
   nir_def *bx =
      nir_fsub(b, nir_vector_extract(b, v_b, kx), nir_fmul(b, sx, nir_vector_extract(b, v_b, kz)));
   nir_def *by =
      nir_fsub(b, nir_vector_extract(b, v_b, ky), nir_fmul(b, sy, nir_vector_extract(b, v_b, kz)));
   nir_def *cx =
      nir_fsub(b, nir_vector_extract(b, v_c, kx), nir_fmul(b, sx, nir_vector_extract(b, v_c, kz)));
   nir_def *cy =
      nir_fsub(b, nir_vector_extract(b, v_c, ky), nir_fmul(b, sy, nir_vector_extract(b, v_c, kz)));

   /* Compute the edge functions in double precision to keep the test
    * watertight. */
   ax = nir_f2f64(b, ax);
   ay = nir_f2f64(b, ay);
   bx = nir_f2f64(b, bx);
   by = nir_f2f64(b, by);
   cx = nir_f2f64(b, cx);
   cy = nir_f2f64(b, cy);

   nir_def *u = nir_fsub(b, nir_fmul(b, cx, by), nir_fmul(b, cy, bx));
   nir_def *v = nir_fsub(b, nir_fmul(b, ax, cy), nir_fmul(b, ay, cx));
   nir_def *w = nir_fsub(b, nir_fmul(b, bx, ay), nir_fmul(b, by, ax));

   /* Perform edge tests. */
   nir_def *cond_back = nir_ior(b, nir_ior(b, nir_flt_imm(b, u, 0.0f), nir_flt_imm(b, v, 0.0f)),
                                    nir_flt_imm(b, w, 0.0f));

   nir_def *cond_front = nir_ior(
      b, nir_ior(b, nir_fgt_imm(b, u, 0.0f), nir_fgt_imm(b, v, 0.0f)), nir_fgt_imm(b, w, 0.0f));

   /* A hit requires all edge functions to agree in sign (or be zero). */
   nir_def *cond = nir_inot(b, nir_iand(b, cond_back, cond_front));

   nir_push_if(b, cond);
   {
      nir_def *det = nir_fadd(b, u, nir_fadd(b, v, w));

      sz = nir_f2f64(b, sz);

      v_a = nir_f2f64(b, v_a);
      v_b = nir_f2f64(b, v_b);
      v_c = nir_f2f64(b, v_c);

      /* Sheared/scaled z-coordinates of the vertices. */
      nir_def *az = nir_fmul(b, sz, nir_vector_extract(b, v_a, kz));
      nir_def *bz = nir_fmul(b, sz, nir_vector_extract(b, v_b, kz));
      nir_def *cz = nir_fmul(b, sz, nir_vector_extract(b, v_c, kz));

      /* Unnormalized hit distance (t * det). */
      nir_def *t =
         nir_fadd(b, nir_fadd(b, nir_fmul(b, u, az), nir_fmul(b, v, bz)), nir_fmul(b, w, cz));

      /* Multiply by sign(det) so the comparison below works for both
       * triangle orientations. */
      nir_def *t_signed = nir_fmul(b, nir_fsign(b, det), t);

      /* Reject hits behind the ray origin. */
      nir_def *det_cond_front = nir_inot(b, nir_flt_imm(b, t_signed, 0.0f));

      nir_push_if(b, det_cond_front);
      {
         /* Normalize t and the barycentrics by det and store the result. */
         t = nir_f2f32(b, nir_fdiv(b, t, det));
         det = nir_f2f32(b, det);
         v = nir_fdiv(b, nir_f2f32(b, v), det);
         w = nir_fdiv(b, nir_f2f32(b, w), det);

         nir_def *indices[4] = {t, det, v, w};
         nir_store_var(b, result, nir_vec(b, indices, 4), 0xf);
      }
      nir_pop_if(b, NULL);
   }
   nir_pop_if(b, NULL);

   return nir_load_var(b, result);
}
274 
275 static nir_def *
lvp_build_hit_is_opaque(nir_builder * b,nir_def * sbt_offset_and_flags,const struct lvp_ray_flags * ray_flags,nir_def * geometry_id_and_flags)276 lvp_build_hit_is_opaque(nir_builder *b, nir_def *sbt_offset_and_flags,
277                         const struct lvp_ray_flags *ray_flags, nir_def *geometry_id_and_flags)
278 {
279    nir_def *opaque = nir_uge_imm(b, nir_ior(b, geometry_id_and_flags, sbt_offset_and_flags),
280                                      LVP_INSTANCE_FORCE_OPAQUE | LVP_INSTANCE_NO_FORCE_NOT_OPAQUE);
281    opaque = nir_bcsel(b, ray_flags->force_opaque, nir_imm_true(b), opaque);
282    opaque = nir_bcsel(b, ray_flags->force_not_opaque, nir_imm_false(b), opaque);
283    return opaque;
284 }
285 
/* Handle an intersected triangle leaf: apply distance, facing and
 * opacity culling based on ray and instance flags, and invoke the
 * user-provided triangle callback for hits that survive.
 */
static void
lvp_build_triangle_case(nir_builder *b, const struct lvp_ray_traversal_args *args,
                        const struct lvp_ray_flags *ray_flags, nir_def *result,
                        nir_def *node_addr)
{
   /* Nothing to do when the caller did not ask for triangle hits. */
   if (!args->triangle_cb)
      return;

   struct lvp_triangle_intersection intersection;
   /* "result" is the vec4 from lvp_build_intersect_ray_tri: (t, det, v, w). */
   intersection.t = nir_channel(b, result, 0);
   intersection.barycentrics = nir_channels(b, result, 0xc);

   /* Only hits nearer than the current tmax are interesting. */
   nir_push_if(b, nir_flt(b, intersection.t, nir_load_deref(b, args->vars.tmax)));
   {
      /* Positive determinant means front-facing, possibly flipped by the
       * instance's facing-flip flag. */
      intersection.frontface = nir_fgt_imm(b, nir_channel(b, result, 1), 0);
      nir_def *switch_ccw = nir_test_mask(b, nir_load_deref(b, args->vars.sbt_offset_and_flags),
                                              LVP_INSTANCE_TRIANGLE_FLIP_FACING);
      intersection.frontface = nir_ixor(b, intersection.frontface, switch_ccw);

      nir_def *not_cull = ray_flags->no_skip_triangles;
      nir_def *not_facing_cull =
         nir_bcsel(b, intersection.frontface, ray_flags->no_cull_front, ray_flags->no_cull_back);

      /* The instance can disable facing-based culling entirely. */
      not_cull =
         nir_iand(b, not_cull,
                  nir_ior(b, not_facing_cull,
                          nir_test_mask(b, nir_load_deref(b, args->vars.sbt_offset_and_flags),
                                        LVP_INSTANCE_TRIANGLE_FACING_CULL_DISABLE)));

      /* Also require the hit to lie beyond tmin. */
      nir_push_if(b, nir_iand(b, nir_flt(b, args->tmin, intersection.t), not_cull));
      {
         intersection.base.node_addr = node_addr;
         /* primitive_id and geometry_id_and_flags are adjacent 32-bit
          * fields, so load both with one 2-component load. */
         nir_def *triangle_info = nir_build_load_global(
            b, 2, 32,
            nir_iadd_imm(b, intersection.base.node_addr,
                         offsetof(struct lvp_bvh_triangle_node, primitive_id)));
         intersection.base.primitive_id = nir_channel(b, triangle_info, 0);
         intersection.base.geometry_id_and_flags = nir_channel(b, triangle_info, 1);
         intersection.base.opaque =
            lvp_build_hit_is_opaque(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), ray_flags,
                                    intersection.base.geometry_id_and_flags);

         /* Finally, apply the opaque/no-opaque culling ray flags. */
         not_cull = nir_bcsel(b, intersection.base.opaque, ray_flags->no_cull_opaque,
                              ray_flags->no_cull_no_opaque);
         nir_push_if(b, not_cull);
         {
            args->triangle_cb(b, &intersection, args, ray_flags);
         }
         nir_pop_if(b, NULL);
      }
      nir_pop_if(b, NULL);
   }
   nir_pop_if(b, NULL);
}
340 
341 static void
lvp_build_aabb_case(nir_builder * b,const struct lvp_ray_traversal_args * args,const struct lvp_ray_flags * ray_flags,nir_def * node_addr)342 lvp_build_aabb_case(nir_builder *b, const struct lvp_ray_traversal_args *args,
343                            const struct lvp_ray_flags *ray_flags, nir_def *node_addr)
344 {
345    if (!args->aabb_cb)
346       return;
347 
348    struct lvp_leaf_intersection intersection;
349    intersection.node_addr = node_addr;
350    nir_def *triangle_info = nir_build_load_global(
351       b, 2, 32,
352       nir_iadd_imm(b, intersection.node_addr, offsetof(struct lvp_bvh_aabb_node, primitive_id)));
353    intersection.primitive_id = nir_channel(b, triangle_info, 0);
354    intersection.geometry_id_and_flags = nir_channel(b, triangle_info, 1);
355    intersection.opaque = lvp_build_hit_is_opaque(b, nir_load_deref(b, args->vars.sbt_offset_and_flags),
356                                                  ray_flags, intersection.geometry_id_and_flags);
357 
358    nir_def *not_cull =
359       nir_bcsel(b, intersection.opaque, ray_flags->no_cull_opaque, ray_flags->no_cull_no_opaque);
360    not_cull = nir_iand(b, not_cull, ray_flags->no_skip_aabbs);
361    nir_push_if(b, not_cull);
362    {
363       args->aabb_cb(b, &intersection, args, ray_flags);
364    }
365    nir_pop_if(b, NULL);
366 }
367 
368 static void
lvp_build_push_stack(nir_builder * b,const struct lvp_ray_traversal_args * args,nir_def * node)369 lvp_build_push_stack(nir_builder *b, const struct lvp_ray_traversal_args *args, nir_def *node)
370 {
371    nir_def *stack_ptr = nir_load_deref(b, args->vars.stack_ptr);
372    nir_store_deref(b, nir_build_deref_array(b, args->vars.stack, stack_ptr), node, 0x1);
373    nir_store_deref(b, args->vars.stack_ptr, nir_iadd_imm(b, nir_load_deref(b, args->vars.stack_ptr), 1), 0x1);
374 }
375 
376 static nir_def *
lvp_build_pop_stack(nir_builder * b,const struct lvp_ray_traversal_args * args)377 lvp_build_pop_stack(nir_builder *b, const struct lvp_ray_traversal_args *args)
378 {
379    nir_def *stack_ptr = nir_iadd_imm(b, nir_load_deref(b, args->vars.stack_ptr), -1);
380    nir_store_deref(b, args->vars.stack_ptr, stack_ptr, 0x1);
381    return nir_load_deref(b, nir_build_deref_array(b, args->vars.stack, stack_ptr));
382 }
383 
/* Emit the stack-based BVH traversal loop.
 *
 * The loop pops nodes from the traversal stack, descends through box and
 * instance nodes, and dispatches leaf hits to the triangle/AABB callbacks
 * in "args". Returns a boolean: true if the loop was left with work still
 * pending ("incomplete"), false if traversal ran to completion.
 */
nir_def *
lvp_build_ray_traversal(nir_builder *b, const struct lvp_ray_traversal_args *args)
{
   nir_variable *incomplete = nir_local_variable_create(b->impl, glsl_bool_type(), "incomplete");
   nir_store_var(b, incomplete, nir_imm_true(b), 0x1);

   nir_def *vec3ones = nir_imm_vec3(b, 1.0, 1.0, 1.0);

   /* Decode the SPIR-V ray flags once, up front. */
   struct lvp_ray_flags ray_flags = {
      .force_opaque = nir_test_mask(b, args->flags, SpvRayFlagsOpaqueKHRMask),
      .force_not_opaque = nir_test_mask(b, args->flags, SpvRayFlagsNoOpaqueKHRMask),
      .terminate_on_first_hit =
         nir_test_mask(b, args->flags, SpvRayFlagsTerminateOnFirstHitKHRMask),
      .no_cull_front = nir_ieq_imm(
         b, nir_iand_imm(b, args->flags, SpvRayFlagsCullFrontFacingTrianglesKHRMask), 0),
      .no_cull_back =
         nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullBackFacingTrianglesKHRMask), 0),
      .no_cull_opaque =
         nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullOpaqueKHRMask), 0),
      .no_cull_no_opaque =
         nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullNoOpaqueKHRMask), 0),
      .no_skip_triangles =
         nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsSkipTrianglesKHRMask), 0),
      .no_skip_aabbs = nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsSkipAABBsKHRMask), 0),
   };

   nir_push_loop(b);
   {
      /* No current node: pop the next one off the stack, or finish. */
      nir_push_if(b, nir_ieq_imm(b, nir_load_deref(b, args->vars.current_node), LVP_BVH_INVALID_NODE));
      {
         /* Empty stack -> the whole tree has been visited. */
         nir_push_if(b, nir_ieq_imm(b, nir_load_deref(b, args->vars.stack_ptr), 0));
         {
            nir_store_var(b, incomplete, nir_imm_false(b), 0x1);
            nir_jump(b, nir_jump_break);
         }
         nir_pop_if(b, NULL);

         /* Popped back past stack_base: we are leaving a BLAS, so restore
          * the top-level BVH base and the world-space ray. */
         nir_push_if(b, nir_ige(b, nir_load_deref(b, args->vars.stack_base), nir_load_deref(b, args->vars.stack_ptr)));
         {
            nir_store_deref(b, args->vars.stack_base, nir_imm_int(b, -1), 1);

            nir_store_deref(b, args->vars.bvh_base, args->root_bvh_base, 1);
            nir_store_deref(b, args->vars.origin, args->origin, 7);
            nir_store_deref(b, args->vars.dir, args->dir, 7);
            nir_store_deref(b, args->vars.inv_dir, nir_fdiv(b, vec3ones, args->dir), 7);
         }
         nir_pop_if(b, NULL);

         nir_store_deref(b, args->vars.current_node, lvp_build_pop_stack(b, args), 0x1);
      }
      nir_pop_if(b, NULL);

      /* Consume the current node; it is reset so the next iteration pops
       * from the stack unless a box intersection below sets it again. */
      nir_def *bvh_node = nir_load_deref(b, args->vars.current_node);
      nir_store_deref(b, args->vars.current_node, nir_imm_int(b, LVP_BVH_INVALID_NODE), 0x1);

      /* The low 2 bits of a node id encode its type; the rest is the byte
       * offset from the current BVH base. */
      nir_def *node_addr = nir_iadd(b, nir_load_deref(b, args->vars.bvh_base), nir_u2u64(b, nir_iand_imm(b, bvh_node, ~3u)));

      nir_def *node_type = nir_iand_imm(b, bvh_node, 3);
      nir_push_if(b, nir_uge_imm(b, node_type, lvp_bvh_node_internal));
      {
         nir_push_if(b, nir_uge_imm(b, node_type, lvp_bvh_node_instance));
         {
            nir_push_if(b, nir_ieq_imm(b, node_type, lvp_bvh_node_aabb));
            {
               lvp_build_aabb_case(b, args, &ray_flags, node_addr);
            }
            nir_push_else(b, NULL);
            {
               /* instance */
               nir_store_deref(b, args->vars.instance_addr, node_addr, 1);

               /* Channels 0-1 hold the 64-bit BLAS pointer, channel 2 the
                * custom-instance-id/mask word, channel 3 the SBT offset and
                * instance flags (see usage below). */
               nir_def *instance_data = nir_build_load_global(
                  b, 4, 32,
                  nir_iadd_imm(b, node_addr, offsetof(struct lvp_bvh_instance_node, bvh_ptr)));

               nir_def *wto_matrix[3];
               lvp_load_wto_matrix(b, node_addr, wto_matrix);

               nir_store_deref(b, args->vars.sbt_offset_and_flags, nir_channel(b, instance_data, 3),
                               1);

               /* Skip the instance when its visibility mask (upper 8 bits)
                * does not intersect the ray's cull mask. */
               nir_def *instance_and_mask = nir_channel(b, instance_data, 2);
               nir_push_if(b, nir_ult(b, nir_iand(b, instance_and_mask, args->cull_mask),
                                      nir_imm_int(b, 1 << 24)));
               {
                  nir_jump(b, nir_jump_continue);
               }
               nir_pop_if(b, NULL);

               nir_store_deref(b, args->vars.bvh_base,
                               nir_pack_64_2x32(b, nir_trim_vector(b, instance_data, 2)), 1);

               /* Remember where the top-level part of the stack ends so the
                * pop path above knows when we leave this BLAS. */
               nir_store_deref(b, args->vars.stack_base, nir_load_deref(b, args->vars.stack_ptr), 0x1);

               /* Push the instance root node onto the stack */
               nir_store_deref(b, args->vars.current_node, nir_imm_int(b, LVP_BVH_ROOT_NODE), 0x1);

               /* Transform the ray into object space */
               nir_store_deref(b, args->vars.origin,
                               lvp_mul_vec3_mat(b, args->origin, wto_matrix, true), 7);
               nir_store_deref(b, args->vars.dir,
                               lvp_mul_vec3_mat(b, args->dir, wto_matrix, false), 7);
               nir_store_deref(b, args->vars.inv_dir,
                               nir_fdiv(b, vec3ones, nir_load_deref(b, args->vars.dir)), 7);
            }
            nir_pop_if(b, NULL);
         }
         nir_push_else(b, NULL);
         {
            /* Box node: continue with the near child (component 0) and push
             * the far child (component 1) if it was also hit. */
            nir_def *result = lvp_build_intersect_ray_box(
               b, node_addr, nir_load_deref(b, args->vars.tmax),
               nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.dir),
               nir_load_deref(b, args->vars.inv_dir));

            nir_store_deref(b, args->vars.current_node, nir_channel(b, result, 0), 0x1);

            nir_push_if(b, nir_ine_imm(b, nir_channel(b, result, 1), LVP_BVH_INVALID_NODE));
            {
               lvp_build_push_stack(b, args, nir_channel(b, result, 1));
            }
            nir_pop_if(b, NULL);
         }
         nir_pop_if(b, NULL);
      }
      nir_push_else(b, NULL);
      {
         /* Triangle leaf: intersect and hand the result to the callback. */
         nir_def *result = lvp_build_intersect_ray_tri(
            b, node_addr, nir_load_deref(b, args->vars.tmax), nir_load_deref(b, args->vars.origin),
            nir_load_deref(b, args->vars.dir), nir_load_deref(b, args->vars.inv_dir));

         lvp_build_triangle_case(b, args, &ray_flags, result, node_addr);
      }
      nir_pop_if(b, NULL);
   }
   nir_pop_loop(b, NULL);

   return nir_load_var(b, incomplete);
}
522