/*
 * Copyright © 2021 Google
 * Copyright © 2023 Valve Corporation
 * SPDX-License-Identifier: MIT
 */

#include "lvp_nir_ray_tracing.h"
#include "lvp_acceleration_structure.h"
#include "lvp_private.h"

#include "compiler/spirv/spirv.h"

#include <float.h>
#include <math.h>

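/* Multiplies a vec3 by a row-major 3x4 matrix given as three vec4 rows. If
 * "translation" is set, the fourth column of each row is added in, i.e. the
 * vector is transformed as a position rather than as a direction.
 */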
nir_def *
lvp_mul_vec3_mat(nir_builder *b, nir_def *vec, nir_def *matrix[], bool translation)
{
   nir_def *result_components[3] = {
      nir_channel(b, matrix[0], 3),
      nir_channel(b, matrix[1], 3),
      nir_channel(b, matrix[2], 3),
   };
   for (unsigned i = 0; i < 3; ++i) {
      for (unsigned j = 0; j < 3; ++j) {
         nir_def *v =
            nir_fmul(b, nir_channels(b, vec, 1 << j), nir_channels(b, matrix[i], 1 << j));
         result_components[i] = (translation || j) ? nir_fadd(b, result_components[i], v) : v;
      }
   }
   return nir_vec(b, result_components, 3);
}

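/* Loads the 3x4 world-to-object matrix of an instance node as three vec4
 * rows.
 */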
void
lvp_load_wto_matrix(nir_builder *b, nir_def *instance_addr, nir_def **out)
{
   unsigned offset = offsetof(struct lvp_bvh_instance_node, wto_matrix);
   for (unsigned i = 0; i < 3; ++i) {
      out[i] = nir_build_load_global(b, 4, 32, nir_iadd_imm(b, instance_addr, offset + i * 16));
   }
}

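/* Loads one vertex position (index = 0, 1 or 2) of the triangle with the
 * given primitive ID from the BLAS referenced by an instance node.
 */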
nir_def *
lvp_load_vertex_position(nir_builder *b, nir_def *instance_addr, nir_def *primitive_id,
                         uint32_t index)
{
   nir_def *bvh_addr = nir_build_load_global(
      b, 1, 64, nir_iadd_imm(b, instance_addr, offsetof(struct lvp_bvh_instance_node, bvh_ptr)));

   nir_def *leaf_nodes_offset = nir_build_load_global(
      b, 1, 32, nir_iadd_imm(b, bvh_addr, offsetof(struct lvp_bvh_header, leaf_nodes_offset)));

   nir_def *offset = nir_imul_imm(b, primitive_id, sizeof(struct lvp_bvh_triangle_node));
   offset = nir_iadd(b, offset, leaf_nodes_offset);
   offset = nir_iadd_imm(b, offset, index * 3 * sizeof(float));

   return nir_build_load_global(b, 3, 32, nir_iadd(b, bvh_addr, nir_u2u64(b, offset)));
}

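/* Intersects the ray against both children of a box node using a slab test
 * and returns their indices as a uvec2, sorted so that the closer hit comes
 * first. Children that are missed keep the initial index of 0xffffffff
 * (LVP_BVH_INVALID_NODE).
 */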
static nir_def *
lvp_build_intersect_ray_box(nir_builder *b, nir_def *node_addr, nir_def *ray_tmax,
                            nir_def *origin, nir_def *dir, nir_def *inv_dir)
{
   const struct glsl_type *vec2_type = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
   const struct glsl_type *uvec2_type = glsl_vector_type(GLSL_TYPE_UINT, 2);

   nir_variable *distances =
      nir_variable_create(b->shader, nir_var_shader_temp, vec2_type, "distances");
   nir_store_var(b, distances, nir_imm_vec2(b, INFINITY, INFINITY), 0xf);

   nir_variable *child_indices =
      nir_variable_create(b->shader, nir_var_shader_temp, uvec2_type, "child_indices");
   nir_store_var(b, child_indices, nir_imm_ivec2(b, 0xffffffffu, 0xffffffffu), 0xf);

   inv_dir = nir_bcsel(b, nir_feq_imm(b, dir, 0), nir_imm_float(b, FLT_MAX), inv_dir);

   for (int i = 0; i < 2; i++) {
      const uint32_t child_offset = offsetof(struct lvp_bvh_box_node, children[i]);
      const uint32_t coord_offsets[2] = {
         offsetof(struct lvp_bvh_box_node, bounds[i].min.x),
         offsetof(struct lvp_bvh_box_node, bounds[i].max.x),
      };

      nir_def *child_index =
         nir_build_load_global(b, 1, 32, nir_iadd_imm(b, node_addr, child_offset));

      nir_def *node_coords[2] = {
         nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[0])),
         nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[1])),
      };

      /* If x of the aabb min is NaN, then this is an inactive aabb.
       * We don't need to care about any other components being NaN as that is UB.
       * https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap36.html#VkAabbPositionsKHR
       */
      nir_def *min_x = nir_channel(b, node_coords[0], 0);
      nir_def *min_x_is_not_nan =
         nir_inot(b, nir_fneu(b, min_x, min_x)); /* NaN != NaN -> true */

      nir_def *bound0 = nir_fmul(b, nir_fsub(b, node_coords[0], origin), inv_dir);
      nir_def *bound1 = nir_fmul(b, nir_fsub(b, node_coords[1], origin), inv_dir);

      nir_def *tmin =
         nir_fmax(b,
                  nir_fmax(b, nir_fmin(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)),
                           nir_fmin(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))),
                  nir_fmin(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2)));

      nir_def *tmax =
         nir_fmin(b,
                  nir_fmin(b, nir_fmax(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)),
                           nir_fmax(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))),
                  nir_fmax(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2)));

      nir_push_if(b,
                  nir_iand(b, min_x_is_not_nan,
                           nir_iand(b, nir_fge(b, tmax, nir_fmax(b, nir_imm_float(b, 0.0f), tmin)),
                                    nir_flt(b, tmin, ray_tmax))));
      {
         nir_def *new_child_indices[2] = {child_index, child_index};
         nir_store_var(b, child_indices, nir_vec(b, new_child_indices, 2), 1u << i);

         nir_def *new_distances[2] = {tmin, tmin};
         nir_store_var(b, distances, nir_vec(b, new_distances, 2), 1u << i);
      }
      nir_pop_if(b, NULL);
   }

   /* If the second child was hit closer than the first, swap the two indices
    * so traversal visits the nearer child first.
    */
   nir_def *ssa_distances = nir_load_var(b, distances);
   nir_def *ssa_indices = nir_load_var(b, child_indices);
   nir_push_if(b, nir_flt(b, nir_channel(b, ssa_distances, 1), nir_channel(b, ssa_distances, 0)));
   {
      nir_store_var(b, child_indices,
                    nir_vec2(b, nir_channel(b, ssa_indices, 1), nir_channel(b, ssa_indices, 0)),
                    0b11);
   }
   nir_pop_if(b, NULL);

   return nir_load_var(b, child_indices);
}

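/* Watertight ray/triangle intersection after Woop, Benthin and Wald (see the
 * paper linked below). Returns vec4(t, det, v, w): the hit distance, the
 * signed determinant (its sign encodes the facing) and two barycentric
 * coordinates. On a miss, t stays at +INF.
 */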
static nir_def *
lvp_build_intersect_ray_tri(nir_builder *b, nir_def *node_addr, nir_def *ray_tmax,
                            nir_def *origin, nir_def *dir, nir_def *inv_dir)
{
   const struct glsl_type *vec4_type = glsl_vector_type(GLSL_TYPE_FLOAT, 4);

   const uint32_t coord_offsets[3] = {
      offsetof(struct lvp_bvh_triangle_node, coords[0]),
      offsetof(struct lvp_bvh_triangle_node, coords[1]),
      offsetof(struct lvp_bvh_triangle_node, coords[2]),
   };

   nir_def *node_coords[3] = {
      nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[0])),
      nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[1])),
      nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[2])),
   };

   nir_variable *result = nir_variable_create(b->shader, nir_var_shader_temp, vec4_type, "result");
   nir_store_var(b, result, nir_imm_vec4(b, INFINITY, 1.0f, 0.0f, 0.0f), 0xf);

   /* Based on watertight Ray/Triangle intersection from
    * http://jcgt.org/published/0002/01/05/paper.pdf */

   /* Calculate the dimension where the ray direction is largest */
   nir_def *abs_dir = nir_fabs(b, dir);

   nir_def *abs_dirs[3] = {
      nir_channel(b, abs_dir, 0),
      nir_channel(b, abs_dir, 1),
      nir_channel(b, abs_dir, 2),
   };
   /* Find index of greatest value of abs_dir and put that as kz. */
   nir_def *kz = nir_bcsel(
      b, nir_fge(b, abs_dirs[0], abs_dirs[1]),
      nir_bcsel(b, nir_fge(b, abs_dirs[0], abs_dirs[2]), nir_imm_int(b, 0), nir_imm_int(b, 2)),
      nir_bcsel(b, nir_fge(b, abs_dirs[1], abs_dirs[2]), nir_imm_int(b, 1), nir_imm_int(b, 2)));
   nir_def *kx = nir_imod_imm(b, nir_iadd_imm(b, kz, 1), 3);
   nir_def *ky = nir_imod_imm(b, nir_iadd_imm(b, kx, 1), 3);
   nir_def *k_indices[3] = {kx, ky, kz};
   nir_def *k = nir_vec(b, k_indices, 3);

   /* Swap kx and ky dimensions to preserve winding order */
   unsigned swap_xy_swizzle[4] = {1, 0, 2, 3};
   k = nir_bcsel(b, nir_flt_imm(b, nir_vector_extract(b, dir, kz), 0.0f),
                 nir_swizzle(b, k, swap_xy_swizzle, 3), k);

   kx = nir_channel(b, k, 0);
   ky = nir_channel(b, k, 1);
   kz = nir_channel(b, k, 2);

   /* Calculate shear constants */
   nir_def *sz = nir_frcp(b, nir_vector_extract(b, dir, kz));
   nir_def *sx = nir_fmul(b, nir_vector_extract(b, dir, kx), sz);
   nir_def *sy = nir_fmul(b, nir_vector_extract(b, dir, ky), sz);

   /* Calculate vertices relative to ray origin */
   nir_def *v_a = nir_fsub(b, node_coords[0], origin);
   nir_def *v_b = nir_fsub(b, node_coords[1], origin);
   nir_def *v_c = nir_fsub(b, node_coords[2], origin);

   /* Perform shear and scale */
   nir_def *ax =
      nir_fsub(b, nir_vector_extract(b, v_a, kx), nir_fmul(b, sx, nir_vector_extract(b, v_a, kz)));
   nir_def *ay =
      nir_fsub(b, nir_vector_extract(b, v_a, ky), nir_fmul(b, sy, nir_vector_extract(b, v_a, kz)));
   nir_def *bx =
      nir_fsub(b, nir_vector_extract(b, v_b, kx), nir_fmul(b, sx, nir_vector_extract(b, v_b, kz)));
   nir_def *by =
      nir_fsub(b, nir_vector_extract(b, v_b, ky), nir_fmul(b, sy, nir_vector_extract(b, v_b, kz)));
   nir_def *cx =
      nir_fsub(b, nir_vector_extract(b, v_c, kx), nir_fmul(b, sx, nir_vector_extract(b, v_c, kz)));
   nir_def *cy =
      nir_fsub(b, nir_vector_extract(b, v_c, ky), nir_fmul(b, sy, nir_vector_extract(b, v_c, kz)));

   /* Compute the edge functions in double precision to keep the test
    * watertight.
    */
   ax = nir_f2f64(b, ax);
   ay = nir_f2f64(b, ay);
   bx = nir_f2f64(b, bx);
   by = nir_f2f64(b, by);
   cx = nir_f2f64(b, cx);
   cy = nir_f2f64(b, cy);

   nir_def *u = nir_fsub(b, nir_fmul(b, cx, by), nir_fmul(b, cy, bx));
   nir_def *v = nir_fsub(b, nir_fmul(b, ax, cy), nir_fmul(b, ay, cx));
   nir_def *w = nir_fsub(b, nir_fmul(b, bx, ay), nir_fmul(b, by, ax));

   /* Perform edge tests. */
   nir_def *cond_back = nir_ior(b, nir_ior(b, nir_flt_imm(b, u, 0.0f), nir_flt_imm(b, v, 0.0f)),
                                nir_flt_imm(b, w, 0.0f));

   nir_def *cond_front = nir_ior(
      b, nir_ior(b, nir_fgt_imm(b, u, 0.0f), nir_fgt_imm(b, v, 0.0f)), nir_fgt_imm(b, w, 0.0f));

   nir_def *cond = nir_inot(b, nir_iand(b, cond_back, cond_front));

   nir_push_if(b, cond);
   {
      nir_def *det = nir_fadd(b, u, nir_fadd(b, v, w));

      sz = nir_f2f64(b, sz);

      v_a = nir_f2f64(b, v_a);
      v_b = nir_f2f64(b, v_b);
      v_c = nir_f2f64(b, v_c);

      nir_def *az = nir_fmul(b, sz, nir_vector_extract(b, v_a, kz));
      nir_def *bz = nir_fmul(b, sz, nir_vector_extract(b, v_b, kz));
      nir_def *cz = nir_fmul(b, sz, nir_vector_extract(b, v_c, kz));

      nir_def *t =
         nir_fadd(b, nir_fadd(b, nir_fmul(b, u, az), nir_fmul(b, v, bz)), nir_fmul(b, w, cz));

      nir_def *t_signed = nir_fmul(b, nir_fsign(b, det), t);

      nir_def *det_cond_front = nir_inot(b, nir_flt_imm(b, t_signed, 0.0f));

      nir_push_if(b, det_cond_front);
      {
         t = nir_f2f32(b, nir_fdiv(b, t, det));
         det = nir_f2f32(b, det);
         v = nir_fdiv(b, nir_f2f32(b, v), det);
         w = nir_fdiv(b, nir_f2f32(b, w), det);

         nir_def *indices[4] = {t, det, v, w};
         nir_store_var(b, result, nir_vec(b, indices, 4), 0xf);
      }
      nir_pop_if(b, NULL);
   }
   nir_pop_if(b, NULL);

   return nir_load_var(b, result);
}

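/* Computes whether a hit is treated as opaque: the per-geometry opacity
 * flags can be overridden by the instance flags and by the
 * force-opaque/force-no-opaque ray flags.
 */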
static nir_def *
lvp_build_hit_is_opaque(nir_builder *b, nir_def *sbt_offset_and_flags,
                        const struct lvp_ray_flags *ray_flags, nir_def *geometry_id_and_flags)
{
   nir_def *opaque = nir_uge_imm(b, nir_ior(b, geometry_id_and_flags, sbt_offset_and_flags),
                                 LVP_INSTANCE_FORCE_OPAQUE | LVP_INSTANCE_NO_FORCE_NOT_OPAQUE);
   opaque = nir_bcsel(b, ray_flags->force_opaque, nir_imm_true(b), opaque);
   opaque = nir_bcsel(b, ray_flags->force_not_opaque, nir_imm_false(b), opaque);
   return opaque;
}

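/* Handles an intersected triangle leaf: rejects hits outside (tmin, tmax),
 * applies facing and opacity culling and finally invokes the triangle
 * callback with the hit data.
 */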
static void
lvp_build_triangle_case(nir_builder *b, const struct lvp_ray_traversal_args *args,
                        const struct lvp_ray_flags *ray_flags, nir_def *result,
                        nir_def *node_addr)
{
   if (!args->triangle_cb)
      return;

   struct lvp_triangle_intersection intersection;
   intersection.t = nir_channel(b, result, 0);
   intersection.barycentrics = nir_channels(b, result, 0xc);

   nir_push_if(b, nir_flt(b, intersection.t, nir_load_deref(b, args->vars.tmax)));
   {
      intersection.frontface = nir_fgt_imm(b, nir_channel(b, result, 1), 0);
      nir_def *switch_ccw = nir_test_mask(b, nir_load_deref(b, args->vars.sbt_offset_and_flags),
                                          LVP_INSTANCE_TRIANGLE_FLIP_FACING);
      intersection.frontface = nir_ixor(b, intersection.frontface, switch_ccw);

      nir_def *not_cull = ray_flags->no_skip_triangles;
      nir_def *not_facing_cull =
         nir_bcsel(b, intersection.frontface, ray_flags->no_cull_front, ray_flags->no_cull_back);

      not_cull =
         nir_iand(b, not_cull,
                  nir_ior(b, not_facing_cull,
                          nir_test_mask(b, nir_load_deref(b, args->vars.sbt_offset_and_flags),
                                        LVP_INSTANCE_TRIANGLE_FACING_CULL_DISABLE)));

      nir_push_if(b, nir_iand(b, nir_flt(b, args->tmin, intersection.t), not_cull));
      {
         intersection.base.node_addr = node_addr;
         nir_def *triangle_info = nir_build_load_global(
            b, 2, 32,
            nir_iadd_imm(b, intersection.base.node_addr,
                         offsetof(struct lvp_bvh_triangle_node, primitive_id)));
         intersection.base.primitive_id = nir_channel(b, triangle_info, 0);
         intersection.base.geometry_id_and_flags = nir_channel(b, triangle_info, 1);
         intersection.base.opaque =
            lvp_build_hit_is_opaque(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), ray_flags,
                                    intersection.base.geometry_id_and_flags);

         not_cull = nir_bcsel(b, intersection.base.opaque, ray_flags->no_cull_opaque,
                              ray_flags->no_cull_no_opaque);
         nir_push_if(b, not_cull);
         {
            args->triangle_cb(b, &intersection, args, ray_flags);
         }
         nir_pop_if(b, NULL);
      }
      nir_pop_if(b, NULL);
   }
   nir_pop_if(b, NULL);
}

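/* Handles an AABB (procedural geometry) leaf: applies opacity culling and
 * invokes the AABB callback, which is responsible for the actual
 * intersection test.
 */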
static void
lvp_build_aabb_case(nir_builder *b, const struct lvp_ray_traversal_args *args,
                    const struct lvp_ray_flags *ray_flags, nir_def *node_addr)
{
   if (!args->aabb_cb)
      return;

   struct lvp_leaf_intersection intersection;
   intersection.node_addr = node_addr;
   nir_def *primitive_info = nir_build_load_global(
      b, 2, 32,
      nir_iadd_imm(b, intersection.node_addr, offsetof(struct lvp_bvh_aabb_node, primitive_id)));
   intersection.primitive_id = nir_channel(b, primitive_info, 0);
   intersection.geometry_id_and_flags = nir_channel(b, primitive_info, 1);
   intersection.opaque = lvp_build_hit_is_opaque(b, nir_load_deref(b, args->vars.sbt_offset_and_flags),
                                                 ray_flags, intersection.geometry_id_and_flags);

   nir_def *not_cull =
      nir_bcsel(b, intersection.opaque, ray_flags->no_cull_opaque, ray_flags->no_cull_no_opaque);
   not_cull = nir_iand(b, not_cull, ray_flags->no_skip_aabbs);
   nir_push_if(b, not_cull);
   {
      args->aabb_cb(b, &intersection, args, ray_flags);
   }
   nir_pop_if(b, NULL);
}

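/* Pushes a node onto the traversal stack and advances the stack pointer. */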
static void
lvp_build_push_stack(nir_builder *b, const struct lvp_ray_traversal_args *args, nir_def *node)
{
   nir_def *stack_ptr = nir_load_deref(b, args->vars.stack_ptr);
   nir_store_deref(b, nir_build_deref_array(b, args->vars.stack, stack_ptr), node, 0x1);
   nir_store_deref(b, args->vars.stack_ptr, nir_iadd_imm(b, stack_ptr, 1), 0x1);
}

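/* Pops the most recently pushed node off the traversal stack. */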
static nir_def *
lvp_build_pop_stack(nir_builder *b, const struct lvp_ray_traversal_args *args)
{
   nir_def *stack_ptr = nir_iadd_imm(b, nir_load_deref(b, args->vars.stack_ptr), -1);
   nir_store_deref(b, args->vars.stack_ptr, stack_ptr, 0x1);
   return nir_load_deref(b, nir_build_deref_array(b, args->vars.stack, stack_ptr));
}

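/* Emits the BVH traversal loop. Starting from args->root_bvh_base, this
 * walks box nodes with a stack, descends into instance nodes (transforming
 * the ray into object space) and hands triangle and AABB leaves to the
 * respective callbacks. Returns a boolean that is true if a callback
 * terminated the traversal early (e.g. terminate-on-first-hit) and false
 * once the stack ran empty.
 */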
nir_def *
lvp_build_ray_traversal(nir_builder *b, const struct lvp_ray_traversal_args *args)
{
   nir_variable *incomplete = nir_local_variable_create(b->impl, glsl_bool_type(), "incomplete");
   nir_store_var(b, incomplete, nir_imm_true(b), 0x1);

   nir_def *vec3ones = nir_imm_vec3(b, 1.0, 1.0, 1.0);

   struct lvp_ray_flags ray_flags = {
      .force_opaque = nir_test_mask(b, args->flags, SpvRayFlagsOpaqueKHRMask),
      .force_not_opaque = nir_test_mask(b, args->flags, SpvRayFlagsNoOpaqueKHRMask),
      .terminate_on_first_hit =
         nir_test_mask(b, args->flags, SpvRayFlagsTerminateOnFirstHitKHRMask),
      .no_cull_front = nir_ieq_imm(
         b, nir_iand_imm(b, args->flags, SpvRayFlagsCullFrontFacingTrianglesKHRMask), 0),
      .no_cull_back =
         nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullBackFacingTrianglesKHRMask), 0),
      .no_cull_opaque =
         nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullOpaqueKHRMask), 0),
      .no_cull_no_opaque =
         nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullNoOpaqueKHRMask), 0),
      .no_skip_triangles =
         nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsSkipTrianglesKHRMask), 0),
      .no_skip_aabbs = nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsSkipAABBsKHRMask), 0),
   };

   nir_push_loop(b);
   {
      nir_push_if(b, nir_ieq_imm(b, nir_load_deref(b, args->vars.current_node),
                                 LVP_BVH_INVALID_NODE));
      {
         /* The traversal is complete once the stack is empty. */
         nir_push_if(b, nir_ieq_imm(b, nir_load_deref(b, args->vars.stack_ptr), 0));
         {
            nir_store_var(b, incomplete, nir_imm_false(b), 0x1);
            nir_jump(b, nir_jump_break);
         }
         nir_pop_if(b, NULL);

         /* The instance BVH has been fully traversed: restore the
          * world-space ray and continue in the TLAS.
          */
         nir_push_if(b, nir_ige(b, nir_load_deref(b, args->vars.stack_base),
                                nir_load_deref(b, args->vars.stack_ptr)));
         {
            nir_store_deref(b, args->vars.stack_base, nir_imm_int(b, -1), 1);

            nir_store_deref(b, args->vars.bvh_base, args->root_bvh_base, 1);
            nir_store_deref(b, args->vars.origin, args->origin, 7);
            nir_store_deref(b, args->vars.dir, args->dir, 7);
            nir_store_deref(b, args->vars.inv_dir, nir_fdiv(b, vec3ones, args->dir), 7);
         }
         nir_pop_if(b, NULL);

         nir_store_deref(b, args->vars.current_node, lvp_build_pop_stack(b, args), 0x1);
      }
      nir_pop_if(b, NULL);

      nir_def *bvh_node = nir_load_deref(b, args->vars.current_node);
      nir_store_deref(b, args->vars.current_node, nir_imm_int(b, LVP_BVH_INVALID_NODE), 0x1);

      /* The node type is encoded in the low 2 bits of the node id. */
      nir_def *node_addr = nir_iadd(b, nir_load_deref(b, args->vars.bvh_base),
                                    nir_u2u64(b, nir_iand_imm(b, bvh_node, ~3u)));

      nir_def *node_type = nir_iand_imm(b, bvh_node, 3);
      nir_push_if(b, nir_uge_imm(b, node_type, lvp_bvh_node_internal));
      {
         nir_push_if(b, nir_uge_imm(b, node_type, lvp_bvh_node_instance));
         {
            nir_push_if(b, nir_ieq_imm(b, node_type, lvp_bvh_node_aabb));
            {
               lvp_build_aabb_case(b, args, &ray_flags, node_addr);
            }
            nir_push_else(b, NULL);
            {
               /* instance */
               nir_store_deref(b, args->vars.instance_addr, node_addr, 1);

               nir_def *instance_data = nir_build_load_global(
                  b, 4, 32,
                  nir_iadd_imm(b, node_addr, offsetof(struct lvp_bvh_instance_node, bvh_ptr)));

               nir_def *wto_matrix[3];
               lvp_load_wto_matrix(b, node_addr, wto_matrix);

               nir_store_deref(b, args->vars.sbt_offset_and_flags, nir_channel(b, instance_data, 3),
                               1);

               /* The visibility masks live in the upper 8 bits: skip the
                * instance if it doesn't intersect the cull mask.
                */
               nir_def *instance_and_mask = nir_channel(b, instance_data, 2);
               nir_push_if(b, nir_ult(b, nir_iand(b, instance_and_mask, args->cull_mask),
                                      nir_imm_int(b, 1 << 24)));
               {
                  nir_jump(b, nir_jump_continue);
               }
               nir_pop_if(b, NULL);

               nir_store_deref(b, args->vars.bvh_base,
                               nir_pack_64_2x32(b, nir_trim_vector(b, instance_data, 2)), 1);

               nir_store_deref(b, args->vars.stack_base, nir_load_deref(b, args->vars.stack_ptr),
                               0x1);

               /* Start traversing the instance BVH at its root node. */
               nir_store_deref(b, args->vars.current_node, nir_imm_int(b, LVP_BVH_ROOT_NODE), 0x1);

               /* Transform the ray into object space */
               nir_store_deref(b, args->vars.origin,
                               lvp_mul_vec3_mat(b, args->origin, wto_matrix, true), 7);
               nir_store_deref(b, args->vars.dir,
                               lvp_mul_vec3_mat(b, args->dir, wto_matrix, false), 7);
               nir_store_deref(b, args->vars.inv_dir,
                               nir_fdiv(b, vec3ones, nir_load_deref(b, args->vars.dir)), 7);
            }
            nir_pop_if(b, NULL);
         }
         nir_push_else(b, NULL);
         {
            nir_def *result = lvp_build_intersect_ray_box(
               b, node_addr, nir_load_deref(b, args->vars.tmax),
               nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.dir),
               nir_load_deref(b, args->vars.inv_dir));

            nir_store_deref(b, args->vars.current_node, nir_channel(b, result, 0), 0x1);

            nir_push_if(b, nir_ine_imm(b, nir_channel(b, result, 1), LVP_BVH_INVALID_NODE));
            {
               lvp_build_push_stack(b, args, nir_channel(b, result, 1));
            }
            nir_pop_if(b, NULL);
         }
         nir_pop_if(b, NULL);
      }
      nir_push_else(b, NULL);
      {
         nir_def *result = lvp_build_intersect_ray_tri(
            b, node_addr, nir_load_deref(b, args->vars.tmax), nir_load_deref(b, args->vars.origin),
            nir_load_deref(b, args->vars.dir), nir_load_deref(b, args->vars.inv_dir));

         lvp_build_triangle_case(b, args, &ray_flags, result, node_addr);
      }
      nir_pop_if(b, NULL);
   }
   nir_pop_loop(b, NULL);

   return nir_load_var(b, incomplete);
}