xref: /aosp_15_r20/external/mesa3d/src/intel/vulkan/grl/gpu/morton/pre_sort.cl (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1//
2// Copyright (C) 2009-2022 Intel Corporation
3//
4// SPDX-License-Identifier: MIT
5//
6//
7
8#include "morton/morton_common.h"
9
/* Number of low bits reserved for the primitive index when packing
   (morton_code << shift) | primID into 64 bits. */
GRL_INLINE uint get_morton_shift( uint numPrimitives )
{
    const uint index_bits = 32 - clz( numPrimitives );
    return index_bits;
}
14
/* Mask covering the low get_morton_shift() bits, i.e. (1 << shift) - 1.
   The shift is performed in 64 bits so that shift == 32 yields an
   all-ones mask, and the subtraction is kept as a separate statement
   to work around DX compiler problems. */
GRL_INLINE uint get_morton_shift_mask( uint numPrimitives )
{
    const uint bits = get_morton_shift( numPrimitives );
    uint m = (uint)( (ulong)1 << bits );
    m -= 1; // separated due to problems in DX
    return m;
}
21
/* Single-work-item kernel that initializes the Globals block for the
   morton-code build: the index shift/mask used to pack codes, the
   binary-hierarchy root, the sort-in-flight flag, and the number of
   LSB radix-sort iterations required for this shift. */
__attribute__((reqd_work_group_size(1, 1, 1))) void kernel init( global struct Globals *globals )
{
    /* variable shift for putting morton code + index to 64 bit */
    const uint shift = get_morton_shift( globals->numPrimitives );
    globals->shift = shift;
    /* Reuse the shared helper instead of duplicating the mask math here;
       it already keeps the shift and the subtraction separated to work
       around DX compiler problems. */
    globals->shift_mask = get_morton_shift_mask( globals->numPrimitives );
    globals->binary_hierarchy_root = 0;
    globals->morton_sort_in_flight = 0;
    globals->sort_iterations = get_morton_sort_lsb_req_iterations( shift );
}
33
34/*
35
36  This kernel create a morton code array containing a morton code and
37  index into the primref array.
38
39  The code uses the maximal number of bits for the morton code, such
40  that the morton code and index can still both get stored in 64 bits.
41
42  The algorithm first maps the centroids of the primitives and their
43  bounding box diagonal into a 4D grid, and then interleaves all 4
44  grid coordinates to construct the to morton code.
45
46 */
47
__attribute__( (reqd_work_group_size( MAX_HW_SIMD_WIDTH, 1, 1 )) )
__attribute__( (intel_reqd_sub_group_size( 16 )) ) void kernel
create_morton_codes_indirect( global struct Globals* globals,
    global struct BVHBase* bvh,
    global struct AABB* primref,
    global struct MortonCodePrimitive* morton_codes,
    global struct MortonCodePrimitive* morton_codes_tmp,
    uint use_new_morton_sort)
{
    /* construct range of morton codes each work group should create */
    const uint numPrimitives = globals->numPrimitives;
    const uint startID = get_group_id( 0 ) * get_local_size( 0 );
    const uint endID   = min((uint)(startID + get_local_size(0)), numPrimitives);

    /* get lower and upper bounds of geometry and length of scene diagonal */
    const float3 lower = globals->centroidBounds.lower.xyz;
    const float3 upper = globals->centroidBounds.upper.xyz;
    const float diag = length( AABB3f_size( &bvh->Meta.bounds ).xyz );

    /* calculates the 4D grid */
    const uint shift = get_morton_shift( numPrimitives );
    /* NOTE: '/' binds tighter than '<<', so this is 1 << ((64 - shift) / 4):
       the 64 - shift morton bits are split evenly across the 4 grid axes,
       and grid_size is the number of cells per axis. */
    const uint grid_size = 1 << (64 - shift) / 4;
    const float4 grid_base = (float4)(lower, 0.0f);
    const float4 grid_extend = (float4)(upper - lower, diag);
    /* Scale maps grid_extend onto [0, grid_size); the 0.99f factor keeps the
       largest coordinate strictly below grid_size after the round-toward-zero
       conversion. Axes with zero extent get scale 0 so they quantize to cell 0. */
    const float4 grid_scale = select( (grid_size * 0.99f) / grid_extend, 0.0f, grid_extend == 0.0f ); // FIXME: 0.99f!!!!!

    const uint req_iterations = get_morton_sort_lsb_req_iterations(shift);

    /* each work group iterates over its range of morton codes to create */
    uint primID = startID + get_local_id( 0 );
    if( primID < endID )
    {
        /* calculate position inside 4D grid; the 4th coordinate is the
           primitive's bounding-box diagonal length */
        float4 centroid2 = AABB_centroid2( &primref[primID] );
        centroid2.w = length( AABB_size( &primref[primID] ).xyz );
        const uint4 gridpos = convert_uint4_rtz( (centroid2 - grid_base) * grid_scale );

        /* calculate and store morton code: interleaved 4D code in the high
           bits, primitive index in the low 'shift' bits */
        const ulong code = ulong_bitInterleave4D( gridpos );
        const ulong index_code = ((ulong)code << shift) | (ulong)primID;

        // It is required for morton code to be in morton_codes buffer after LSB sort finishes.
        // If there would be odd iteration number needed for sorting, it is needed
        // to skip some iterations of sorting. For odd number of iteration start with morton_codes_tmp buffer
        // so that the final ping-pong pass lands the sorted data in morton_codes.
        if(req_iterations & 1 && !use_new_morton_sort)
            morton_codes_tmp[primID].index_code = index_code;
        else
            morton_codes[primID].index_code = index_code;
    }
}
98
99/*
100
101  Initialization of the binary morton code hierarchy.
102
103 */
104
/* Initializes one binary-morton-hierarchy node per primitive; each node
   initially spans the full primitive range [0, numPrimitives - 1]. */
__attribute__( (reqd_work_group_size( MAX_HW_SIMD_WIDTH, 1, 1 )) ) void kernel init_bottom_up_indirect( global struct Globals* globals,
    global struct BinaryMortonCodeHierarchy* bnodes )
{
    /* each work item handles at most one node from its group's range */
    const uint prim_count = globals->numPrimitives;
    const uint base       = get_group_id( 0 ) * get_local_size( 0 );
    const uint limit      = min( (uint)(base + get_local_size( 0 )), prim_count );

    const uint node = base + get_local_id( 0 );
    if( node >= limit )
        return;

    BinaryMortonCodeHierarchy_init( &bnodes[node], 0, prim_count - 1 );
}
118