xref: /aosp_15_r20/external/mesa3d/src/panfrost/lib/pan_attributes.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright (C) 2019 Collabora, Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  */
24 
25 #include "util/u_math.h"
26 #include "pan_encoder.h"
27 
28 /* This file handles attribute descriptors. The
29  * bulk of the complexity is from instancing. See mali_job for
30  * notes on how this works. But basically, for small vertex
31  * counts, we have a lookup table, and for large vertex counts,
32  * we look at the high bits as a heuristic. This has to match
33  * exactly how the hardware calculates this (which is why the
34  * algorithm is so weird) or else instancing will break. */
35 
/* Given an odd number (of the form 2k + 1), compute k. The argument is
 * parenthesized so that expressions built from operators binding looser than
 * `-` (for example ODD(a | b)) are evaluated as a whole before subtracting. */
#define ODD(odd) (((odd) - 1) >> 1)
38 
/* Padded vertex count for the small-count path: indices below 10 are returned
 * unchanged, every other index is rounded up to the next even number. */
static unsigned
panfrost_small_padded_vertex_count(unsigned idx)
{
   if (idx >= 10) {
      /* Adding the low bit bumps an odd value to the following even one and
       * leaves even values alone. */
      return idx + (idx & 1u);
   }

   return idx;
}
47 
/* Padded vertex count for the large-count path. The result is chosen from the
 * top four significant bits ("nibble") of vertex_count and has the form
 * 9, 10, 12, 14 or 16 times 2^n, where n is the bit position just below that
 * nibble.
 *
 * vertex_count must be at least 8 so that a full top nibble exists; smaller
 * values (including 0, for which __builtin_clz is undefined) are not
 * supported here.
 *
 * All arithmetic is done on unsigned 64-bit values and truncated to 32 bits on
 * return, so counts with the top bit set no longer overflow a signed int. The
 * only results that do not fit in 32 bits are for vertex_count >= 0xE0000000,
 * where the padded value would be 2^32 and the truncated result is 0. */
static unsigned
panfrost_large_padded_vertex_count(uint32_t vertex_count)
{
   /* One past the index of the highest set bit */
   unsigned highest = 32 - __builtin_clz(vertex_count);

   /* Bit position of the bottom of the top nibble */
   unsigned n = highest - 4;
   unsigned nibble = (vertex_count >> n) & 0xF;

   /* 2^n, kept in 64 bits so the scaled results below cannot overflow */
   uint64_t unit = UINT64_C(1) << n;

   /* The top bit of the nibble is always 1, and the bottom bit only matters
    * when the middle two bits are both clear, so dispatch on the middle. */
   unsigned middle_two = (nibble >> 1) & 0x3;

   switch (middle_two) {
   case 0b00:
      if (!(nibble & 1))
         return (uint32_t)(unit * 9);
      else
         return (uint32_t)((unit << 1) * 5);
   case 0b01:
      return (uint32_t)((unit << 2) * 3);
   case 0b10:
      return (uint32_t)((unit << 1) * 7);
   case 0b11:
      return (uint32_t)(unit << 4);
   default:
      return 0; /* unreachable */
   }
}
80 
/* Compute the padded vertex count for vertex_count. Counts below 20 go
 * through the small-count helper; everything else goes through the
 * high-bits helper. */
unsigned
panfrost_padded_vertex_count(unsigned vertex_count)
{
   return (vertex_count < 20)
             ? panfrost_small_padded_vertex_count(vertex_count)
             : panfrost_large_padded_vertex_count(vertex_count);
}
89 
90 /* The much, much more irritating case -- instancing is enabled. See
91  * panfrost_job.h for notes on how this works */
92 
/* Compute the "magic" multiplier used to divide by hw_divisor via
 * multiply-and-shift.
 *
 * hw_divisor:  the divisor d; must be non-zero (it is used as the right-hand
 *              side of a division) and is expected to be a non-power-of-two.
 * o_shift:     receives floor(log2(d)).
 * extra_flags: set to 1 when the round-down variant is selected. It is NOT
 *              written otherwise, so the caller must initialize it.
 *
 * Returns the low 31 bits of the multiplier; the top bit is always set and is
 * therefore implicit (this is asserted). */
unsigned
panfrost_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift,
                               unsigned *extra_flags)
{
   /* We have a NPOT divisor. Here's the fun one (multiplying by
    * the inverse and shifting) */

   /* floor(log2(d)) */
   unsigned shift = util_logbase2(hw_divisor);

   /* t = 2^(32 + shift). Built from an unsigned 64-bit one so that
    * shift == 31 does not shift into a sign bit. */
   uint64_t t = UINT64_C(1) << (32 + shift);

   /* e = 2^(shift + 32) % d */
   uint64_t e = t % hw_divisor;

   /* m = ceil(2^(32 + shift) / d), computed exactly in integers. A double
    * quotient followed by ceil() is not exact here: e.g. for d = 2^31 - 1 the
    * true quotient is 2^31 + 1 + 1/d, which rounds to the integer 2^31 + 1 in
    * double precision and loses the round-up. */
   uint64_t m = t / hw_divisor + (e != 0);

   /* Default case */
   uint32_t magic_divisor = (uint32_t)m;

   /* Apply round-down algorithm? e <= 2^shift?. XXX: The blob
    * seems to use a different condition */
   if (e <= (UINT64_C(1) << shift)) {
      magic_divisor = (uint32_t)(m - 1);
      *extra_flags = 1;
   }

   /* Top flag implicitly set */
   assert(magic_divisor & (1u << 31));
   magic_divisor &= ~(1u << 31);
   *o_shift = shift;

   return magic_divisor;
}
131