xref: /aosp_15_r20/external/mesa3d/src/panfrost/compiler/valhall/va_lower_constants.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright (C) 2021 Collabora Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include "bi_builder.h"
25 #include "va_compiler.h"
26 #include "valhall.h"
27 
28 /* Only some special immediates are available, as specified in the Table of
29  * Immediates in the specification. Other immediates must be lowered, either to
30  * uniforms or to moves.
31  */
32 
33 static bi_index
va_mov_imm(bi_builder * b,uint32_t imm)34 va_mov_imm(bi_builder *b, uint32_t imm)
35 {
36    bi_index zero = bi_fau(BIR_FAU_IMMEDIATE | 0, false);
37    return bi_iadd_imm_i32(b, zero, imm);
38 }
39 
40 static bi_index
va_lut_index_32(uint32_t imm)41 va_lut_index_32(uint32_t imm)
42 {
43    for (unsigned i = 0; i < ARRAY_SIZE(valhall_immediates); ++i) {
44       if (valhall_immediates[i] == imm)
45          return va_lut(i);
46    }
47 
48    return bi_null();
49 }
50 
51 static bi_index
va_lut_index_16(uint16_t imm)52 va_lut_index_16(uint16_t imm)
53 {
54    uint16_t *arr16 = (uint16_t *)valhall_immediates;
55 
56    for (unsigned i = 0; i < (2 * ARRAY_SIZE(valhall_immediates)); ++i) {
57       if (arr16[i] == imm)
58          return bi_half(va_lut(i >> 1), i & 1);
59    }
60 
61    return bi_null();
62 }
63 
64 UNUSED static bi_index
va_lut_index_8(uint8_t imm)65 va_lut_index_8(uint8_t imm)
66 {
67    uint8_t *arr8 = (uint8_t *)valhall_immediates;
68 
69    for (unsigned i = 0; i < (4 * ARRAY_SIZE(valhall_immediates)); ++i) {
70       if (arr8[i] == imm)
71          return bi_byte(va_lut(i >> 2), i & 3);
72    }
73 
74    return bi_null();
75 }
76 
77 static bi_index
va_demote_constant_fp16(uint32_t value)78 va_demote_constant_fp16(uint32_t value)
79 {
80    uint16_t fp16 = _mesa_float_to_half(uif(value));
81 
82    /* Only convert if it is exact */
83    if (fui(_mesa_half_to_float(fp16)) == value)
84       return va_lut_index_16(fp16);
85    else
86       return bi_null();
87 }
88 
89 /*
90  * Test if a 32-bit word arises as a sign or zero extension of some 8/16-bit
91  * value.
92  */
/*
 * 8-bit variant: returns true if x equals the zero extension (is_signed
 * false) or the sign extension (is_signed true) of some 8-bit value.
 */
static bool
is_extension_of_8(uint32_t x, bool is_signed)
{
   if (!is_signed)
      return x <= UINT8_MAX;

   /* Bits 31..7 must be uniformly clear (non-negative) or set (negative) */
   const uint32_t upper = x >> 7;

   return upper == 0 || upper == (UINT32_MAX >> 7);
}
101 
/*
 * 16-bit variant: returns true if x equals the zero extension (is_signed
 * false) or the sign extension (is_signed true) of some 16-bit value.
 */
static bool
is_extension_of_16(uint32_t x, bool is_signed)
{
   if (!is_signed)
      return x <= UINT16_MAX;

   /* Bits 31..15 must be uniformly clear (non-negative) or set (negative) */
   const uint32_t upper = x >> 15;

   return upper == 0 || upper == (UINT32_MAX >> 15);
}
110 
111 static bi_index
va_resolve_constant(bi_builder * b,uint32_t value,struct va_src_info info,bool is_signed,bool staging)112 va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info,
113                     bool is_signed, bool staging)
114 {
115    /* Try the constant as-is */
116    if (!staging) {
117       bi_index lut = va_lut_index_32(value);
118       if (!bi_is_null(lut))
119          return lut;
120 
121       /* ...or negated as a FP32 constant */
122       if (info.absneg && info.size == VA_SIZE_32) {
123          lut = bi_neg(va_lut_index_32(fui(-uif(value))));
124          if (!bi_is_null(lut))
125             return lut;
126       }
127 
128       /* ...or negated as a FP16 constant */
129       if (info.absneg && info.size == VA_SIZE_16) {
130          lut = bi_neg(va_lut_index_32(value ^ 0x80008000));
131          if (!bi_is_null(lut))
132             return lut;
133       }
134    }
135 
136    /* Try using a single half of a FP16 constant */
137    bool replicated_halves = (value & 0xFFFF) == (value >> 16);
138    if (!staging && info.swizzle && info.size == VA_SIZE_16 &&
139        replicated_halves) {
140       bi_index lut = va_lut_index_16(value & 0xFFFF);
141       if (!bi_is_null(lut))
142          return lut;
143 
144       /* ...possibly negated */
145       if (info.absneg) {
146          lut = bi_neg(va_lut_index_16((value & 0xFFFF) ^ 0x8000));
147          if (!bi_is_null(lut))
148             return lut;
149       }
150    }
151 
152    /* Try extending a byte */
153    if (!staging && (info.widen || info.lanes || info.lane) &&
154        is_extension_of_8(value, is_signed)) {
155 
156       bi_index lut = va_lut_index_8(value & 0xFF);
157       if (!bi_is_null(lut))
158          return lut;
159    }
160 
161    /* Try extending a halfword */
162    if (!staging && info.widen && is_extension_of_16(value, is_signed)) {
163 
164       bi_index lut = va_lut_index_16(value & 0xFFFF);
165       if (!bi_is_null(lut))
166          return lut;
167    }
168 
169    /* Try demoting the constant to FP16 */
170    if (!staging && info.swizzle && info.size == VA_SIZE_32) {
171       bi_index lut = va_demote_constant_fp16(value);
172       if (!bi_is_null(lut))
173          return lut;
174 
175       if (info.absneg) {
176          bi_index lut = bi_neg(va_demote_constant_fp16(fui(-uif(value))));
177          if (!bi_is_null(lut))
178             return lut;
179       }
180    }
181 
182    /* TODO: Optimize to uniform */
183    return va_mov_imm(b, value);
184 }
185 
186 void
va_lower_constants(bi_context * ctx,bi_instr * I)187 va_lower_constants(bi_context *ctx, bi_instr *I)
188 {
189    bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
190 
191    bi_foreach_src(I, s) {
192       if (I->src[s].type == BI_INDEX_CONSTANT) {
193          /* abs(#c) is pointless, but -#c occurs in transcendental sequences */
194          assert(!I->src[s].abs && "redundant .abs modifier");
195 
196          bool is_signed = valhall_opcodes[I->op].is_signed;
197          bool staging = (s < valhall_opcodes[I->op].nr_staging_srcs);
198          struct va_src_info info = va_src_info(I->op, s);
199          uint32_t value = I->src[s].value;
200          enum bi_swizzle swz = I->src[s].swizzle;
201 
202          /* Resolve any swizzle, keeping in mind the different interpretations
203           * swizzles in different contexts.
204           */
205          if (info.size == VA_SIZE_32) {
206             /* Extracting a half from the 32-bit value */
207             if (swz == BI_SWIZZLE_H00)
208                value = (value & 0xFFFF);
209             else if (swz == BI_SWIZZLE_H11)
210                value = (value >> 16);
211             else
212                assert(swz == BI_SWIZZLE_H01);
213 
214             /* FP16 -> FP32 */
215             if (info.swizzle && swz != BI_SWIZZLE_H01)
216                value = fui(_mesa_half_to_float(value));
217          } else if (info.size == VA_SIZE_16) {
218             assert(swz >= BI_SWIZZLE_H00 && swz <= BI_SWIZZLE_H11);
219             value = bi_apply_swizzle(value, swz);
220          } else if (info.size == VA_SIZE_8 && (info.lane || info.lanes)) {
221             /* 8-bit extract */
222             unsigned chan = (swz - BI_SWIZZLE_B0000);
223             assert(chan < 4);
224 
225             value = (value >> (8 * chan)) & 0xFF;
226          } else {
227             /* TODO: Any other special handling? */
228             value = bi_apply_swizzle(value, swz);
229          }
230 
231          bi_index cons =
232             va_resolve_constant(&b, value, info, is_signed, staging);
233          cons.neg ^= I->src[s].neg;
234          I->src[s] = cons;
235 
236          /* If we're selecting a single 8-bit lane, we should return a single
237           * 8-bit lane to ensure the result is encodeable. By convention,
238           * applying the lane select puts the desired constant (at least) in the
239           * bottom byte, so we can always select the bottom byte.
240           */
241          if (info.lane && I->src[s].swizzle == BI_SWIZZLE_H01) {
242             assert(info.size == VA_SIZE_8);
243             I->src[s] = bi_byte(I->src[s], 0);
244          }
245       }
246    }
247 }
248