1 /*
2 * Copyright (C) 2021 Collabora Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "bi_builder.h"
25 #include "va_compiler.h"
26 #include "valhall.h"
27
28 /* Only some special immediates are available, as specified in the Table of
29 * Immediates in the specification. Other immediates must be lowered, either to
30 * uniforms or to moves.
31 */
32
33 static bi_index
va_mov_imm(bi_builder * b,uint32_t imm)34 va_mov_imm(bi_builder *b, uint32_t imm)
35 {
36 bi_index zero = bi_fau(BIR_FAU_IMMEDIATE | 0, false);
37 return bi_iadd_imm_i32(b, zero, imm);
38 }
39
40 static bi_index
va_lut_index_32(uint32_t imm)41 va_lut_index_32(uint32_t imm)
42 {
43 for (unsigned i = 0; i < ARRAY_SIZE(valhall_immediates); ++i) {
44 if (valhall_immediates[i] == imm)
45 return va_lut(i);
46 }
47
48 return bi_null();
49 }
50
51 static bi_index
va_lut_index_16(uint16_t imm)52 va_lut_index_16(uint16_t imm)
53 {
54 uint16_t *arr16 = (uint16_t *)valhall_immediates;
55
56 for (unsigned i = 0; i < (2 * ARRAY_SIZE(valhall_immediates)); ++i) {
57 if (arr16[i] == imm)
58 return bi_half(va_lut(i >> 1), i & 1);
59 }
60
61 return bi_null();
62 }
63
64 UNUSED static bi_index
va_lut_index_8(uint8_t imm)65 va_lut_index_8(uint8_t imm)
66 {
67 uint8_t *arr8 = (uint8_t *)valhall_immediates;
68
69 for (unsigned i = 0; i < (4 * ARRAY_SIZE(valhall_immediates)); ++i) {
70 if (arr8[i] == imm)
71 return bi_byte(va_lut(i >> 2), i & 3);
72 }
73
74 return bi_null();
75 }
76
77 static bi_index
va_demote_constant_fp16(uint32_t value)78 va_demote_constant_fp16(uint32_t value)
79 {
80 uint16_t fp16 = _mesa_float_to_half(uif(value));
81
82 /* Only convert if it is exact */
83 if (fui(_mesa_half_to_float(fp16)) == value)
84 return va_lut_index_16(fp16);
85 else
86 return bi_null();
87 }
88
/*
 * Test if a 32-bit word arises as a sign or zero extension of some 8-bit
 * value.
 *
 * Zero extension: the word fits in 8 bits. Sign extension: the word is either
 * a non-negative 8-bit value, or has bits 7..31 all set, i.e. it is at least
 * the 32-bit two's complement pattern of INT8_MIN.
 */
static bool
is_extension_of_8(uint32_t x, bool is_signed)
{
   if (!is_signed)
      return x <= UINT8_MAX;

   return x <= INT8_MAX || x >= (uint32_t)INT8_MIN;
}
101
/*
 * Test if a 32-bit word arises as a sign or zero extension of some 16-bit
 * value.
 *
 * Zero extension: the word fits in 16 bits. Sign extension: the word is either
 * a non-negative 16-bit value, or has bits 15..31 all set, i.e. it is at least
 * the 32-bit two's complement pattern of INT16_MIN.
 */
static bool
is_extension_of_16(uint32_t x, bool is_signed)
{
   if (!is_signed)
      return x <= UINT16_MAX;

   return x <= INT16_MAX || x >= (uint32_t)INT16_MIN;
}
110
111 static bi_index
va_resolve_constant(bi_builder * b,uint32_t value,struct va_src_info info,bool is_signed,bool staging)112 va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info,
113 bool is_signed, bool staging)
114 {
115 /* Try the constant as-is */
116 if (!staging) {
117 bi_index lut = va_lut_index_32(value);
118 if (!bi_is_null(lut))
119 return lut;
120
121 /* ...or negated as a FP32 constant */
122 if (info.absneg && info.size == VA_SIZE_32) {
123 lut = bi_neg(va_lut_index_32(fui(-uif(value))));
124 if (!bi_is_null(lut))
125 return lut;
126 }
127
128 /* ...or negated as a FP16 constant */
129 if (info.absneg && info.size == VA_SIZE_16) {
130 lut = bi_neg(va_lut_index_32(value ^ 0x80008000));
131 if (!bi_is_null(lut))
132 return lut;
133 }
134 }
135
136 /* Try using a single half of a FP16 constant */
137 bool replicated_halves = (value & 0xFFFF) == (value >> 16);
138 if (!staging && info.swizzle && info.size == VA_SIZE_16 &&
139 replicated_halves) {
140 bi_index lut = va_lut_index_16(value & 0xFFFF);
141 if (!bi_is_null(lut))
142 return lut;
143
144 /* ...possibly negated */
145 if (info.absneg) {
146 lut = bi_neg(va_lut_index_16((value & 0xFFFF) ^ 0x8000));
147 if (!bi_is_null(lut))
148 return lut;
149 }
150 }
151
152 /* Try extending a byte */
153 if (!staging && (info.widen || info.lanes || info.lane) &&
154 is_extension_of_8(value, is_signed)) {
155
156 bi_index lut = va_lut_index_8(value & 0xFF);
157 if (!bi_is_null(lut))
158 return lut;
159 }
160
161 /* Try extending a halfword */
162 if (!staging && info.widen && is_extension_of_16(value, is_signed)) {
163
164 bi_index lut = va_lut_index_16(value & 0xFFFF);
165 if (!bi_is_null(lut))
166 return lut;
167 }
168
169 /* Try demoting the constant to FP16 */
170 if (!staging && info.swizzle && info.size == VA_SIZE_32) {
171 bi_index lut = va_demote_constant_fp16(value);
172 if (!bi_is_null(lut))
173 return lut;
174
175 if (info.absneg) {
176 bi_index lut = bi_neg(va_demote_constant_fp16(fui(-uif(value))));
177 if (!bi_is_null(lut))
178 return lut;
179 }
180 }
181
182 /* TODO: Optimize to uniform */
183 return va_mov_imm(b, value);
184 }
185
186 void
va_lower_constants(bi_context * ctx,bi_instr * I)187 va_lower_constants(bi_context *ctx, bi_instr *I)
188 {
189 bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
190
191 bi_foreach_src(I, s) {
192 if (I->src[s].type == BI_INDEX_CONSTANT) {
193 /* abs(#c) is pointless, but -#c occurs in transcendental sequences */
194 assert(!I->src[s].abs && "redundant .abs modifier");
195
196 bool is_signed = valhall_opcodes[I->op].is_signed;
197 bool staging = (s < valhall_opcodes[I->op].nr_staging_srcs);
198 struct va_src_info info = va_src_info(I->op, s);
199 uint32_t value = I->src[s].value;
200 enum bi_swizzle swz = I->src[s].swizzle;
201
202 /* Resolve any swizzle, keeping in mind the different interpretations
203 * swizzles in different contexts.
204 */
205 if (info.size == VA_SIZE_32) {
206 /* Extracting a half from the 32-bit value */
207 if (swz == BI_SWIZZLE_H00)
208 value = (value & 0xFFFF);
209 else if (swz == BI_SWIZZLE_H11)
210 value = (value >> 16);
211 else
212 assert(swz == BI_SWIZZLE_H01);
213
214 /* FP16 -> FP32 */
215 if (info.swizzle && swz != BI_SWIZZLE_H01)
216 value = fui(_mesa_half_to_float(value));
217 } else if (info.size == VA_SIZE_16) {
218 assert(swz >= BI_SWIZZLE_H00 && swz <= BI_SWIZZLE_H11);
219 value = bi_apply_swizzle(value, swz);
220 } else if (info.size == VA_SIZE_8 && (info.lane || info.lanes)) {
221 /* 8-bit extract */
222 unsigned chan = (swz - BI_SWIZZLE_B0000);
223 assert(chan < 4);
224
225 value = (value >> (8 * chan)) & 0xFF;
226 } else {
227 /* TODO: Any other special handling? */
228 value = bi_apply_swizzle(value, swz);
229 }
230
231 bi_index cons =
232 va_resolve_constant(&b, value, info, is_signed, staging);
233 cons.neg ^= I->src[s].neg;
234 I->src[s] = cons;
235
236 /* If we're selecting a single 8-bit lane, we should return a single
237 * 8-bit lane to ensure the result is encodeable. By convention,
238 * applying the lane select puts the desired constant (at least) in the
239 * bottom byte, so we can always select the bottom byte.
240 */
241 if (info.lane && I->src[s].swizzle == BI_SWIZZLE_H01) {
242 assert(info.size == VA_SIZE_8);
243 I->src[s] = bi_byte(I->src[s], 0);
244 }
245 }
246 }
247 }
248