1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <[email protected]>
30 */
31
32 /** @file
33 *
34 * This file defines struct brw_reg, which is our representation for EU
35 * registers. They're not a hardware specific format, just an abstraction
36 * that intends to capture the full flexibility of the hardware registers.
37 *
38 * The brw_eu_emit.c layer's brw_set_dest/brw_set_src[01] functions encode
39 * the abstract brw_reg type into the actual hardware instruction encoding.
40 */
41
42 #ifndef BRW_REG_H
43 #define BRW_REG_H
44
45 #include <stdbool.h>
46 #include "util/compiler.h"
47 #include "util/glheader.h"
48 #include "util/macros.h"
49 #include "util/rounding.h"
50 #include "util/u_math.h"
51 #include "brw_eu_defines.h"
52 #include "brw_reg_type.h"
53
54 #ifdef __cplusplus
55 extern "C" {
56 #endif
57
58 struct intel_device_info;
59
/**
 * Size of general purpose register space in REG_SIZE units.
 *
 * XE2_MAX_GRF is the larger of the two bounds; it is the limit that
 * brw_make_reg() asserts FIXED_GRF register numbers against.
 */
#define BRW_MAX_GRF 128
#define XE2_MAX_GRF 256
63
/**
 * BRW hardware swizzles.
 * Only defines XYZW to ensure it can be contained in 2 bits
 */
#define BRW_SWIZZLE_X 0
#define BRW_SWIZZLE_Y 1
#define BRW_SWIZZLE_Z 2
#define BRW_SWIZZLE_W 3

/* Pack four 2-bit channel selects into one 8-bit swizzle; \a a occupies the
 * lowest two bits and \a d the highest two.
 */
#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
/* Extract the 2-bit channel select stored at position \a idx of \a swz. */
#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)

/* Named swizzles.  NOOP and XYZW are the same identity mapping. */
#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0)
#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1)
#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2)
#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3)
#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)
#define BRW_SWIZZLE_YXYX BRW_SWIZZLE4(1,0,1,0)
#define BRW_SWIZZLE_XZXZ BRW_SWIZZLE4(0,2,0,2)
#define BRW_SWIZZLE_YZXW BRW_SWIZZLE4(1,2,0,3)
#define BRW_SWIZZLE_YWYW BRW_SWIZZLE4(1,3,1,3)
#define BRW_SWIZZLE_ZXYW BRW_SWIZZLE4(2,0,1,3)
#define BRW_SWIZZLE_ZWZW BRW_SWIZZLE4(2,3,2,3)
#define BRW_SWIZZLE_WZWZ BRW_SWIZZLE4(3,2,3,2)
#define BRW_SWIZZLE_WZYX BRW_SWIZZLE4(3,2,1,0)
#define BRW_SWIZZLE_XXZZ BRW_SWIZZLE4(0,0,2,2)
#define BRW_SWIZZLE_YYWW BRW_SWIZZLE4(1,1,3,3)
#define BRW_SWIZZLE_YXWZ BRW_SWIZZLE4(1,0,3,2)

/* The identity swizzle shifted right (INPUT) or left (OUTPUT) by \a comp
 * channel positions, i.e. by comp * 2 bits.  Note that the OUTPUT form is not
 * masked, so for comp > 0 the result has bits set above bit 7.
 */
#define BRW_SWZ_COMP_INPUT(comp) (BRW_SWIZZLE_XYZW >> ((comp)*2))
#define BRW_SWZ_COMP_OUTPUT(comp) (BRW_SWIZZLE_XYZW << ((comp)*2))
97
98 static inline bool
brw_is_single_value_swizzle(unsigned swiz)99 brw_is_single_value_swizzle(unsigned swiz)
100 {
101 return (swiz == BRW_SWIZZLE_XXXX ||
102 swiz == BRW_SWIZZLE_YYYY ||
103 swiz == BRW_SWIZZLE_ZZZZ ||
104 swiz == BRW_SWIZZLE_WWWW);
105 }
106
/**
 * Compute the swizzle obtained from the application of \p swz0 on the result
 * of \p swz1.  The argument ordering is expected to match function
 * composition.
 *
 * For every output channel the select stored in \p swz0 is used as the index
 * of the select to pick out of \p swz1.
 */
static inline unsigned
brw_compose_swizzle(unsigned swz0, unsigned swz1)
{
   unsigned composed = 0;

   for (unsigned chan = 0; chan < 4; chan++) {
      const unsigned outer_sel = BRW_GET_SWZ(swz0, chan);

      composed |= BRW_GET_SWZ(swz1, outer_sel) << (chan * 2);
   }

   return composed;
}
121
/**
 * Construct an identity swizzle for the set of enabled channels given by \p
 * mask.  The result will only reference channels enabled in the provided \p
 * mask, assuming that \p mask is non-zero.  The constructed swizzle will
 * satisfy the property that for any instruction OP and any mask:
 *
 *    brw_OP(p, brw_writemask(dst, mask),
 *           brw_swizzle(src, brw_swizzle_for_mask(mask)));
 *
 * will be equivalent to the same instruction without swizzle:
 *
 *    brw_OP(p, brw_writemask(dst, mask), src);
 *
 * Enabled channels select themselves; a disabled channel repeats the select
 * of the closest enabled channel below it, or the lowest enabled channel if
 * there is none below.
 */
static inline unsigned
brw_swizzle_for_mask(unsigned mask)
{
   /* Seed with the lowest enabled channel (0 for an empty mask). */
   unsigned prev = 0;
   if (mask)
      prev = ffs(mask) - 1;

   unsigned result = 0;
   for (unsigned chan = 0; chan < 4; chan++) {
      if (mask & (1 << chan))
         prev = chan;

      result |= prev << (chan * 2);
   }

   return result;
}
146
/* Defined out of line.  brw_swizzle() calls it to apply the swizzle \p swz
 * to the 32-bit payload \p x of an immediate of the given \p type.
 */
uint32_t brw_swizzle_immediate(enum brw_reg_type type, uint32_t x, unsigned swz);

/* Size in bytes of one register as counted by brw_reg::nr; byte_offset()
 * uses it to carry sub-register overflow into the register number.
 */
#define REG_SIZE (8*4)
150
/* These aren't hardware structs, just something useful for us to pass around:
 *
 * Align1 operation has a lot of control over input ranges.  Used in
 * WM programs to implement shaders decomposed into "channel serial"
 * or "structure of array" form:
 */
typedef struct brw_reg {
   /* Type, file and modifiers.  `bits` aliases the bitfields so that they can
    * be compared as one 32-bit word (see brw_regs_equal()).
    */
   union {
      struct {
         enum brw_reg_type type:5;
         enum brw_reg_file file:3;
         unsigned negate:1;             /* source only */
         unsigned abs:1;                /* source only */
         unsigned address_mode:1;       /* relative addressing, hopefully! */
         unsigned pad0:16;
         unsigned subnr:5;              /* :1 in align16 */
      };
      uint32_t bits;
   };

   /** Offset from the start of the virtual register in bytes. */
   uint16_t offset;

   /** Register region horizontal stride of virtual registers */
   uint8_t stride;

   /* Either the register number plus region description, or, for IMM
    * registers, the immediate payload (the brw_imm_*() helpers store into
    * df/u64/d64/f/d/ud).  All members share the same 64 bits.
    */
   union {
      struct {
         unsigned nr;
         unsigned swizzle:8;      /* src only, align16 only */
         unsigned writemask:4;    /* dest only, align16 only  */
         int  indirect_offset:10; /* relative addressing offset */
         unsigned vstride:4;      /* source only */
         unsigned width:3;        /* src only, align1 only */
         unsigned hstride:2;      /* align1 only */
         unsigned pad1:1;
      };

      double df;
      uint64_t u64;
      int64_t d64;
      float f;
      int   d;
      unsigned ud;
   };

#ifdef __cplusplus
   /* TODO: Remove this constructor to make this type a POD.  Need
    * to make sure that rest of compiler doesn't rely on type or
    * stride of BAD_FILE registers.
    *
    * Zeroes the whole object, then sets type UD, stride 1 and file BAD_FILE.
    */
   brw_reg() {
      memset((void*)this, 0, sizeof(*this));
      this->type = BRW_TYPE_UD;
      this->stride = 1;
      this->file = BAD_FILE;
   }

   /* C++-only member helpers; their definitions are not in this header. */
   bool equals(const brw_reg &r) const;
   bool negative_equals(const brw_reg &r) const;
   bool is_contiguous() const;

   bool is_zero() const;
   bool is_one() const;
   bool is_negative_one() const;
   bool is_null() const;
   bool is_accumulator() const;

   /**
    * Return the size in bytes of a single logical component of the
    * register assuming the given execution width.
    */
   unsigned component_size(unsigned width) const;
#endif /* __cplusplus */
} brw_reg;
226
227 static inline unsigned
phys_nr(const struct intel_device_info * devinfo,const struct brw_reg reg)228 phys_nr(const struct intel_device_info *devinfo, const struct brw_reg reg)
229 {
230 if (devinfo->ver >= 20) {
231 if (reg.file == FIXED_GRF)
232 return reg.nr / 2;
233 else if (reg.file == ARF &&
234 reg.nr >= BRW_ARF_ACCUMULATOR &&
235 reg.nr < BRW_ARF_FLAG)
236 return BRW_ARF_ACCUMULATOR + (reg.nr - BRW_ARF_ACCUMULATOR) / 2;
237 else
238 return reg.nr;
239 } else {
240 return reg.nr;
241 }
242 }
243
244 static inline unsigned
phys_subnr(const struct intel_device_info * devinfo,const struct brw_reg reg)245 phys_subnr(const struct intel_device_info *devinfo, const struct brw_reg reg)
246 {
247 if (devinfo->ver >= 20) {
248 if (reg.file == FIXED_GRF ||
249 (reg.file == ARF &&
250 reg.nr >= BRW_ARF_ACCUMULATOR &&
251 reg.nr < BRW_ARF_FLAG))
252 return (reg.nr & 1) * REG_SIZE + reg.subnr;
253 else
254 return reg.subnr;
255 } else {
256 return reg.subnr;
257 }
258 }
259
260 static inline bool
brw_regs_equal(const struct brw_reg * a,const struct brw_reg * b)261 brw_regs_equal(const struct brw_reg *a, const struct brw_reg *b)
262 {
263 return a->bits == b->bits &&
264 a->u64 == b->u64 &&
265 a->offset == b->offset &&
266 a->stride == b->stride;
267 }
268
269 static inline bool
brw_regs_negative_equal(const struct brw_reg * a,const struct brw_reg * b)270 brw_regs_negative_equal(const struct brw_reg *a, const struct brw_reg *b)
271 {
272 if (a->file == IMM) {
273 if (a->bits != b->bits)
274 return false;
275
276 switch ((enum brw_reg_type) a->type) {
277 case BRW_TYPE_UQ:
278 case BRW_TYPE_Q:
279 return a->d64 == -b->d64;
280 case BRW_TYPE_DF:
281 return a->df == -b->df;
282 case BRW_TYPE_UD:
283 case BRW_TYPE_D:
284 return a->d == -b->d;
285 case BRW_TYPE_F:
286 return a->f == -b->f;
287 case BRW_TYPE_VF:
288 /* It is tempting to treat 0 as a negation of 0 (and -0 as a negation
289 * of -0). There are occasions where 0 or -0 is used and the exact
290 * bit pattern is desired. At the very least, changing this to allow
291 * 0 as a negation of 0 causes some fp64 tests to fail on IVB.
292 */
293 return a->ud == (b->ud ^ 0x80808080);
294 case BRW_TYPE_UW:
295 case BRW_TYPE_W:
296 case BRW_TYPE_UV:
297 case BRW_TYPE_V:
298 case BRW_TYPE_HF:
299 /* FINISHME: Implement support for these types once there is
300 * something in the compiler that can generate them. Until then,
301 * they cannot be tested.
302 */
303 return false;
304 case BRW_TYPE_UB:
305 case BRW_TYPE_B:
306 default:
307 unreachable("not reached");
308 }
309 } else {
310 struct brw_reg tmp = *a;
311
312 tmp.negate = !tmp.negate;
313
314 return brw_regs_equal(&tmp, b);
315 }
316 }
317
318 static inline enum brw_reg_type
get_exec_type(const enum brw_reg_type type)319 get_exec_type(const enum brw_reg_type type)
320 {
321 switch (type) {
322 case BRW_TYPE_B:
323 case BRW_TYPE_V:
324 return BRW_TYPE_W;
325 case BRW_TYPE_UB:
326 case BRW_TYPE_UV:
327 return BRW_TYPE_UW;
328 case BRW_TYPE_VF:
329 return BRW_TYPE_F;
330 default:
331 return type;
332 }
333 }
334
335 /**
336 * Return an integer type of the requested size and signedness.
337 */
338 static inline enum brw_reg_type
brw_int_type(unsigned sz,bool is_signed)339 brw_int_type(unsigned sz, bool is_signed)
340 {
341 switch (sz) {
342 case 1:
343 return (is_signed ? BRW_TYPE_B : BRW_TYPE_UB);
344 case 2:
345 return (is_signed ? BRW_TYPE_W : BRW_TYPE_UW);
346 case 4:
347 return (is_signed ? BRW_TYPE_D : BRW_TYPE_UD);
348 case 8:
349 return (is_signed ? BRW_TYPE_Q : BRW_TYPE_UQ);
350 default:
351 unreachable("Not reached.");
352 }
353 }
354
355 /**
356 * Construct a brw_reg.
357 * \param file one of the BRW_x_REGISTER_FILE values
358 * \param nr register number/index
359 * \param subnr register sub number
360 * \param negate register negate modifier
361 * \param abs register abs modifier
362 * \param type one of BRW_TYPE_x
363 * \param vstride one of BRW_VERTICAL_STRIDE_x
364 * \param width one of BRW_WIDTH_x
365 * \param hstride one of BRW_HORIZONTAL_STRIDE_x
366 * \param swizzle one of BRW_SWIZZLE_x
367 * \param writemask WRITEMASK_X/Y/Z/W bitfield
368 */
369 static inline struct brw_reg
brw_make_reg(enum brw_reg_file file,unsigned nr,unsigned subnr,unsigned negate,unsigned abs,enum brw_reg_type type,unsigned vstride,unsigned width,unsigned hstride,unsigned swizzle,unsigned writemask)370 brw_make_reg(enum brw_reg_file file,
371 unsigned nr,
372 unsigned subnr,
373 unsigned negate,
374 unsigned abs,
375 enum brw_reg_type type,
376 unsigned vstride,
377 unsigned width,
378 unsigned hstride,
379 unsigned swizzle,
380 unsigned writemask)
381 {
382 struct brw_reg reg;
383 if (file == FIXED_GRF)
384 assert(nr < XE2_MAX_GRF);
385 else if (file == ARF)
386 assert(nr <= BRW_ARF_TIMESTAMP);
387
388 reg.type = type;
389 reg.file = file;
390 reg.negate = negate;
391 reg.abs = abs;
392 reg.address_mode = BRW_ADDRESS_DIRECT;
393 reg.pad0 = 0;
394 reg.subnr = subnr * brw_type_size_bytes(type);
395 reg.nr = nr;
396
397 /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
398 * set swizzle and writemask to W, as the lower bits of subnr will
399 * be lost when converted to align16. This is probably too much to
400 * keep track of as you'd want it adjusted by suboffset(), etc.
401 * Perhaps fix up when converting to align16?
402 */
403 reg.swizzle = swizzle;
404 reg.writemask = writemask;
405 reg.indirect_offset = 0;
406 reg.vstride = vstride;
407 reg.width = width;
408 reg.hstride = hstride;
409 reg.pad1 = 0;
410
411 reg.offset = 0;
412 reg.stride = 1;
413 if (file == IMM &&
414 type != BRW_TYPE_V &&
415 type != BRW_TYPE_UV &&
416 type != BRW_TYPE_VF) {
417 reg.stride = 0;
418 }
419
420 return reg;
421 }
422
423 /** Construct float[16] register */
424 static inline struct brw_reg
brw_vec16_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)425 brw_vec16_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
426 {
427 return brw_make_reg(file,
428 nr,
429 subnr,
430 0,
431 0,
432 BRW_TYPE_F,
433 BRW_VERTICAL_STRIDE_16,
434 BRW_WIDTH_16,
435 BRW_HORIZONTAL_STRIDE_1,
436 BRW_SWIZZLE_XYZW,
437 WRITEMASK_XYZW);
438 }
439
440 /** Construct float[8] register */
441 static inline struct brw_reg
brw_vec8_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)442 brw_vec8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
443 {
444 return brw_make_reg(file,
445 nr,
446 subnr,
447 0,
448 0,
449 BRW_TYPE_F,
450 BRW_VERTICAL_STRIDE_8,
451 BRW_WIDTH_8,
452 BRW_HORIZONTAL_STRIDE_1,
453 BRW_SWIZZLE_XYZW,
454 WRITEMASK_XYZW);
455 }
456
457 /** Construct float[4] register */
458 static inline struct brw_reg
brw_vec4_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)459 brw_vec4_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
460 {
461 return brw_make_reg(file,
462 nr,
463 subnr,
464 0,
465 0,
466 BRW_TYPE_F,
467 BRW_VERTICAL_STRIDE_4,
468 BRW_WIDTH_4,
469 BRW_HORIZONTAL_STRIDE_1,
470 BRW_SWIZZLE_XYZW,
471 WRITEMASK_XYZW);
472 }
473
474 /** Construct float[2] register */
475 static inline struct brw_reg
brw_vec2_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)476 brw_vec2_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
477 {
478 return brw_make_reg(file,
479 nr,
480 subnr,
481 0,
482 0,
483 BRW_TYPE_F,
484 BRW_VERTICAL_STRIDE_2,
485 BRW_WIDTH_2,
486 BRW_HORIZONTAL_STRIDE_1,
487 BRW_SWIZZLE_XYXY,
488 WRITEMASK_XY);
489 }
490
491 /** Construct float[1] register */
492 static inline struct brw_reg
brw_vec1_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)493 brw_vec1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
494 {
495 return brw_make_reg(file,
496 nr,
497 subnr,
498 0,
499 0,
500 BRW_TYPE_F,
501 BRW_VERTICAL_STRIDE_0,
502 BRW_WIDTH_1,
503 BRW_HORIZONTAL_STRIDE_0,
504 BRW_SWIZZLE_XXXX,
505 WRITEMASK_X);
506 }
507
508 static inline struct brw_reg
brw_vecn_reg(unsigned width,enum brw_reg_file file,unsigned nr,unsigned subnr)509 brw_vecn_reg(unsigned width, enum brw_reg_file file,
510 unsigned nr, unsigned subnr)
511 {
512 switch (width) {
513 case 1:
514 return brw_vec1_reg(file, nr, subnr);
515 case 2:
516 return brw_vec2_reg(file, nr, subnr);
517 case 4:
518 return brw_vec4_reg(file, nr, subnr);
519 case 8:
520 return brw_vec8_reg(file, nr, subnr);
521 case 16:
522 return brw_vec16_reg(file, nr, subnr);
523 default:
524 unreachable("Invalid register width");
525 }
526 }
527
528 static inline struct brw_reg
retype(struct brw_reg reg,enum brw_reg_type type)529 retype(struct brw_reg reg, enum brw_reg_type type)
530 {
531 reg.type = type;
532 return reg;
533 }
534
535 static inline struct brw_reg
firsthalf(struct brw_reg reg)536 firsthalf(struct brw_reg reg)
537 {
538 return reg;
539 }
540
541 static inline struct brw_reg
sechalf(struct brw_reg reg)542 sechalf(struct brw_reg reg)
543 {
544 if (reg.vstride)
545 reg.nr++;
546 return reg;
547 }
548
549 static inline struct brw_reg
offset(struct brw_reg reg,unsigned delta)550 offset(struct brw_reg reg, unsigned delta)
551 {
552 reg.nr += delta;
553 return reg;
554 }
555
556
557 static inline struct brw_reg
byte_offset(struct brw_reg reg,unsigned bytes)558 byte_offset(struct brw_reg reg, unsigned bytes)
559 {
560 switch (reg.file) {
561 case BAD_FILE:
562 break;
563 case VGRF:
564 case ATTR:
565 case UNIFORM:
566 reg.offset += bytes;
567 break;
568 case ARF:
569 case FIXED_GRF: {
570 const unsigned suboffset = reg.subnr + bytes;
571 reg.nr += suboffset / REG_SIZE;
572 reg.subnr = suboffset % REG_SIZE;
573 break;
574 }
575 case IMM:
576 default:
577 assert(bytes == 0);
578 }
579 return reg;
580 }
581
582 static inline struct brw_reg
suboffset(struct brw_reg reg,unsigned delta)583 suboffset(struct brw_reg reg, unsigned delta)
584 {
585 return byte_offset(reg, delta * brw_type_size_bytes(reg.type));
586 }
587
588 /** Construct unsigned word[16] register */
589 static inline struct brw_reg
brw_uw16_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)590 brw_uw16_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
591 {
592 return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_TYPE_UW), subnr);
593 }
594
595 /** Construct unsigned word[8] register */
596 static inline struct brw_reg
brw_uw8_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)597 brw_uw8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
598 {
599 return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_TYPE_UW), subnr);
600 }
601
602 /** Construct unsigned word[1] register */
603 static inline struct brw_reg
brw_uw1_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)604 brw_uw1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
605 {
606 return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_TYPE_UW), subnr);
607 }
608
609 static inline struct brw_reg
brw_ud8_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)610 brw_ud8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
611 {
612 return retype(brw_vec8_reg(file, nr, subnr), BRW_TYPE_UD);
613 }
614
615 static inline struct brw_reg
brw_ud1_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)616 brw_ud1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
617 {
618 return retype(brw_vec1_reg(file, nr, subnr), BRW_TYPE_UD);
619 }
620
621 static inline struct brw_reg
brw_imm_reg(enum brw_reg_type type)622 brw_imm_reg(enum brw_reg_type type)
623 {
624 return brw_make_reg(IMM,
625 0,
626 0,
627 0,
628 0,
629 type,
630 BRW_VERTICAL_STRIDE_0,
631 BRW_WIDTH_1,
632 BRW_HORIZONTAL_STRIDE_0,
633 0,
634 0);
635 }
636
637 /** Construct float immediate register */
638 static inline struct brw_reg
brw_imm_df(double df)639 brw_imm_df(double df)
640 {
641 struct brw_reg imm = brw_imm_reg(BRW_TYPE_DF);
642 imm.df = df;
643 return imm;
644 }
645
646 static inline struct brw_reg
brw_imm_u64(uint64_t u64)647 brw_imm_u64(uint64_t u64)
648 {
649 struct brw_reg imm = brw_imm_reg(BRW_TYPE_UQ);
650 imm.u64 = u64;
651 return imm;
652 }
653
654 static inline struct brw_reg
brw_imm_f(float f)655 brw_imm_f(float f)
656 {
657 struct brw_reg imm = brw_imm_reg(BRW_TYPE_F);
658 imm.f = f;
659 return imm;
660 }
661
662 /** Construct int64_t immediate register */
663 static inline struct brw_reg
brw_imm_q(int64_t q)664 brw_imm_q(int64_t q)
665 {
666 struct brw_reg imm = brw_imm_reg(BRW_TYPE_Q);
667 imm.d64 = q;
668 return imm;
669 }
670
671 /** Construct int64_t immediate register */
672 static inline struct brw_reg
brw_imm_uq(uint64_t uq)673 brw_imm_uq(uint64_t uq)
674 {
675 struct brw_reg imm = brw_imm_reg(BRW_TYPE_UQ);
676 imm.u64 = uq;
677 return imm;
678 }
679
680 /** Construct integer immediate register */
681 static inline struct brw_reg
brw_imm_d(int d)682 brw_imm_d(int d)
683 {
684 struct brw_reg imm = brw_imm_reg(BRW_TYPE_D);
685 imm.d = d;
686 return imm;
687 }
688
689 /** Construct uint immediate register */
690 static inline struct brw_reg
brw_imm_ud(unsigned ud)691 brw_imm_ud(unsigned ud)
692 {
693 struct brw_reg imm = brw_imm_reg(BRW_TYPE_UD);
694 imm.ud = ud;
695 return imm;
696 }
697
698 /** Construct ushort immediate register */
699 static inline struct brw_reg
brw_imm_uw(uint16_t uw)700 brw_imm_uw(uint16_t uw)
701 {
702 struct brw_reg imm = brw_imm_reg(BRW_TYPE_UW);
703 imm.ud = uw | ((uint32_t)uw << 16);
704 return imm;
705 }
706
707 /** Construct short immediate register */
708 static inline struct brw_reg
brw_imm_w(int16_t w)709 brw_imm_w(int16_t w)
710 {
711 struct brw_reg imm = brw_imm_reg(BRW_TYPE_W);
712 imm.ud = (uint16_t)w | (uint32_t)(uint16_t)w << 16;
713 return imm;
714 }
715
716 /* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
717 * numbers alias with _V and _VF below:
718 */
719
720 /** Construct vector of eight signed half-byte values */
721 static inline struct brw_reg
brw_imm_v(unsigned v)722 brw_imm_v(unsigned v)
723 {
724 struct brw_reg imm = brw_imm_reg(BRW_TYPE_V);
725 imm.ud = v;
726 return imm;
727 }
728
729 /** Construct vector of eight unsigned half-byte values */
730 static inline struct brw_reg
brw_imm_uv(unsigned uv)731 brw_imm_uv(unsigned uv)
732 {
733 struct brw_reg imm = brw_imm_reg(BRW_TYPE_UV);
734 imm.ud = uv;
735 return imm;
736 }
737
738 /** Construct vector of four 8-bit float values */
739 static inline struct brw_reg
brw_imm_vf(unsigned v)740 brw_imm_vf(unsigned v)
741 {
742 struct brw_reg imm = brw_imm_reg(BRW_TYPE_VF);
743 imm.ud = v;
744 return imm;
745 }
746
747 static inline struct brw_reg
brw_imm_vf4(unsigned v0,unsigned v1,unsigned v2,unsigned v3)748 brw_imm_vf4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
749 {
750 struct brw_reg imm = brw_imm_reg(BRW_TYPE_VF);
751 imm.vstride = BRW_VERTICAL_STRIDE_0;
752 imm.width = BRW_WIDTH_4;
753 imm.hstride = BRW_HORIZONTAL_STRIDE_1;
754 imm.ud = ((v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24));
755 return imm;
756 }
757
758
759 static inline struct brw_reg
brw_address(struct brw_reg reg)760 brw_address(struct brw_reg reg)
761 {
762 return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
763 }
764
765 /** Construct float[1] general-purpose register */
766 static inline struct brw_reg
brw_vec1_grf(unsigned nr,unsigned subnr)767 brw_vec1_grf(unsigned nr, unsigned subnr)
768 {
769 return brw_vec1_reg(FIXED_GRF, nr, subnr);
770 }
771
772 static inline struct brw_reg
xe2_vec1_grf(unsigned nr,unsigned subnr)773 xe2_vec1_grf(unsigned nr, unsigned subnr)
774 {
775 return brw_vec1_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
776 }
777
778 /** Construct float[2] general-purpose register */
779 static inline struct brw_reg
brw_vec2_grf(unsigned nr,unsigned subnr)780 brw_vec2_grf(unsigned nr, unsigned subnr)
781 {
782 return brw_vec2_reg(FIXED_GRF, nr, subnr);
783 }
784
785 static inline struct brw_reg
xe2_vec2_grf(unsigned nr,unsigned subnr)786 xe2_vec2_grf(unsigned nr, unsigned subnr)
787 {
788 return brw_vec2_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
789 }
790
791 /** Construct float[4] general-purpose register */
792 static inline struct brw_reg
brw_vec4_grf(unsigned nr,unsigned subnr)793 brw_vec4_grf(unsigned nr, unsigned subnr)
794 {
795 return brw_vec4_reg(FIXED_GRF, nr, subnr);
796 }
797
798 static inline struct brw_reg
xe2_vec4_grf(unsigned nr,unsigned subnr)799 xe2_vec4_grf(unsigned nr, unsigned subnr)
800 {
801 return brw_vec4_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
802 }
803
804 /** Construct float[8] general-purpose register */
805 static inline struct brw_reg
brw_vec8_grf(unsigned nr,unsigned subnr)806 brw_vec8_grf(unsigned nr, unsigned subnr)
807 {
808 return brw_vec8_reg(FIXED_GRF, nr, subnr);
809 }
810
811 static inline struct brw_reg
xe2_vec8_grf(unsigned nr,unsigned subnr)812 xe2_vec8_grf(unsigned nr, unsigned subnr)
813 {
814 return brw_vec8_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
815 }
816
817 /** Construct float[16] general-purpose register */
818 static inline struct brw_reg
brw_vec16_grf(unsigned nr,unsigned subnr)819 brw_vec16_grf(unsigned nr, unsigned subnr)
820 {
821 return brw_vec16_reg(FIXED_GRF, nr, subnr);
822 }
823
824 static inline struct brw_reg
xe2_vec16_grf(unsigned nr,unsigned subnr)825 xe2_vec16_grf(unsigned nr, unsigned subnr)
826 {
827 return brw_vec16_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
828 }
829
830 static inline struct brw_reg
brw_vecn_grf(unsigned width,unsigned nr,unsigned subnr)831 brw_vecn_grf(unsigned width, unsigned nr, unsigned subnr)
832 {
833 return brw_vecn_reg(width, FIXED_GRF, nr, subnr);
834 }
835
836 static inline struct brw_reg
xe2_vecn_grf(unsigned width,unsigned nr,unsigned subnr)837 xe2_vecn_grf(unsigned width, unsigned nr, unsigned subnr)
838 {
839 return brw_vecn_reg(width, FIXED_GRF, nr + subnr / 8, subnr % 8);
840 }
841
842 static inline struct brw_reg
brw_uw1_grf(unsigned nr,unsigned subnr)843 brw_uw1_grf(unsigned nr, unsigned subnr)
844 {
845 return brw_uw1_reg(FIXED_GRF, nr, subnr);
846 }
847
848 static inline struct brw_reg
brw_uw8_grf(unsigned nr,unsigned subnr)849 brw_uw8_grf(unsigned nr, unsigned subnr)
850 {
851 return brw_uw8_reg(FIXED_GRF, nr, subnr);
852 }
853
854 static inline struct brw_reg
brw_uw16_grf(unsigned nr,unsigned subnr)855 brw_uw16_grf(unsigned nr, unsigned subnr)
856 {
857 return brw_uw16_reg(FIXED_GRF, nr, subnr);
858 }
859
860 static inline struct brw_reg
brw_ud8_grf(unsigned nr,unsigned subnr)861 brw_ud8_grf(unsigned nr, unsigned subnr)
862 {
863 return brw_ud8_reg(FIXED_GRF, nr, subnr);
864 }
865
866 static inline struct brw_reg
brw_ud1_grf(unsigned nr,unsigned subnr)867 brw_ud1_grf(unsigned nr, unsigned subnr)
868 {
869 return brw_ud1_reg(FIXED_GRF, nr, subnr);
870 }
871
872
873 /** Construct null register (usually used for setting condition codes) */
874 static inline struct brw_reg
brw_null_reg(void)875 brw_null_reg(void)
876 {
877 return brw_vec8_reg(ARF, BRW_ARF_NULL, 0);
878 }
879
880 static inline struct brw_reg
brw_null_vec(unsigned width)881 brw_null_vec(unsigned width)
882 {
883 return brw_vecn_reg(width, ARF, BRW_ARF_NULL, 0);
884 }
885
886 static inline struct brw_reg
brw_address_reg(unsigned subnr)887 brw_address_reg(unsigned subnr)
888 {
889 return brw_uw1_reg(ARF, BRW_ARF_ADDRESS, subnr);
890 }
891
892 static inline struct brw_reg
brw_tdr_reg(void)893 brw_tdr_reg(void)
894 {
895 return brw_uw1_reg(ARF, BRW_ARF_TDR, 0);
896 }
897
898 /* If/else instructions break in align16 mode if writemask & swizzle
899 * aren't xyzw. This goes against the convention for other scalar
900 * regs:
901 */
902 static inline struct brw_reg
brw_ip_reg(void)903 brw_ip_reg(void)
904 {
905 return brw_make_reg(ARF,
906 BRW_ARF_IP,
907 0,
908 0,
909 0,
910 BRW_TYPE_UD,
911 BRW_VERTICAL_STRIDE_4, /* ? */
912 BRW_WIDTH_1,
913 BRW_HORIZONTAL_STRIDE_0,
914 BRW_SWIZZLE_XYZW, /* NOTE! */
915 WRITEMASK_XYZW); /* NOTE! */
916 }
917
918 static inline struct brw_reg
brw_notification_reg(void)919 brw_notification_reg(void)
920 {
921 return brw_make_reg(ARF,
922 BRW_ARF_NOTIFICATION_COUNT,
923 0,
924 0,
925 0,
926 BRW_TYPE_UD,
927 BRW_VERTICAL_STRIDE_0,
928 BRW_WIDTH_1,
929 BRW_HORIZONTAL_STRIDE_0,
930 BRW_SWIZZLE_XXXX,
931 WRITEMASK_X);
932 }
933
934 static inline struct brw_reg
brw_cr0_reg(unsigned subnr)935 brw_cr0_reg(unsigned subnr)
936 {
937 return brw_ud1_reg(ARF, BRW_ARF_CONTROL, subnr);
938 }
939
940 static inline struct brw_reg
brw_sr0_reg(unsigned subnr)941 brw_sr0_reg(unsigned subnr)
942 {
943 return brw_ud1_reg(ARF, BRW_ARF_STATE, subnr);
944 }
945
946 static inline struct brw_reg
brw_acc_reg(unsigned width)947 brw_acc_reg(unsigned width)
948 {
949 return brw_vecn_reg(width, ARF,
950 BRW_ARF_ACCUMULATOR, 0);
951 }
952
953 static inline struct brw_reg
brw_flag_reg(int reg,int subreg)954 brw_flag_reg(int reg, int subreg)
955 {
956 return brw_uw1_reg(ARF,
957 BRW_ARF_FLAG + reg, subreg);
958 }
959
960 static inline struct brw_reg
brw_flag_subreg(unsigned subreg)961 brw_flag_subreg(unsigned subreg)
962 {
963 return brw_uw1_reg(ARF,
964 BRW_ARF_FLAG + subreg / 2, subreg % 2);
965 }
966
967 /**
968 * Return the mask register present in Gfx4-5, or the related register present
969 * in Gfx7.5 and later hardware referred to as "channel enable" register in
970 * the documentation.
971 */
972 static inline struct brw_reg
brw_mask_reg(unsigned subnr)973 brw_mask_reg(unsigned subnr)
974 {
975 return brw_uw1_reg(ARF, BRW_ARF_MASK, subnr);
976 }
977
978 static inline struct brw_reg
brw_vmask_reg()979 brw_vmask_reg()
980 {
981 return brw_sr0_reg(3);
982 }
983
984 static inline struct brw_reg
brw_dmask_reg()985 brw_dmask_reg()
986 {
987 return brw_sr0_reg(2);
988 }
989
990 static inline struct brw_reg
brw_vgrf(unsigned nr,enum brw_reg_type type)991 brw_vgrf(unsigned nr, enum brw_reg_type type)
992 {
993 struct brw_reg reg = {};
994 reg.file = VGRF;
995 reg.nr = nr;
996 reg.type = type;
997 reg.stride = 1;
998 return reg;
999 }
1000
1001 static inline struct brw_reg
brw_attr_reg(unsigned nr,enum brw_reg_type type)1002 brw_attr_reg(unsigned nr, enum brw_reg_type type)
1003 {
1004 struct brw_reg reg = {};
1005 reg.file = ATTR;
1006 reg.nr = nr;
1007 reg.type = type;
1008 reg.stride = 1;
1009 return reg;
1010 }
1011
1012 static inline struct brw_reg
brw_uniform_reg(unsigned nr,enum brw_reg_type type)1013 brw_uniform_reg(unsigned nr, enum brw_reg_type type)
1014 {
1015 struct brw_reg reg = {};
1016 reg.file = UNIFORM;
1017 reg.nr = nr;
1018 reg.type = type;
1019 reg.stride = 0;
1020 return reg;
1021 }
1022
/* Convert an element count or stride (0, 1, 2, 4, 8, 16 or 32) into its
 * encoded form: 0 stays 0 and a power of two 2^k becomes k + 1.  Any other
 * value yields 0.
 *
 * This is almost always called with a numeric constant argument, so it is
 * written as a plain chain of comparisons that folds at compile time.
 */
static inline unsigned cvt(unsigned val)
{
   return val == 1  ? 1 :
          val == 2  ? 2 :
          val == 4  ? 3 :
          val == 8  ? 4 :
          val == 16 ? 5 :
          val == 32 ? 6 :
          0;
}
1039
1040 static inline struct brw_reg
stride(struct brw_reg reg,unsigned vstride,unsigned width,unsigned hstride)1041 stride(struct brw_reg reg, unsigned vstride, unsigned width, unsigned hstride)
1042 {
1043 reg.vstride = cvt(vstride);
1044 reg.width = cvt(width) - 1;
1045 reg.hstride = cvt(hstride);
1046 return reg;
1047 }
1048
1049 /**
1050 * Multiply the vertical and horizontal stride of a register by the given
1051 * factor \a s.
1052 */
1053 static inline struct brw_reg
spread(struct brw_reg reg,unsigned s)1054 spread(struct brw_reg reg, unsigned s)
1055 {
1056 if (s) {
1057 assert(util_is_power_of_two_nonzero(s));
1058
1059 if (reg.hstride)
1060 reg.hstride += cvt(s) - 1;
1061
1062 if (reg.vstride)
1063 reg.vstride += cvt(s) - 1;
1064
1065 return reg;
1066 } else {
1067 return stride(reg, 0, 1, 0);
1068 }
1069 }
1070
1071 /**
1072 * Reinterpret each channel of register \p reg as a vector of values of the
1073 * given smaller type and take the i-th subcomponent from each.
1074 */
1075 static inline struct brw_reg
subscript(struct brw_reg reg,enum brw_reg_type type,unsigned i)1076 subscript(struct brw_reg reg, enum brw_reg_type type, unsigned i)
1077 {
1078 assert((i + 1) * brw_type_size_bytes(type) <= brw_type_size_bytes(reg.type));
1079
1080 if (reg.file == ARF || reg.file == FIXED_GRF) {
1081 /* The stride is encoded inconsistently for fixed GRF and ARF registers
1082 * as the log2 of the actual vertical and horizontal strides.
1083 */
1084 const int delta = util_logbase2(brw_type_size_bytes(reg.type)) -
1085 util_logbase2(brw_type_size_bytes(type));
1086 reg.hstride += (reg.hstride ? delta : 0);
1087 reg.vstride += (reg.vstride ? delta : 0);
1088
1089 } else if (reg.file == IMM) {
1090 unsigned bit_size = brw_type_size_bits(type);
1091 reg.u64 >>= i * bit_size;
1092 reg.u64 &= BITFIELD64_MASK(bit_size);
1093 if (bit_size <= 16)
1094 reg.u64 |= reg.u64 << 16;
1095 return retype(reg, type);
1096 } else {
1097 reg.stride *= brw_type_size_bytes(reg.type) / brw_type_size_bytes(type);
1098 }
1099
1100 return byte_offset(retype(reg, type), i * brw_type_size_bytes(type));
1101 }
1102
1103 static inline struct brw_reg
vec16(struct brw_reg reg)1104 vec16(struct brw_reg reg)
1105 {
1106 return stride(reg, 16,16,1);
1107 }
1108
1109 static inline struct brw_reg
vec8(struct brw_reg reg)1110 vec8(struct brw_reg reg)
1111 {
1112 return stride(reg, 8,8,1);
1113 }
1114
1115 static inline struct brw_reg
vec4(struct brw_reg reg)1116 vec4(struct brw_reg reg)
1117 {
1118 return stride(reg, 4,4,1);
1119 }
1120
1121 static inline struct brw_reg
vec2(struct brw_reg reg)1122 vec2(struct brw_reg reg)
1123 {
1124 return stride(reg, 2,2,1);
1125 }
1126
1127 static inline struct brw_reg
vec1(struct brw_reg reg)1128 vec1(struct brw_reg reg)
1129 {
1130 return stride(reg, 0,1,0);
1131 }
1132
1133
1134 static inline struct brw_reg
get_element(struct brw_reg reg,unsigned elt)1135 get_element(struct brw_reg reg, unsigned elt)
1136 {
1137 return vec1(suboffset(reg, elt));
1138 }
1139
1140 static inline struct brw_reg
get_element_ud(struct brw_reg reg,unsigned elt)1141 get_element_ud(struct brw_reg reg, unsigned elt)
1142 {
1143 return vec1(suboffset(retype(reg, BRW_TYPE_UD), elt));
1144 }
1145
1146 static inline struct brw_reg
get_element_d(struct brw_reg reg,unsigned elt)1147 get_element_d(struct brw_reg reg, unsigned elt)
1148 {
1149 return vec1(suboffset(retype(reg, BRW_TYPE_D), elt));
1150 }
1151
1152 static inline struct brw_reg
brw_swizzle(struct brw_reg reg,unsigned swz)1153 brw_swizzle(struct brw_reg reg, unsigned swz)
1154 {
1155 if (reg.file == IMM)
1156 reg.ud = brw_swizzle_immediate(reg.type, reg.ud, swz);
1157 else
1158 reg.swizzle = brw_compose_swizzle(swz, reg.swizzle);
1159
1160 return reg;
1161 }
1162
1163 static inline struct brw_reg
brw_writemask(struct brw_reg reg,unsigned mask)1164 brw_writemask(struct brw_reg reg, unsigned mask)
1165 {
1166 assert(reg.file != IMM);
1167 reg.writemask &= mask;
1168 return reg;
1169 }
1170
1171 static inline struct brw_reg
brw_set_writemask(struct brw_reg reg,unsigned mask)1172 brw_set_writemask(struct brw_reg reg, unsigned mask)
1173 {
1174 assert(reg.file != IMM);
1175 reg.writemask = mask;
1176 return reg;
1177 }
1178
/**
 * Return a mask with the \p n lowest bits set, e.g. 0xf for \p n == 4.
 *
 * The shift is done on an unsigned operand so that the result is well
 * defined for every \p n below the bit width of unsigned (a signed
 * `1 << 31` would be undefined behavior).  \p n must be less than that
 * width.
 */
static inline unsigned
brw_writemask_for_size(unsigned n)
{
   return (1u << n) - 1u;
}
1184
/**
 * Return a mask of \p n consecutive bits whose lowest set bit is at position
 * \p first_component.  The run must fit in four components (asserted).
 */
static inline unsigned
brw_writemask_for_component_packing(unsigned n, unsigned first_component)
{
   assert(first_component + n <= 4);

   /* n low bits, then moved up to start at first_component. */
   const int low_bits = (1 << n) - 1;

   return low_bits << first_component;
}
1191
1192 static inline struct brw_reg
negate(struct brw_reg reg)1193 negate(struct brw_reg reg)
1194 {
1195 reg.negate ^= 1;
1196 return reg;
1197 }
1198
1199 static inline struct brw_reg
brw_abs(struct brw_reg reg)1200 brw_abs(struct brw_reg reg)
1201 {
1202 reg.abs = 1;
1203 reg.negate = 0;
1204 return reg;
1205 }
1206
1207 /************************************************************************/
1208
1209 static inline struct brw_reg
brw_vec1_indirect(unsigned subnr,int offset)1210 brw_vec1_indirect(unsigned subnr, int offset)
1211 {
1212 struct brw_reg reg = brw_vec1_grf(0, 0);
1213 reg.subnr = subnr;
1214 reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1215 reg.indirect_offset = offset;
1216 return reg;
1217 }
1218
1219 static inline struct brw_reg
brw_VxH_indirect(unsigned subnr,int offset)1220 brw_VxH_indirect(unsigned subnr, int offset)
1221 {
1222 struct brw_reg reg = brw_vec1_grf(0, 0);
1223 reg.vstride = BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL;
1224 reg.subnr = subnr;
1225 reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1226 reg.indirect_offset = offset;
1227 return reg;
1228 }
1229
1230 static inline bool
region_matches(struct brw_reg reg,enum brw_vertical_stride v,enum brw_width w,enum brw_horizontal_stride h)1231 region_matches(struct brw_reg reg, enum brw_vertical_stride v,
1232 enum brw_width w, enum brw_horizontal_stride h)
1233 {
1234 return reg.vstride == v &&
1235 reg.width == w &&
1236 reg.hstride == h;
1237 }
1238
/** True when \p reg has vertical stride 0, width 1 and horizontal stride 0. */
#define has_scalar_region(reg)                                 \
   region_matches((reg), BRW_VERTICAL_STRIDE_0, BRW_WIDTH_1,   \
                  BRW_HORIZONTAL_STRIDE_0)
1242
1243 /**
1244 * Return the size in bytes per data element of register \p reg on the
1245 * corresponding register file.
1246 */
1247 static inline unsigned
element_sz(struct brw_reg reg)1248 element_sz(struct brw_reg reg)
1249 {
1250 if (reg.file == IMM || has_scalar_region(reg)) {
1251 return brw_type_size_bytes(reg.type);
1252
1253 } else if (reg.width == BRW_WIDTH_1 &&
1254 reg.hstride == BRW_HORIZONTAL_STRIDE_0) {
1255 assert(reg.vstride != BRW_VERTICAL_STRIDE_0);
1256 return brw_type_size_bytes(reg.type) << (reg.vstride - 1);
1257
1258 } else {
1259 assert(reg.hstride != BRW_HORIZONTAL_STRIDE_0);
1260 assert(reg.vstride == reg.hstride + reg.width);
1261 return brw_type_size_bytes(reg.type) << (reg.hstride - 1);
1262 }
1263 }
1264
/* brw_packed_float.c */

/* NOTE(review): judging by the names these convert between a float and a
 * packed "VF" value; the exact contract (including what brw_float_to_vf()
 * returns for unrepresentable inputs) is defined in brw_packed_float.c --
 * confirm there.
 */
int brw_float_to_vf(float f);
float brw_vf_to_float(unsigned char vf);

/* In-place helpers operating on the register pointed to by \p reg.
 * NOTE(review): presumably they apply saturate / negate / abs to an
 * immediate value and return whether that was possible -- confirm against
 * the implementation.
 */
bool brw_reg_saturate_immediate(brw_reg *reg);
bool brw_reg_negate_immediate(brw_reg *reg);
bool brw_reg_abs_immediate(brw_reg *reg);
1272
1273 #ifdef __cplusplus
1274 }
1275 #endif
1276
1277 #ifdef __cplusplus
1278
1279 static inline brw_reg
horiz_offset(const brw_reg & reg,unsigned delta)1280 horiz_offset(const brw_reg ®, unsigned delta)
1281 {
1282 switch (reg.file) {
1283 case BAD_FILE:
1284 case UNIFORM:
1285 case IMM:
1286 /* These only have a single component that is implicitly splatted. A
1287 * horizontal offset should be a harmless no-op.
1288 * XXX - Handle vector immediates correctly.
1289 */
1290 return reg;
1291 case VGRF:
1292 case ATTR:
1293 return byte_offset(reg, delta * reg.stride * brw_type_size_bytes(reg.type));
1294 case ARF:
1295 case FIXED_GRF:
1296 if (reg.is_null()) {
1297 return reg;
1298 } else {
1299 const unsigned hstride = reg.hstride ? 1 << (reg.hstride - 1) : 0;
1300 const unsigned vstride = reg.vstride ? 1 << (reg.vstride - 1) : 0;
1301 const unsigned width = 1 << reg.width;
1302
1303 if (delta % width == 0) {
1304 return byte_offset(reg, delta / width * vstride * brw_type_size_bytes(reg.type));
1305 } else {
1306 assert(vstride == hstride * width);
1307 return byte_offset(reg, delta * hstride * brw_type_size_bytes(reg.type));
1308 }
1309 }
1310 }
1311 unreachable("Invalid register file");
1312 }
1313
1314 static inline brw_reg
offset(brw_reg reg,unsigned width,unsigned delta)1315 offset(brw_reg reg, unsigned width, unsigned delta)
1316 {
1317 switch (reg.file) {
1318 case BAD_FILE:
1319 break;
1320 case ARF:
1321 case FIXED_GRF:
1322 case VGRF:
1323 case ATTR:
1324 case UNIFORM:
1325 return byte_offset(reg, delta * reg.component_size(width));
1326 case IMM:
1327 assert(delta == 0);
1328 }
1329 return reg;
1330 }
1331
1332 /**
1333 * Get the scalar channel of \p reg given by \p idx and replicate it to all
1334 * channels of the result.
1335 */
1336 static inline brw_reg
component(brw_reg reg,unsigned idx)1337 component(brw_reg reg, unsigned idx)
1338 {
1339 reg = horiz_offset(reg, idx);
1340 reg.stride = 0;
1341 if (reg.file == ARF || reg.file == FIXED_GRF) {
1342 reg.vstride = BRW_VERTICAL_STRIDE_0;
1343 reg.width = BRW_WIDTH_1;
1344 reg.hstride = BRW_HORIZONTAL_STRIDE_0;
1345 }
1346 return reg;
1347 }
1348
1349 /**
1350 * Return an integer identifying the discrete address space a register is
1351 * contained in. A register is by definition fully contained in the single
1352 * reg_space it belongs to, so two registers with different reg_space ids are
1353 * guaranteed not to overlap. Most register files are a single reg_space of
1354 * its own, only the VGRF and ATTR files are composed of multiple discrete
1355 * address spaces, one for each allocation and input attribute respectively.
1356 */
1357 static inline uint32_t
reg_space(const brw_reg & r)1358 reg_space(const brw_reg &r)
1359 {
1360 return r.file << 16 | (r.file == VGRF || r.file == ATTR ? r.nr : 0);
1361 }
1362
1363 /**
1364 * Return the base offset in bytes of a register relative to the start of its
1365 * reg_space().
1366 */
1367 static inline unsigned
reg_offset(const brw_reg & r)1368 reg_offset(const brw_reg &r)
1369 {
1370 return (r.file == VGRF || r.file == IMM || r.file == ATTR ? 0 : r.nr) *
1371 (r.file == UNIFORM ? 4 : REG_SIZE) + r.offset +
1372 (r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0);
1373 }
1374
1375 /**
1376 * Return the amount of padding in bytes left unused between individual
1377 * components of register \p r due to a (horizontal) stride value greater than
1378 * one, or zero if components are tightly packed in the register file.
1379 */
1380 static inline unsigned
reg_padding(const brw_reg & r)1381 reg_padding(const brw_reg &r)
1382 {
1383 const unsigned stride = ((r.file != ARF && r.file != FIXED_GRF) ? r.stride :
1384 r.hstride == 0 ? 0 :
1385 1 << (r.hstride - 1));
1386 return (MAX2(1, stride) - 1) * brw_type_size_bytes(r.type);
1387 }
1388
1389 /**
1390 * Return whether the register region starting at \p r and spanning \p dr
1391 * bytes could potentially overlap the register region starting at \p s and
1392 * spanning \p ds bytes.
1393 */
1394 static inline bool
regions_overlap(const brw_reg & r,unsigned dr,const brw_reg & s,unsigned ds)1395 regions_overlap(const brw_reg &r, unsigned dr, const brw_reg &s, unsigned ds)
1396 {
1397 if (r.file != s.file)
1398 return false;
1399
1400 if (r.file == VGRF) {
1401 return r.nr == s.nr &&
1402 !(r.offset + dr <= s.offset || s.offset + ds <= r.offset);
1403 } else {
1404 return !(reg_offset(r) + dr <= reg_offset(s) ||
1405 reg_offset(s) + ds <= reg_offset(r));
1406 }
1407 }
1408
1409 /**
1410 * Check that the register region given by r [r.offset, r.offset + dr[
1411 * is fully contained inside the register region given by s
1412 * [s.offset, s.offset + ds[.
1413 */
1414 static inline bool
region_contained_in(const brw_reg & r,unsigned dr,const brw_reg & s,unsigned ds)1415 region_contained_in(const brw_reg &r, unsigned dr, const brw_reg &s, unsigned ds)
1416 {
1417 return reg_space(r) == reg_space(s) &&
1418 reg_offset(r) >= reg_offset(s) &&
1419 reg_offset(r) + dr <= reg_offset(s) + ds;
1420 }
1421
1422 /**
1423 * Return whether the given register region is n-periodic, i.e. whether the
1424 * original region remains invariant after shifting it by \p n scalar
1425 * channels.
1426 */
1427 static inline bool
is_periodic(const brw_reg & reg,unsigned n)1428 is_periodic(const brw_reg ®, unsigned n)
1429 {
1430 if (reg.file == BAD_FILE || reg.is_null()) {
1431 return true;
1432
1433 } else if (reg.file == IMM) {
1434 const unsigned period = (reg.type == BRW_TYPE_UV ||
1435 reg.type == BRW_TYPE_V ? 8 :
1436 reg.type == BRW_TYPE_VF ? 4 :
1437 1);
1438 return n % period == 0;
1439
1440 } else if (reg.file == ARF || reg.file == FIXED_GRF) {
1441 const unsigned period = (reg.hstride == 0 && reg.vstride == 0 ? 1 :
1442 reg.vstride == 0 ? 1 << reg.width :
1443 ~0);
1444 return n % period == 0;
1445
1446 } else {
1447 return reg.stride == 0;
1448 }
1449 }
1450
1451 static inline bool
is_uniform(const brw_reg & reg)1452 is_uniform(const brw_reg ®)
1453 {
1454 return is_periodic(reg, 1);
1455 }
1456
1457 /**
1458 * Get the specified 8-component quarter of a register.
1459 */
1460 static inline brw_reg
quarter(const brw_reg & reg,unsigned idx)1461 quarter(const brw_reg ®, unsigned idx)
1462 {
1463 assert(idx < 4);
1464 return horiz_offset(reg, 8 * idx);
1465 }
1466
1467 static inline brw_reg
horiz_stride(brw_reg reg,unsigned s)1468 horiz_stride(brw_reg reg, unsigned s)
1469 {
1470 reg.stride *= s;
1471 return reg;
1472 }
1473
/* Default-initialized register constant.  Being `static` in a header, every
 * translation unit that includes this file gets its own copy.
 * NOTE(review): presumably this stands for "no register" -- confirm what
 * brw_reg's default constructor produces.
 */
static const brw_reg reg_undef;
1475
1476 /*
1477 * Return the stride between channels of the specified register in
1478 * byte units, or ~0u if the region cannot be represented with a
1479 * single one-dimensional stride.
1480 */
1481 static inline unsigned
byte_stride(const brw_reg & reg)1482 byte_stride(const brw_reg ®)
1483 {
1484 switch (reg.file) {
1485 case BAD_FILE:
1486 case UNIFORM:
1487 case IMM:
1488 case VGRF:
1489 case ATTR:
1490 return reg.stride * brw_type_size_bytes(reg.type);
1491 case ARF:
1492 case FIXED_GRF:
1493 if (reg.is_null()) {
1494 return 0;
1495 } else {
1496 const unsigned hstride = reg.hstride ? 1 << (reg.hstride - 1) : 0;
1497 const unsigned vstride = reg.vstride ? 1 << (reg.vstride - 1) : 0;
1498 const unsigned width = 1 << reg.width;
1499
1500 if (width == 1) {
1501 return vstride * brw_type_size_bytes(reg.type);
1502 } else if (hstride * width == vstride) {
1503 return hstride * brw_type_size_bytes(reg.type);
1504 } else {
1505 return ~0u;
1506 }
1507 }
1508 default:
1509 unreachable("Invalid register file");
1510 }
1511 }
1512
1513 #endif /* __cplusplus */
1514
1515 #endif
1516