xref: /aosp_15_r20/external/mesa3d/src/intel/compiler/elk/elk_fs_lower_pack.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2015 Connor Abbott
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "util/half_float.h"
25 #include "elk_fs.h"
26 #include "elk_cfg.h"
27 #include "elk_fs_builder.h"
28 
29 using namespace elk;
30 
31 bool
lower_pack()32 elk_fs_visitor::lower_pack()
33 {
34    bool progress = false;
35 
36    foreach_block_and_inst_safe(block, elk_fs_inst, inst, cfg) {
37       if (inst->opcode != ELK_FS_OPCODE_PACK &&
38           inst->opcode != ELK_FS_OPCODE_PACK_HALF_2x16_SPLIT)
39          continue;
40 
41       assert(inst->dst.file == VGRF);
42       assert(inst->saturate == false);
43       elk_fs_reg dst = inst->dst;
44 
45       const fs_builder ibld(this, block, inst);
46       /* The lowering generates 2 instructions for what was previously 1. This
47        * can trick the IR to believe we're doing partial writes, but the
48        * register is actually fully written. Mark it as undef to help the IR
49        * reduce the liveness of the register.
50        */
51       if (!inst->is_partial_write())
52          ibld.emit_undef_for_dst(inst);
53 
54       switch (inst->opcode) {
55       case ELK_FS_OPCODE_PACK:
56          for (unsigned i = 0; i < inst->sources; i++)
57             ibld.MOV(subscript(dst, inst->src[i].type, i), inst->src[i]);
58          break;
59       case ELK_FS_OPCODE_PACK_HALF_2x16_SPLIT:
60          assert(dst.type == ELK_REGISTER_TYPE_UD);
61 
62          for (unsigned i = 0; i < inst->sources; i++) {
63             if (inst->src[i].file == IMM) {
64                const uint32_t half = _mesa_float_to_half(inst->src[i].f);
65                ibld.MOV(subscript(dst, ELK_REGISTER_TYPE_UW, i),
66                         elk_imm_uw(half));
67             } else if (i == 1) {
68                /* Pre-Skylake requires DWord aligned destinations */
69                elk_fs_reg tmp = ibld.vgrf(ELK_REGISTER_TYPE_UD);
70                ibld.F32TO16(subscript(tmp, ELK_REGISTER_TYPE_HF, 0),
71                             inst->src[i]);
72                ibld.MOV(subscript(dst, ELK_REGISTER_TYPE_UW, 1),
73                         subscript(tmp, ELK_REGISTER_TYPE_UW, 0));
74             } else {
75                ibld.F32TO16(subscript(dst, ELK_REGISTER_TYPE_HF, i),
76                             inst->src[i]);
77             }
78          }
79          break;
80       default:
81          unreachable("skipped above");
82       }
83 
84       inst->remove(block);
85       progress = true;
86    }
87 
88    if (progress)
89       invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
90 
91    return progress;
92 }
93