xref: /aosp_15_r20/external/mesa3d/src/amd/llvm/ac_llvm_helper.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
#include <llvm-c/Core.h>
#include <llvm/Analysis/TargetLibraryInfo.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/Verifier.h>
#include <llvm/Target/TargetMachine.h>
#include <llvm/MC/MCSubtargetInfo.h>
#include <llvm/Support/CommandLine.h>
#include <llvm/Transforms/IPO.h>
#include <llvm/Transforms/Scalar.h>
#include <llvm/Transforms/Utils.h>
#include <llvm/CodeGen/Passes.h>
#include <llvm/Transforms/IPO/AlwaysInliner.h>
#include <llvm/Transforms/InstCombine/InstCombine.h>
#include <llvm/Transforms/IPO/SCCP.h>
#include "llvm/CodeGen/SelectionDAGNodes.h"

#include <cstdio>
#include <cstring>
#include <new>
26 
27 /* DO NOT REORDER THE HEADERS
28  * The LLVM headers need to all be included before any Mesa header,
29  * as they use the `restrict` keyword in ways that are incompatible
30  * with our #define in include/c99_compat.h
31  */
32 
33 #include "ac_binary.h"
34 #include "ac_llvm_util.h"
35 #include "ac_llvm_build.h"
36 #include "util/macros.h"
37 
38 using namespace llvm;
39 
/* Helper type whose only purpose is to execute, at construction time, the
 * SelectionDAG code path that declares function-local static variables
 * (see ac_llvm_run_atexit_for_destructors for why this ordering matters).
 *
 * Subclassing SDNode is required because SDNode's constructor is protected,
 * and getSDVTList (also protected) is the entry into the private
 * getValueTypeList, which contains the statics we want instantiated.
 */
class RunAtExitForStaticDestructors : public SDNode
{
public:
   /* getSDVTList (protected) calls getValueTypeList (private), which contains static variables. */
   RunAtExitForStaticDestructors(): SDNode(0, 0, DebugLoc(), getSDVTList(MVT::Other))
   {
   }
};
48 
ac_llvm_run_atexit_for_destructors(void)49 void ac_llvm_run_atexit_for_destructors(void)
50 {
51    /* LLVM >= 16 registers static variable destructors on the first compile, which gcc
52     * implements by calling atexit there. Before that, u_queue registers its atexit
53     * handler to kill all threads. Since exit() runs atexit handlers in the reverse order,
54     * the LLVM destructors are called first while shader compiler threads may still be
55     * running, which crashes in LLVM in SelectionDAG.cpp.
56     *
57     * The solution is to run the code that declares the LLVM static variables first,
58     * so that atexit for LLVM is registered first and u_queue is registered after that,
59     * which ensures that all u_queue threads are terminated before LLVM destructors are
60     * called.
61     *
62     * This just executes the code that declares static variables.
63     */
64    RunAtExitForStaticDestructors();
65 }
66 
ac_is_llvm_processor_supported(LLVMTargetMachineRef tm,const char * processor)67 bool ac_is_llvm_processor_supported(LLVMTargetMachineRef tm, const char *processor)
68 {
69    TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm);
70    return TM->getMCSubtargetInfo()->isCPUStringValid(processor);
71 }
72 
ac_reset_llvm_all_options_occurrences()73 void ac_reset_llvm_all_options_occurrences()
74 {
75    cl::ResetAllOptionOccurrences();
76 }
77 
ac_add_attr_dereferenceable(LLVMValueRef val,uint64_t bytes)78 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
79 {
80    Argument *A = unwrap<Argument>(val);
81    A->addAttr(Attribute::getWithDereferenceableBytes(A->getContext(), bytes));
82 }
83 
ac_add_attr_alignment(LLVMValueRef val,uint64_t bytes)84 void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes)
85 {
86    Argument *A = unwrap<Argument>(val);
87    A->addAttr(Attribute::getWithAlignment(A->getContext(), Align(bytes)));
88 }
89 
ac_is_sgpr_param(LLVMValueRef arg)90 bool ac_is_sgpr_param(LLVMValueRef arg)
91 {
92    Argument *A = unwrap<Argument>(arg);
93    AttributeList AS = A->getParent()->getAttributes();
94    unsigned ArgNo = A->getArgNo();
95    return AS.hasParamAttr(ArgNo, Attribute::InReg);
96 }
97 
ac_create_module(LLVMTargetMachineRef tm,LLVMContextRef ctx)98 LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
99 {
100    TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm);
101    LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx);
102 
103    unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple());
104    unwrap(module)->setDataLayout(TM->createDataLayout());
105    return module;
106 }
107 
ac_create_builder(LLVMContextRef ctx,enum ac_float_mode float_mode)108 LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, enum ac_float_mode float_mode)
109 {
110    LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);
111 
112    FastMathFlags flags;
113 
114    switch (float_mode) {
115    case AC_FLOAT_MODE_DEFAULT:
116    case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO:
117       break;
118 
119    case AC_FLOAT_MODE_DEFAULT_OPENGL:
120       /* Allow optimizations to treat the sign of a zero argument or
121        * result as insignificant.
122        */
123       flags.setNoSignedZeros(); /* nsz */
124 
125       /* Allow optimizations to use the reciprocal of an argument
126        * rather than perform division.
127        */
128       flags.setAllowReciprocal(); /* arcp */
129 
130       unwrap(builder)->setFastMathFlags(flags);
131       break;
132    }
133 
134    return builder;
135 }
136 
ac_enable_signed_zeros(struct ac_llvm_context * ctx)137 void ac_enable_signed_zeros(struct ac_llvm_context *ctx)
138 {
139    if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
140       auto *b = unwrap(ctx->builder);
141       FastMathFlags flags = b->getFastMathFlags();
142 
143       /* This disables the optimization of (x + 0), which is used
144        * to convert negative zero to positive zero.
145        */
146       flags.setNoSignedZeros(false);
147       b->setFastMathFlags(flags);
148    }
149 }
150 
ac_disable_signed_zeros(struct ac_llvm_context * ctx)151 void ac_disable_signed_zeros(struct ac_llvm_context *ctx)
152 {
153    if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
154       auto *b = unwrap(ctx->builder);
155       FastMathFlags flags = b->getFastMathFlags();
156 
157       flags.setNoSignedZeros();
158       b->setFastMathFlags(flags);
159    }
160 }
161 
ac_create_target_library_info(const char * triple)162 LLVMTargetLibraryInfoRef ac_create_target_library_info(const char *triple)
163 {
164    return reinterpret_cast<LLVMTargetLibraryInfoRef>(
165       new TargetLibraryInfoImpl(Triple(triple)));
166 }
167 
ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)168 void ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
169 {
170    delete reinterpret_cast<TargetLibraryInfoImpl *>(library_info);
171 }
172 
173 /* Implementation of raw_pwrite_stream that works on malloc()ed memory for
174  * better compatibility with C code. */
175 struct raw_memory_ostream : public raw_pwrite_stream {
176    char *buffer;
177    size_t written;
178    size_t bufsize;
179 
raw_memory_ostreamraw_memory_ostream180    raw_memory_ostream()
181    {
182       buffer = NULL;
183       written = 0;
184       bufsize = 0;
185       SetUnbuffered();
186    }
187 
~raw_memory_ostreamraw_memory_ostream188    ~raw_memory_ostream()
189    {
190       free(buffer);
191    }
192 
clearraw_memory_ostream193    void clear()
194    {
195       written = 0;
196    }
197 
takeraw_memory_ostream198    void take(char *&out_buffer, size_t &out_size)
199    {
200       out_buffer = buffer;
201       out_size = written;
202       buffer = NULL;
203       written = 0;
204       bufsize = 0;
205    }
206 
207    void flush() = delete;
208 
write_implraw_memory_ostream209    void write_impl(const char *ptr, size_t size) override
210    {
211       if (unlikely(written + size < written))
212          abort();
213       if (written + size > bufsize) {
214          bufsize = MAX3(1024, written + size, bufsize / 3 * 4);
215          buffer = (char *)realloc(buffer, bufsize);
216          if (!buffer) {
217             fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
218             abort();
219          }
220       }
221       memcpy(buffer + written, ptr, size);
222       written += size;
223    }
224 
pwrite_implraw_memory_ostream225    void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
226    {
227       assert(offset == (size_t)offset && offset + size >= offset && offset + size <= written);
228       memcpy(buffer + offset, ptr, size);
229    }
230 
current_posraw_memory_ostream231    uint64_t current_pos() const override
232    {
233       return written;
234    }
235 };
236 
/* The LLVM compiler is represented as a pass manager containing passes for
 * optimizations, instruction selection, and code generation.
 */
struct ac_compiler_passes {
   raw_memory_ostream ostream;  /* ELF shader binary stream (malloc-backed; see raw_memory_ostream) */
   legacy::PassManager passmgr; /* list of passes */
};
244 
ac_create_llvm_passes(LLVMTargetMachineRef tm)245 struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm)
246 {
247    struct ac_compiler_passes *p = new ac_compiler_passes();
248    if (!p)
249       return NULL;
250 
251    TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm);
252 
253    if (TM->addPassesToEmitFile(p->passmgr, p->ostream, nullptr,
254 #if LLVM_VERSION_MAJOR >= 18
255                                CodeGenFileType::ObjectFile)) {
256 #else
257                                CGFT_ObjectFile)) {
258 #endif
259       fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
260       delete p;
261       return NULL;
262    }
263    return p;
264 }
265 
/* Free a pass pipeline created by ac_create_llvm_passes (including any
 * buffered ELF data still held by its stream). Safe to call with NULL:
 * deleting a null pointer is a no-op. */
void ac_destroy_llvm_passes(struct ac_compiler_passes *p)
{
   delete p;
}
270 
/* This returns false on failure. */
bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
                              char **pelf_buffer, size_t *pelf_size)
{
   /* Run the codegen pipeline; the ELF image accumulates in p->ostream.
    * NOTE(review): despite the comment above, this path always returns true —
    * the run() result (whether the IR was modified) is ignored and no error
    * surfaces here; confirm whether callers rely on a failure path. */
   p->passmgr.run(*unwrap(module));
   /* Hand the malloc()ed ELF buffer to the caller, who must free() it. */
   p->ostream.take(*pelf_buffer, *pelf_size);
   return true;
}
279 
280 LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info,
281                                      bool check_ir)
282 {
283    LLVMPassManagerRef passmgr = LLVMCreatePassManager();
284    if (!passmgr)
285       return NULL;
286 
287    if (target_library_info)
288       LLVMAddTargetLibraryInfo(target_library_info, passmgr);
289 
290    if (check_ir)
291       unwrap(passmgr)->add(createVerifierPass());
292 
293    unwrap(passmgr)->add(createAlwaysInlinerLegacyPass());
294 
295    /* Normally, the pass manager runs all passes on one function before
296     * moving onto another. Adding a barrier no-op pass forces the pass
297     * manager to run the inliner on all functions first, which makes sure
298     * that the following passes are only run on the remaining non-inline
299     * function, so it removes useless work done on dead inline functions.
300     */
301    unwrap(passmgr)->add(createBarrierNoopPass());
302 
303    #if LLVM_VERSION_MAJOR >= 16
304    unwrap(passmgr)->add(createSROAPass(true));
305    #else
306    unwrap(passmgr)->add(createSROAPass());
307    #endif
308    /* TODO: restore IPSCCP */
309    unwrap(passmgr)->add(createLICMPass());
310    unwrap(passmgr)->add(createCFGSimplificationPass());
311    /* This is recommended by the instruction combining pass. */
312    unwrap(passmgr)->add(createEarlyCSEPass(true));
313    unwrap(passmgr)->add(createInstructionCombiningPass());
314    return passmgr;
315 }
316 
317 LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
318                                  LLVMValueRef ptr, LLVMValueRef val, const char *sync_scope)
319 {
320    AtomicRMWInst::BinOp binop;
321    switch (op) {
322    case LLVMAtomicRMWBinOpXchg:
323       binop = AtomicRMWInst::Xchg;
324       break;
325    case LLVMAtomicRMWBinOpAdd:
326       binop = AtomicRMWInst::Add;
327       break;
328    case LLVMAtomicRMWBinOpSub:
329       binop = AtomicRMWInst::Sub;
330       break;
331    case LLVMAtomicRMWBinOpAnd:
332       binop = AtomicRMWInst::And;
333       break;
334    case LLVMAtomicRMWBinOpNand:
335       binop = AtomicRMWInst::Nand;
336       break;
337    case LLVMAtomicRMWBinOpOr:
338       binop = AtomicRMWInst::Or;
339       break;
340    case LLVMAtomicRMWBinOpXor:
341       binop = AtomicRMWInst::Xor;
342       break;
343    case LLVMAtomicRMWBinOpMax:
344       binop = AtomicRMWInst::Max;
345       break;
346    case LLVMAtomicRMWBinOpMin:
347       binop = AtomicRMWInst::Min;
348       break;
349    case LLVMAtomicRMWBinOpUMax:
350       binop = AtomicRMWInst::UMax;
351       break;
352    case LLVMAtomicRMWBinOpUMin:
353       binop = AtomicRMWInst::UMin;
354       break;
355    case LLVMAtomicRMWBinOpFAdd:
356       binop = AtomicRMWInst::FAdd;
357       break;
358    default:
359       unreachable("invalid LLVMAtomicRMWBinOp");
360       break;
361    }
362    unsigned SSID = unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
363    return wrap(unwrap(ctx->builder)
364                         ->CreateAtomicRMW(binop, unwrap(ptr), unwrap(val),
365                                           MaybeAlign(0),
366                                           AtomicOrdering::SequentiallyConsistent, SSID));
367 }
368 
369 LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
370                                       LLVMValueRef cmp, LLVMValueRef val, const char *sync_scope)
371 {
372    unsigned SSID = unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
373    return wrap(unwrap(ctx->builder)
374                         ->CreateAtomicCmpXchg(unwrap(ptr), unwrap(cmp),
375                                               unwrap(val),
376                                               MaybeAlign(0),
377                                               AtomicOrdering::SequentiallyConsistent,
378                                               AtomicOrdering::SequentiallyConsistent, SSID));
379 }
380