; xref: /aosp_15_r20/external/llvm/test/CodeGen/NVPTX/fp-contract.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
;; Two llc invocations drive this test.  The first passes -fp-contract=fast
;; and is checked with the FAST prefix; the second passes no -fp-contract
;; option and is checked with the DEFAULT prefix.
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -fp-contract=fast | FileCheck %s --check-prefix=FAST
; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 | FileCheck %s --check-prefix=DEFAULT

target triple = "nvptx64-unknown-cuda"

;; Make sure we are generating proper instruction sequences for fused ops.
;; If fusion is allowed, we try to form fma.rn at the PTX level, and emit
;; add.f32 otherwise.  Without an explicit rounding mode on add.f32, ptxas
;; is free to fuse with a multiply if it is able.  If fusion is not allowed,
;; we do not form fma.rn at the PTX level and explicitly generate add.rn
;; for all adds to prevent ptxas from fusing the ops.

;; t0: a multiply whose result feeds an add (a * b + c), i.e. a contraction
;; candidate.  With -fp-contract=fast the pair is expected to become a single
;; fma.rn.f32; in the default run it must stay a separate mul.rn.f32 followed
;; by an add.rn.f32.
;; FAST-LABEL: @t0
;; DEFAULT-LABEL: @t0
define float @t0(float %a, float %b, float %c) {
;; FAST: fma.rn.f32
;; DEFAULT: mul.rn.f32
;; DEFAULT: add.rn.f32
  %v0 = fmul float %a, %b
  %v1 = fadd float %v0, %c
  ret float %v1
}

;; t1: a lone add with no multiply to contract with, so we cannot form an fma
;; here in either run.  With -fp-contract=fast a plain add.f32 (no rounding
;; mode) is expected; in the default run make sure we explicitly emit
;; add.rn.f32 to prevent ptxas from fusing this with anything else.
;; FAST-LABEL: @t1
;; DEFAULT-LABEL: @t1
define float @t1(float %a, float %b) {
;; FAST: add.f32
;; DEFAULT: add.rn.f32
  %v1 = fadd float %a, %b
  ret float %v1
}