1// Copyright 2010 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5#include "textflag.h" 6 7// The method is based on a paper by Naoki Shibata: "Efficient evaluation 8// methods of elementary functions suitable for SIMD computation", Proc. 9// of International Supercomputing Conference 2010 (ISC'10), pp. 25 -- 32 10// (May 2010). The paper is available at 11// https://link.springer.com/article/10.1007/s00450-010-0108-2 12// 13// The original code and the constants below are from the author's 14// implementation available at http://freshmeat.net/projects/sleef. 15// The README file says, "The software is in public domain. 16// You can use the software without any obligation." 17// 18// This code is a simplified version of the original. 19 20#define LN2 0.6931471805599453094172321214581766 // log_e(2) 21#define LOG2E 1.4426950408889634073599246810018920 // 1/LN2 22#define LN2U 0.69314718055966295651160180568695068359375 // upper half LN2 23#define LN2L 0.28235290563031577122588448175013436025525412068e-12 // lower half LN2 24#define PosInf 0x7FF0000000000000 25#define NegInf 0xFFF0000000000000 26#define Overflow 7.09782712893384e+02 27 28DATA exprodata<>+0(SB)/8, $0.5 29DATA exprodata<>+8(SB)/8, $1.0 30DATA exprodata<>+16(SB)/8, $2.0 31DATA exprodata<>+24(SB)/8, $1.6666666666666666667e-1 32DATA exprodata<>+32(SB)/8, $4.1666666666666666667e-2 33DATA exprodata<>+40(SB)/8, $8.3333333333333333333e-3 34DATA exprodata<>+48(SB)/8, $1.3888888888888888889e-3 35DATA exprodata<>+56(SB)/8, $1.9841269841269841270e-4 36DATA exprodata<>+64(SB)/8, $2.4801587301587301587e-5 37GLOBL exprodata<>+0(SB), RODATA, $72 38 39// func Exp(x float64) float64 40TEXT ·archExp(SB),NOSPLIT,$0 41 // test bits for not-finite 42 MOVQ x+0(FP), BX 43 MOVQ $~(1<<63), AX // sign bit mask 44 MOVQ BX, DX 45 ANDQ AX, DX 46 MOVQ $PosInf, AX 47 CMPQ AX, DX 48 JLE notFinite 49 // check if argument will overflow 50 MOVQ BX, X0 51 MOVSD $Overflow, X1 52 COMISD X1, X0 53 JA overflow 54 MOVSD $LOG2E, X1 55 MULSD X0, X1 56 CVTSD2SL X1, BX // BX = exponent 57 CVTSL2SD BX, X1 58 CMPB ·useFMA(SB), $1 59 JE avxfma 60 MOVSD $LN2U, X2 61 MULSD X1, X2 62 SUBSD X2, X0 63 MOVSD $LN2L, X2 64 MULSD X1, X2 65 SUBSD X2, X0 66 // reduce argument 67 MULSD $0.0625, X0 68 // Taylor series evaluation 69 MOVSD exprodata<>+64(SB), X1 70 MULSD X0, X1 71 ADDSD exprodata<>+56(SB), X1 72 MULSD X0, X1 73 ADDSD exprodata<>+48(SB), X1 74 MULSD X0, X1 75 ADDSD exprodata<>+40(SB), X1 76 MULSD X0, X1 77 ADDSD exprodata<>+32(SB), X1 78 MULSD X0, X1 79 ADDSD exprodata<>+24(SB), X1 80 MULSD X0, X1 81 ADDSD exprodata<>+0(SB), X1 82 MULSD X0, X1 83 ADDSD exprodata<>+8(SB), X1 84 MULSD X1, X0 85 MOVSD exprodata<>+16(SB), X1 86 ADDSD X0, X1 87 MULSD X1, X0 88 MOVSD exprodata<>+16(SB), X1 89 ADDSD X0, X1 90 MULSD X1, X0 91 MOVSD exprodata<>+16(SB), X1 92 ADDSD X0, X1 93 MULSD X1, X0 94 MOVSD exprodata<>+16(SB), X1 95 ADDSD X0, X1 96 MULSD X1, X0 97 ADDSD exprodata<>+8(SB), X0 98 // return fr * 2**exponent 99ldexp: 100 ADDL $0x3FF, BX // add bias 101 JLE denormal 102 CMPL BX, $0x7FF 103 JGE overflow 104lastStep: 105 SHLQ $52, BX 106 MOVQ BX, X1 107 MULSD X1, X0 108 MOVSD X0, ret+8(FP) 109 RET 110notFinite: 111 // test bits for -Inf 112 MOVQ $NegInf, AX 113 CMPQ AX, BX 114 JNE notNegInf 115 // -Inf, return 0 116underflow: // return 0 117 MOVQ $0, ret+8(FP) 118 RET 119overflow: // return +Inf 120 MOVQ $PosInf, BX 121notNegInf: // NaN or +Inf, return x 122 MOVQ BX, ret+8(FP) 123 RET 124denormal: 125 CMPL BX, $-52 126 JL underflow 127 ADDL $0x3FE, BX // add bias - 1 128 SHLQ $52, BX 129 MOVQ BX, X1 130 MULSD X1, X0 131 MOVQ $1, BX 132 JMP lastStep 133 134avxfma: 135 MOVSD $LN2U, X2 136 VFNMADD231SD X2, X1, X0 137 MOVSD $LN2L, X2 138 VFNMADD231SD X2, X1, X0 139 // reduce argument 140 MULSD $0.0625, X0 141 // Taylor series evaluation 142 MOVSD exprodata<>+64(SB), X1 143 VFMADD213SD exprodata<>+56(SB), X0, X1 144 VFMADD213SD exprodata<>+48(SB), X0, X1 145 VFMADD213SD exprodata<>+40(SB), X0, X1 146 VFMADD213SD exprodata<>+32(SB), X0, X1 147 VFMADD213SD exprodata<>+24(SB), X0, X1 148 VFMADD213SD exprodata<>+0(SB), X0, X1 149 VFMADD213SD exprodata<>+8(SB), X0, X1 150 MULSD X1, X0 151 VADDSD exprodata<>+16(SB), X0, X1 152 MULSD X1, X0 153 VADDSD exprodata<>+16(SB), X0, X1 154 MULSD X1, X0 155 VADDSD exprodata<>+16(SB), X0, X1 156 MULSD X1, X0 157 VADDSD exprodata<>+16(SB), X0, X1 158 VFMADD213SD exprodata<>+8(SB), X1, X0 159 JMP ldexp 160