1*bf2c3715SXin Li #include <typeinfo>
2*bf2c3715SXin Li #include <iostream>
3*bf2c3715SXin Li #include <Eigen/Core>
4*bf2c3715SXin Li #include "BenchTimer.h"
5*bf2c3715SXin Li using namespace Eigen;
6*bf2c3715SXin Li using namespace std;
7*bf2c3715SXin Li
8*bf2c3715SXin Li template<typename T>
sqsumNorm(T & v)9*bf2c3715SXin Li EIGEN_DONT_INLINE typename T::Scalar sqsumNorm(T& v)
10*bf2c3715SXin Li {
11*bf2c3715SXin Li return v.norm();
12*bf2c3715SXin Li }
13*bf2c3715SXin Li
14*bf2c3715SXin Li template<typename T>
stableNorm(T & v)15*bf2c3715SXin Li EIGEN_DONT_INLINE typename T::Scalar stableNorm(T& v)
16*bf2c3715SXin Li {
17*bf2c3715SXin Li return v.stableNorm();
18*bf2c3715SXin Li }
19*bf2c3715SXin Li
20*bf2c3715SXin Li template<typename T>
hypotNorm(T & v)21*bf2c3715SXin Li EIGEN_DONT_INLINE typename T::Scalar hypotNorm(T& v)
22*bf2c3715SXin Li {
23*bf2c3715SXin Li return v.hypotNorm();
24*bf2c3715SXin Li }
25*bf2c3715SXin Li
26*bf2c3715SXin Li template<typename T>
blueNorm(T & v)27*bf2c3715SXin Li EIGEN_DONT_INLINE typename T::Scalar blueNorm(T& v)
28*bf2c3715SXin Li {
29*bf2c3715SXin Li return v.blueNorm();
30*bf2c3715SXin Li }
31*bf2c3715SXin Li
32*bf2c3715SXin Li template<typename T>
lapackNorm(T & v)33*bf2c3715SXin Li EIGEN_DONT_INLINE typename T::Scalar lapackNorm(T& v)
34*bf2c3715SXin Li {
35*bf2c3715SXin Li typedef typename T::Scalar Scalar;
36*bf2c3715SXin Li int n = v.size();
37*bf2c3715SXin Li Scalar scale = 0;
38*bf2c3715SXin Li Scalar ssq = 1;
39*bf2c3715SXin Li for (int i=0;i<n;++i)
40*bf2c3715SXin Li {
41*bf2c3715SXin Li Scalar ax = std::abs(v.coeff(i));
42*bf2c3715SXin Li if (scale >= ax)
43*bf2c3715SXin Li {
44*bf2c3715SXin Li ssq += numext::abs2(ax/scale);
45*bf2c3715SXin Li }
46*bf2c3715SXin Li else
47*bf2c3715SXin Li {
48*bf2c3715SXin Li ssq = Scalar(1) + ssq * numext::abs2(scale/ax);
49*bf2c3715SXin Li scale = ax;
50*bf2c3715SXin Li }
51*bf2c3715SXin Li }
52*bf2c3715SXin Li return scale * std::sqrt(ssq);
53*bf2c3715SXin Li }
54*bf2c3715SXin Li
55*bf2c3715SXin Li template<typename T>
twopassNorm(T & v)56*bf2c3715SXin Li EIGEN_DONT_INLINE typename T::Scalar twopassNorm(T& v)
57*bf2c3715SXin Li {
58*bf2c3715SXin Li typedef typename T::Scalar Scalar;
59*bf2c3715SXin Li Scalar s = v.array().abs().maxCoeff();
60*bf2c3715SXin Li return s*(v/s).norm();
61*bf2c3715SXin Li }
62*bf2c3715SXin Li
63*bf2c3715SXin Li template<typename T>
bl2passNorm(T & v)64*bf2c3715SXin Li EIGEN_DONT_INLINE typename T::Scalar bl2passNorm(T& v)
65*bf2c3715SXin Li {
66*bf2c3715SXin Li return v.stableNorm();
67*bf2c3715SXin Li }
68*bf2c3715SXin Li
69*bf2c3715SXin Li template<typename T>
divacNorm(T & v)70*bf2c3715SXin Li EIGEN_DONT_INLINE typename T::Scalar divacNorm(T& v)
71*bf2c3715SXin Li {
72*bf2c3715SXin Li int n =v.size() / 2;
73*bf2c3715SXin Li for (int i=0;i<n;++i)
74*bf2c3715SXin Li v(i) = v(2*i)*v(2*i) + v(2*i+1)*v(2*i+1);
75*bf2c3715SXin Li n = n/2;
76*bf2c3715SXin Li while (n>0)
77*bf2c3715SXin Li {
78*bf2c3715SXin Li for (int i=0;i<n;++i)
79*bf2c3715SXin Li v(i) = v(2*i) + v(2*i+1);
80*bf2c3715SXin Li n = n/2;
81*bf2c3715SXin Li }
82*bf2c3715SXin Li return std::sqrt(v(0));
83*bf2c3715SXin Li }
84*bf2c3715SXin Li
namespace Eigen {
namespace internal {
#ifdef EIGEN_VECTORIZE
// Extra packet helpers built directly on SSE intrinsics.
//
// In every helper the second parameter is a NON-const reference, so these
// overloads are only viable when the second argument is a non-const lvalue.

// Lane-wise "a < b" comparison mask for four floats.
Packet4f plt(const Packet4f& a, Packet4f& b)
{
  return _mm_cmplt_ps(a, b);
}

// Lane-wise "a < b" comparison mask for two doubles.
Packet2d plt(const Packet2d& a, Packet2d& b)
{
  return _mm_cmplt_pd(a, b);
}

// Bitwise (NOT a) AND b, in the operand order of _mm_andnot_ps.
// NOTE(review): Eigen's own internal::pandnot(a, b) is believed to compute
// a AND (NOT b) -- confirm which overload each call site actually selects.
Packet4f pandnot(const Packet4f& a, Packet4f& b)
{
  return _mm_andnot_ps(a, b);
}

// Bitwise (NOT a) AND b, in the operand order of _mm_andnot_pd.
Packet2d pandnot(const Packet2d& a, Packet2d& b)
{
  return _mm_andnot_pd(a, b);
}
#endif
} // namespace internal
} // namespace Eigen
96*bf2c3715SXin Li
97*bf2c3715SXin Li template<typename T>
pblueNorm(const T & v)98*bf2c3715SXin Li EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
99*bf2c3715SXin Li {
100*bf2c3715SXin Li #ifndef EIGEN_VECTORIZE
101*bf2c3715SXin Li return v.blueNorm();
102*bf2c3715SXin Li #else
103*bf2c3715SXin Li typedef typename T::Scalar Scalar;
104*bf2c3715SXin Li
105*bf2c3715SXin Li static int nmax = 0;
106*bf2c3715SXin Li static Scalar b1, b2, s1m, s2m, overfl, rbig, relerr;
107*bf2c3715SXin Li int n;
108*bf2c3715SXin Li
109*bf2c3715SXin Li if(nmax <= 0)
110*bf2c3715SXin Li {
111*bf2c3715SXin Li int nbig, ibeta, it, iemin, iemax, iexp;
112*bf2c3715SXin Li Scalar abig, eps;
113*bf2c3715SXin Li
114*bf2c3715SXin Li nbig = NumTraits<int>::highest(); // largest integer
115*bf2c3715SXin Li ibeta = std::numeric_limits<Scalar>::radix; // NumTraits<Scalar>::Base; // base for floating-point numbers
116*bf2c3715SXin Li it = NumTraits<Scalar>::digits(); // NumTraits<Scalar>::Mantissa; // number of base-beta digits in mantissa
117*bf2c3715SXin Li iemin = NumTraits<Scalar>::min_exponent(); // minimum exponent
118*bf2c3715SXin Li iemax = NumTraits<Scalar>::max_exponent(); // maximum exponent
119*bf2c3715SXin Li rbig = NumTraits<Scalar>::highest(); // largest floating-point number
120*bf2c3715SXin Li
121*bf2c3715SXin Li // Check the basic machine-dependent constants.
122*bf2c3715SXin Li if(iemin > 1 - 2*it || 1+it>iemax || (it==2 && ibeta<5)
123*bf2c3715SXin Li || (it<=4 && ibeta <= 3 ) || it<2)
124*bf2c3715SXin Li {
125*bf2c3715SXin Li eigen_assert(false && "the algorithm cannot be guaranteed on this computer");
126*bf2c3715SXin Li }
127*bf2c3715SXin Li iexp = -((1-iemin)/2);
128*bf2c3715SXin Li b1 = std::pow(ibeta, iexp); // lower boundary of midrange
129*bf2c3715SXin Li iexp = (iemax + 1 - it)/2;
130*bf2c3715SXin Li b2 = std::pow(ibeta,iexp); // upper boundary of midrange
131*bf2c3715SXin Li
132*bf2c3715SXin Li iexp = (2-iemin)/2;
133*bf2c3715SXin Li s1m = std::pow(ibeta,iexp); // scaling factor for lower range
134*bf2c3715SXin Li iexp = - ((iemax+it)/2);
135*bf2c3715SXin Li s2m = std::pow(ibeta,iexp); // scaling factor for upper range
136*bf2c3715SXin Li
137*bf2c3715SXin Li overfl = rbig*s2m; // overflow boundary for abig
138*bf2c3715SXin Li eps = std::pow(ibeta, 1-it);
139*bf2c3715SXin Li relerr = std::sqrt(eps); // tolerance for neglecting asml
140*bf2c3715SXin Li abig = 1.0/eps - 1.0;
141*bf2c3715SXin Li if (Scalar(nbig)>abig) nmax = abig; // largest safe n
142*bf2c3715SXin Li else nmax = nbig;
143*bf2c3715SXin Li }
144*bf2c3715SXin Li
145*bf2c3715SXin Li typedef typename internal::packet_traits<Scalar>::type Packet;
146*bf2c3715SXin Li const int ps = internal::packet_traits<Scalar>::size;
147*bf2c3715SXin Li Packet pasml = internal::pset1<Packet>(Scalar(0));
148*bf2c3715SXin Li Packet pamed = internal::pset1<Packet>(Scalar(0));
149*bf2c3715SXin Li Packet pabig = internal::pset1<Packet>(Scalar(0));
150*bf2c3715SXin Li Packet ps2m = internal::pset1<Packet>(s2m);
151*bf2c3715SXin Li Packet ps1m = internal::pset1<Packet>(s1m);
152*bf2c3715SXin Li Packet pb2 = internal::pset1<Packet>(b2);
153*bf2c3715SXin Li Packet pb1 = internal::pset1<Packet>(b1);
154*bf2c3715SXin Li for(int j=0; j<v.size(); j+=ps)
155*bf2c3715SXin Li {
156*bf2c3715SXin Li Packet ax = internal::pabs(v.template packet<Aligned>(j));
157*bf2c3715SXin Li Packet ax_s2m = internal::pmul(ax,ps2m);
158*bf2c3715SXin Li Packet ax_s1m = internal::pmul(ax,ps1m);
159*bf2c3715SXin Li Packet maskBig = internal::plt(pb2,ax);
160*bf2c3715SXin Li Packet maskSml = internal::plt(ax,pb1);
161*bf2c3715SXin Li
162*bf2c3715SXin Li // Packet maskMed = internal::pand(maskSml,maskBig);
163*bf2c3715SXin Li // Packet scale = internal::pset1(Scalar(0));
164*bf2c3715SXin Li // scale = internal::por(scale, internal::pand(maskBig,ps2m));
165*bf2c3715SXin Li // scale = internal::por(scale, internal::pand(maskSml,ps1m));
166*bf2c3715SXin Li // scale = internal::por(scale, internal::pandnot(internal::pset1(Scalar(1)),maskMed));
167*bf2c3715SXin Li // ax = internal::pmul(ax,scale);
168*bf2c3715SXin Li // ax = internal::pmul(ax,ax);
169*bf2c3715SXin Li // pabig = internal::padd(pabig, internal::pand(maskBig, ax));
170*bf2c3715SXin Li // pasml = internal::padd(pasml, internal::pand(maskSml, ax));
171*bf2c3715SXin Li // pamed = internal::padd(pamed, internal::pandnot(ax,maskMed));
172*bf2c3715SXin Li
173*bf2c3715SXin Li
174*bf2c3715SXin Li pabig = internal::padd(pabig, internal::pand(maskBig, internal::pmul(ax_s2m,ax_s2m)));
175*bf2c3715SXin Li pasml = internal::padd(pasml, internal::pand(maskSml, internal::pmul(ax_s1m,ax_s1m)));
176*bf2c3715SXin Li pamed = internal::padd(pamed, internal::pandnot(internal::pmul(ax,ax),internal::pand(maskSml,maskBig)));
177*bf2c3715SXin Li }
178*bf2c3715SXin Li Scalar abig = internal::predux(pabig);
179*bf2c3715SXin Li Scalar asml = internal::predux(pasml);
180*bf2c3715SXin Li Scalar amed = internal::predux(pamed);
181*bf2c3715SXin Li if(abig > Scalar(0))
182*bf2c3715SXin Li {
183*bf2c3715SXin Li abig = std::sqrt(abig);
184*bf2c3715SXin Li if(abig > overfl)
185*bf2c3715SXin Li {
186*bf2c3715SXin Li eigen_assert(false && "overflow");
187*bf2c3715SXin Li return rbig;
188*bf2c3715SXin Li }
189*bf2c3715SXin Li if(amed > Scalar(0))
190*bf2c3715SXin Li {
191*bf2c3715SXin Li abig = abig/s2m;
192*bf2c3715SXin Li amed = std::sqrt(amed);
193*bf2c3715SXin Li }
194*bf2c3715SXin Li else
195*bf2c3715SXin Li {
196*bf2c3715SXin Li return abig/s2m;
197*bf2c3715SXin Li }
198*bf2c3715SXin Li
199*bf2c3715SXin Li }
200*bf2c3715SXin Li else if(asml > Scalar(0))
201*bf2c3715SXin Li {
202*bf2c3715SXin Li if (amed > Scalar(0))
203*bf2c3715SXin Li {
204*bf2c3715SXin Li abig = std::sqrt(amed);
205*bf2c3715SXin Li amed = std::sqrt(asml) / s1m;
206*bf2c3715SXin Li }
207*bf2c3715SXin Li else
208*bf2c3715SXin Li {
209*bf2c3715SXin Li return std::sqrt(asml)/s1m;
210*bf2c3715SXin Li }
211*bf2c3715SXin Li }
212*bf2c3715SXin Li else
213*bf2c3715SXin Li {
214*bf2c3715SXin Li return std::sqrt(amed);
215*bf2c3715SXin Li }
216*bf2c3715SXin Li asml = std::min(abig, amed);
217*bf2c3715SXin Li abig = std::max(abig, amed);
218*bf2c3715SXin Li if(asml <= abig*relerr)
219*bf2c3715SXin Li return abig;
220*bf2c3715SXin Li else
221*bf2c3715SXin Li return abig * std::sqrt(Scalar(1) + numext::abs2(asml/abig));
222*bf2c3715SXin Li #endif
223*bf2c3715SXin Li }
224*bf2c3715SXin Li
// Times the function NRM on the float vector `vf` and the double vector `vd`.
// Expects `tries`, `iters`, `vf` and `vd` to be visible at the expansion site.
// For each scalar type the timer is started/stopped `tries` times around
// `iters` calls; results are summed into af/ad so the calls have a consumer.
// The complex-float timing is disabled, so `tcf` is reset but never started;
// its value() is still printed as the third column.
#define BENCH_PERF(NRM) { \
  Eigen::BenchTimer tf, td, tcf; \
  tf.reset(); \
  td.reset(); \
  tcf.reset(); \
  float af = 0; \
  double ad = 0; \
  std::complex<float> ac = 0; \
  for (int attempt = 0; attempt < tries; ++attempt) \
  { \
    tf.start(); \
    for (int rep = 0; rep < iters; ++rep) \
      af += NRM(vf); \
    tf.stop(); \
  } \
  for (int attempt = 0; attempt < tries; ++attempt) \
  { \
    td.start(); \
    for (int rep = 0; rep < iters; ++rep) \
      ad += NRM(vd); \
    td.stop(); \
  } \
  /* complex<float> run intentionally disabled: \
  for (int attempt = 0; attempt < std::max(1,tries/3); ++attempt) \
  { \
    tcf.start(); \
    for (int rep = 0; rep < iters; ++rep) \
      ac += NRM(vcf); \
    tcf.stop(); \
  } */ \
  std::cout << #NRM << "\t" << tf.value() << " " << td.value() << " " << tcf.value() << "\n"; \
}
245*bf2c3715SXin Li
check_accuracy(double basef,double based,int s)246*bf2c3715SXin Li void check_accuracy(double basef, double based, int s)
247*bf2c3715SXin Li {
248*bf2c3715SXin Li double yf = basef * std::abs(internal::random<double>());
249*bf2c3715SXin Li double yd = based * std::abs(internal::random<double>());
250*bf2c3715SXin Li VectorXf vf = VectorXf::Ones(s) * yf;
251*bf2c3715SXin Li VectorXd vd = VectorXd::Ones(s) * yd;
252*bf2c3715SXin Li
253*bf2c3715SXin Li std::cout << "reference\t" << std::sqrt(double(s))*yf << "\t" << std::sqrt(double(s))*yd << "\n";
254*bf2c3715SXin Li std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\n";
255*bf2c3715SXin Li std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\n";
256*bf2c3715SXin Li std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\n";
257*bf2c3715SXin Li std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\n";
258*bf2c3715SXin Li std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\n";
259*bf2c3715SXin Li std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\n";
260*bf2c3715SXin Li std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\n";
261*bf2c3715SXin Li }
262*bf2c3715SXin Li
check_accuracy_var(int ef0,int ef1,int ed0,int ed1,int s)263*bf2c3715SXin Li void check_accuracy_var(int ef0, int ef1, int ed0, int ed1, int s)
264*bf2c3715SXin Li {
265*bf2c3715SXin Li VectorXf vf(s);
266*bf2c3715SXin Li VectorXd vd(s);
267*bf2c3715SXin Li for (int i=0; i<s; ++i)
268*bf2c3715SXin Li {
269*bf2c3715SXin Li vf[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ef0,ef1));
270*bf2c3715SXin Li vd[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ed0,ed1));
271*bf2c3715SXin Li }
272*bf2c3715SXin Li
273*bf2c3715SXin Li //std::cout << "reference\t" << internal::sqrt(double(s))*yf << "\t" << internal::sqrt(double(s))*yd << "\n";
274*bf2c3715SXin Li std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\t" << sqsumNorm(vf.cast<long double>()) << "\t" << sqsumNorm(vd.cast<long double>()) << "\n";
275*bf2c3715SXin Li std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\t" << hypotNorm(vf.cast<long double>()) << "\t" << hypotNorm(vd.cast<long double>()) << "\n";
276*bf2c3715SXin Li std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\t" << blueNorm(vf.cast<long double>()) << "\t" << blueNorm(vd.cast<long double>()) << "\n";
277*bf2c3715SXin Li std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\t" << blueNorm(vf.cast<long double>()) << "\t" << blueNorm(vd.cast<long double>()) << "\n";
278*bf2c3715SXin Li std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\t" << lapackNorm(vf.cast<long double>()) << "\t" << lapackNorm(vd.cast<long double>()) << "\n";
279*bf2c3715SXin Li std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\t" << twopassNorm(vf.cast<long double>()) << "\t" << twopassNorm(vd.cast<long double>()) << "\n";
280*bf2c3715SXin Li // std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\t" << bl2passNorm(vf.cast<long double>()) << "\t" << bl2passNorm(vd.cast<long double>()) << "\n";
281*bf2c3715SXin Li }
282*bf2c3715SXin Li
main(int argc,char ** argv)283*bf2c3715SXin Li int main(int argc, char** argv)
284*bf2c3715SXin Li {
285*bf2c3715SXin Li int tries = 10;
286*bf2c3715SXin Li int iters = 100000;
287*bf2c3715SXin Li double y = 1.1345743233455785456788e12 * internal::random<double>();
288*bf2c3715SXin Li VectorXf v = VectorXf::Ones(1024) * y;
289*bf2c3715SXin Li
290*bf2c3715SXin Li // return 0;
291*bf2c3715SXin Li int s = 10000;
292*bf2c3715SXin Li double basef_ok = 1.1345743233455785456788e15;
293*bf2c3715SXin Li double based_ok = 1.1345743233455785456788e95;
294*bf2c3715SXin Li
295*bf2c3715SXin Li double basef_under = 1.1345743233455785456788e-27;
296*bf2c3715SXin Li double based_under = 1.1345743233455785456788e-303;
297*bf2c3715SXin Li
298*bf2c3715SXin Li double basef_over = 1.1345743233455785456788e+27;
299*bf2c3715SXin Li double based_over = 1.1345743233455785456788e+302;
300*bf2c3715SXin Li
301*bf2c3715SXin Li std::cout.precision(20);
302*bf2c3715SXin Li
303*bf2c3715SXin Li std::cerr << "\nNo under/overflow:\n";
304*bf2c3715SXin Li check_accuracy(basef_ok, based_ok, s);
305*bf2c3715SXin Li
306*bf2c3715SXin Li std::cerr << "\nUnderflow:\n";
307*bf2c3715SXin Li check_accuracy(basef_under, based_under, s);
308*bf2c3715SXin Li
309*bf2c3715SXin Li std::cerr << "\nOverflow:\n";
310*bf2c3715SXin Li check_accuracy(basef_over, based_over, s);
311*bf2c3715SXin Li
312*bf2c3715SXin Li std::cerr << "\nVarying (over):\n";
313*bf2c3715SXin Li for (int k=0; k<1; ++k)
314*bf2c3715SXin Li {
315*bf2c3715SXin Li check_accuracy_var(20,27,190,302,s);
316*bf2c3715SXin Li std::cout << "\n";
317*bf2c3715SXin Li }
318*bf2c3715SXin Li
319*bf2c3715SXin Li std::cerr << "\nVarying (under):\n";
320*bf2c3715SXin Li for (int k=0; k<1; ++k)
321*bf2c3715SXin Li {
322*bf2c3715SXin Li check_accuracy_var(-27,20,-302,-190,s);
323*bf2c3715SXin Li std::cout << "\n";
324*bf2c3715SXin Li }
325*bf2c3715SXin Li
326*bf2c3715SXin Li y = 1;
327*bf2c3715SXin Li std::cout.precision(4);
328*bf2c3715SXin Li int s1 = 1024*1024*32;
329*bf2c3715SXin Li std::cerr << "Performance (out of cache, " << s1 << "):\n";
330*bf2c3715SXin Li {
331*bf2c3715SXin Li int iters = 1;
332*bf2c3715SXin Li VectorXf vf = VectorXf::Random(s1) * y;
333*bf2c3715SXin Li VectorXd vd = VectorXd::Random(s1) * y;
334*bf2c3715SXin Li VectorXcf vcf = VectorXcf::Random(s1) * y;
335*bf2c3715SXin Li BENCH_PERF(sqsumNorm);
336*bf2c3715SXin Li BENCH_PERF(stableNorm);
337*bf2c3715SXin Li BENCH_PERF(blueNorm);
338*bf2c3715SXin Li BENCH_PERF(pblueNorm);
339*bf2c3715SXin Li BENCH_PERF(lapackNorm);
340*bf2c3715SXin Li BENCH_PERF(hypotNorm);
341*bf2c3715SXin Li BENCH_PERF(twopassNorm);
342*bf2c3715SXin Li BENCH_PERF(bl2passNorm);
343*bf2c3715SXin Li }
344*bf2c3715SXin Li
345*bf2c3715SXin Li std::cerr << "\nPerformance (in cache, " << 512 << "):\n";
346*bf2c3715SXin Li {
347*bf2c3715SXin Li int iters = 100000;
348*bf2c3715SXin Li VectorXf vf = VectorXf::Random(512) * y;
349*bf2c3715SXin Li VectorXd vd = VectorXd::Random(512) * y;
350*bf2c3715SXin Li VectorXcf vcf = VectorXcf::Random(512) * y;
351*bf2c3715SXin Li BENCH_PERF(sqsumNorm);
352*bf2c3715SXin Li BENCH_PERF(stableNorm);
353*bf2c3715SXin Li BENCH_PERF(blueNorm);
354*bf2c3715SXin Li BENCH_PERF(pblueNorm);
355*bf2c3715SXin Li BENCH_PERF(lapackNorm);
356*bf2c3715SXin Li BENCH_PERF(hypotNorm);
357*bf2c3715SXin Li BENCH_PERF(twopassNorm);
358*bf2c3715SXin Li BENCH_PERF(bl2passNorm);
359*bf2c3715SXin Li }
360*bf2c3715SXin Li }
361