// Origin: /aosp_15_r20/external/eigen/bench/bench_norm.cpp (revision bf2c37156dfe67e5dfebd6d394bad8b2ab5804d4)
#include <algorithm>
#include <cmath>
#include <complex>
#include <iostream>
#include <limits>
#include <typeinfo>
#include <Eigen/Core>
#include "BenchTimer.h"
using namespace Eigen;
using namespace std;
7*bf2c3715SXin Li 
8*bf2c3715SXin Li template<typename T>
sqsumNorm(T & v)9*bf2c3715SXin Li EIGEN_DONT_INLINE typename T::Scalar sqsumNorm(T& v)
10*bf2c3715SXin Li {
11*bf2c3715SXin Li   return v.norm();
12*bf2c3715SXin Li }
13*bf2c3715SXin Li 
14*bf2c3715SXin Li template<typename T>
stableNorm(T & v)15*bf2c3715SXin Li EIGEN_DONT_INLINE typename T::Scalar stableNorm(T& v)
16*bf2c3715SXin Li {
17*bf2c3715SXin Li   return v.stableNorm();
18*bf2c3715SXin Li }
19*bf2c3715SXin Li 
20*bf2c3715SXin Li template<typename T>
hypotNorm(T & v)21*bf2c3715SXin Li EIGEN_DONT_INLINE typename T::Scalar hypotNorm(T& v)
22*bf2c3715SXin Li {
23*bf2c3715SXin Li   return v.hypotNorm();
24*bf2c3715SXin Li }
25*bf2c3715SXin Li 
26*bf2c3715SXin Li template<typename T>
blueNorm(T & v)27*bf2c3715SXin Li EIGEN_DONT_INLINE typename T::Scalar blueNorm(T& v)
28*bf2c3715SXin Li {
29*bf2c3715SXin Li   return v.blueNorm();
30*bf2c3715SXin Li }
31*bf2c3715SXin Li 
32*bf2c3715SXin Li template<typename T>
lapackNorm(T & v)33*bf2c3715SXin Li EIGEN_DONT_INLINE typename T::Scalar lapackNorm(T& v)
34*bf2c3715SXin Li {
35*bf2c3715SXin Li   typedef typename T::Scalar Scalar;
36*bf2c3715SXin Li   int n = v.size();
37*bf2c3715SXin Li   Scalar scale = 0;
38*bf2c3715SXin Li   Scalar ssq = 1;
39*bf2c3715SXin Li   for (int i=0;i<n;++i)
40*bf2c3715SXin Li   {
41*bf2c3715SXin Li     Scalar ax = std::abs(v.coeff(i));
42*bf2c3715SXin Li     if (scale >= ax)
43*bf2c3715SXin Li     {
44*bf2c3715SXin Li       ssq += numext::abs2(ax/scale);
45*bf2c3715SXin Li     }
46*bf2c3715SXin Li     else
47*bf2c3715SXin Li     {
48*bf2c3715SXin Li       ssq = Scalar(1) + ssq * numext::abs2(scale/ax);
49*bf2c3715SXin Li       scale = ax;
50*bf2c3715SXin Li     }
51*bf2c3715SXin Li   }
52*bf2c3715SXin Li   return scale * std::sqrt(ssq);
53*bf2c3715SXin Li }
54*bf2c3715SXin Li 
55*bf2c3715SXin Li template<typename T>
twopassNorm(T & v)56*bf2c3715SXin Li EIGEN_DONT_INLINE typename T::Scalar twopassNorm(T& v)
57*bf2c3715SXin Li {
58*bf2c3715SXin Li   typedef typename T::Scalar Scalar;
59*bf2c3715SXin Li   Scalar s = v.array().abs().maxCoeff();
60*bf2c3715SXin Li   return s*(v/s).norm();
61*bf2c3715SXin Li }
62*bf2c3715SXin Li 
63*bf2c3715SXin Li template<typename T>
bl2passNorm(T & v)64*bf2c3715SXin Li EIGEN_DONT_INLINE typename T::Scalar bl2passNorm(T& v)
65*bf2c3715SXin Li {
66*bf2c3715SXin Li   return v.stableNorm();
67*bf2c3715SXin Li }
68*bf2c3715SXin Li 
69*bf2c3715SXin Li template<typename T>
divacNorm(T & v)70*bf2c3715SXin Li EIGEN_DONT_INLINE typename T::Scalar divacNorm(T& v)
71*bf2c3715SXin Li {
72*bf2c3715SXin Li   int n =v.size() / 2;
73*bf2c3715SXin Li   for (int i=0;i<n;++i)
74*bf2c3715SXin Li     v(i) = v(2*i)*v(2*i) + v(2*i+1)*v(2*i+1);
75*bf2c3715SXin Li   n = n/2;
76*bf2c3715SXin Li   while (n>0)
77*bf2c3715SXin Li   {
78*bf2c3715SXin Li     for (int i=0;i<n;++i)
79*bf2c3715SXin Li       v(i) = v(2*i) + v(2*i+1);
80*bf2c3715SXin Li     n = n/2;
81*bf2c3715SXin Li   }
82*bf2c3715SXin Li   return std::sqrt(v(0));
83*bf2c3715SXin Li }
84*bf2c3715SXin Li 
namespace Eigen {
namespace internal {
#ifdef EIGEN_VECTORIZE
// SSE helpers used by pblueNorm() below.
//
// plt(a,b): lane-wise a < b, producing an all-ones / all-zeros mask per lane.
Packet4f plt(const Packet4f& a, Packet4f& b) { return _mm_cmplt_ps(a,b); }
Packet2d plt(const Packet2d& a, Packet2d& b) { return _mm_cmplt_pd(a,b); }

// pandnot(a,b): thin wrappers over _mm_andnot_ps/_pd(a,b), i.e. (~a) & b.
// NOTE(review): the second parameter is a non-const reference, so these
// overloads only accept an lvalue there. Calls passing a temporary as the
// second argument presumably resolve to Eigen's own pandnot instead, so do
// not const-qualify `b` without re-checking every call site.
Packet4f pandnot(const Packet4f& a, Packet4f& b) { return _mm_andnot_ps(a,b); }
Packet2d pandnot(const Packet2d& a, Packet2d& b) { return _mm_andnot_pd(a,b); }
#endif
}
}
96*bf2c3715SXin Li 
97*bf2c3715SXin Li template<typename T>
pblueNorm(const T & v)98*bf2c3715SXin Li EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
99*bf2c3715SXin Li {
100*bf2c3715SXin Li   #ifndef EIGEN_VECTORIZE
101*bf2c3715SXin Li   return v.blueNorm();
102*bf2c3715SXin Li   #else
103*bf2c3715SXin Li   typedef typename T::Scalar Scalar;
104*bf2c3715SXin Li 
105*bf2c3715SXin Li   static int nmax = 0;
106*bf2c3715SXin Li   static Scalar b1, b2, s1m, s2m, overfl, rbig, relerr;
107*bf2c3715SXin Li   int n;
108*bf2c3715SXin Li 
109*bf2c3715SXin Li   if(nmax <= 0)
110*bf2c3715SXin Li   {
111*bf2c3715SXin Li     int nbig, ibeta, it, iemin, iemax, iexp;
112*bf2c3715SXin Li     Scalar abig, eps;
113*bf2c3715SXin Li 
114*bf2c3715SXin Li     nbig  = NumTraits<int>::highest();          // largest integer
115*bf2c3715SXin Li     ibeta = std::numeric_limits<Scalar>::radix; // NumTraits<Scalar>::Base;                    // base for floating-point numbers
116*bf2c3715SXin Li     it    = NumTraits<Scalar>::digits();        // NumTraits<Scalar>::Mantissa;                // number of base-beta digits in mantissa
117*bf2c3715SXin Li     iemin = NumTraits<Scalar>::min_exponent();  // minimum exponent
118*bf2c3715SXin Li     iemax = NumTraits<Scalar>::max_exponent();  // maximum exponent
119*bf2c3715SXin Li     rbig  = NumTraits<Scalar>::highest();       // largest floating-point number
120*bf2c3715SXin Li 
121*bf2c3715SXin Li     // Check the basic machine-dependent constants.
122*bf2c3715SXin Li     if(iemin > 1 - 2*it || 1+it>iemax || (it==2 && ibeta<5)
123*bf2c3715SXin Li       || (it<=4 && ibeta <= 3 ) || it<2)
124*bf2c3715SXin Li     {
125*bf2c3715SXin Li       eigen_assert(false && "the algorithm cannot be guaranteed on this computer");
126*bf2c3715SXin Li     }
127*bf2c3715SXin Li     iexp  = -((1-iemin)/2);
128*bf2c3715SXin Li     b1    = std::pow(ibeta, iexp);  // lower boundary of midrange
129*bf2c3715SXin Li     iexp  = (iemax + 1 - it)/2;
130*bf2c3715SXin Li     b2    = std::pow(ibeta,iexp);   // upper boundary of midrange
131*bf2c3715SXin Li 
132*bf2c3715SXin Li     iexp  = (2-iemin)/2;
133*bf2c3715SXin Li     s1m   = std::pow(ibeta,iexp);   // scaling factor for lower range
134*bf2c3715SXin Li     iexp  = - ((iemax+it)/2);
135*bf2c3715SXin Li     s2m   = std::pow(ibeta,iexp);   // scaling factor for upper range
136*bf2c3715SXin Li 
137*bf2c3715SXin Li     overfl  = rbig*s2m;          // overflow boundary for abig
138*bf2c3715SXin Li     eps     = std::pow(ibeta, 1-it);
139*bf2c3715SXin Li     relerr  = std::sqrt(eps);      // tolerance for neglecting asml
140*bf2c3715SXin Li     abig    = 1.0/eps - 1.0;
141*bf2c3715SXin Li     if (Scalar(nbig)>abig)  nmax = abig;  // largest safe n
142*bf2c3715SXin Li     else                    nmax = nbig;
143*bf2c3715SXin Li   }
144*bf2c3715SXin Li 
145*bf2c3715SXin Li   typedef typename internal::packet_traits<Scalar>::type Packet;
146*bf2c3715SXin Li   const int ps = internal::packet_traits<Scalar>::size;
147*bf2c3715SXin Li   Packet pasml = internal::pset1<Packet>(Scalar(0));
148*bf2c3715SXin Li   Packet pamed = internal::pset1<Packet>(Scalar(0));
149*bf2c3715SXin Li   Packet pabig = internal::pset1<Packet>(Scalar(0));
150*bf2c3715SXin Li   Packet ps2m = internal::pset1<Packet>(s2m);
151*bf2c3715SXin Li   Packet ps1m = internal::pset1<Packet>(s1m);
152*bf2c3715SXin Li   Packet pb2  = internal::pset1<Packet>(b2);
153*bf2c3715SXin Li   Packet pb1  = internal::pset1<Packet>(b1);
154*bf2c3715SXin Li   for(int j=0; j<v.size(); j+=ps)
155*bf2c3715SXin Li   {
156*bf2c3715SXin Li     Packet ax = internal::pabs(v.template packet<Aligned>(j));
157*bf2c3715SXin Li     Packet ax_s2m = internal::pmul(ax,ps2m);
158*bf2c3715SXin Li     Packet ax_s1m = internal::pmul(ax,ps1m);
159*bf2c3715SXin Li     Packet maskBig = internal::plt(pb2,ax);
160*bf2c3715SXin Li     Packet maskSml = internal::plt(ax,pb1);
161*bf2c3715SXin Li 
162*bf2c3715SXin Li //     Packet maskMed = internal::pand(maskSml,maskBig);
163*bf2c3715SXin Li //     Packet scale = internal::pset1(Scalar(0));
164*bf2c3715SXin Li //     scale = internal::por(scale, internal::pand(maskBig,ps2m));
165*bf2c3715SXin Li //     scale = internal::por(scale, internal::pand(maskSml,ps1m));
166*bf2c3715SXin Li //     scale = internal::por(scale, internal::pandnot(internal::pset1(Scalar(1)),maskMed));
167*bf2c3715SXin Li //     ax = internal::pmul(ax,scale);
168*bf2c3715SXin Li //     ax = internal::pmul(ax,ax);
169*bf2c3715SXin Li //     pabig = internal::padd(pabig, internal::pand(maskBig, ax));
170*bf2c3715SXin Li //     pasml = internal::padd(pasml, internal::pand(maskSml, ax));
171*bf2c3715SXin Li //     pamed = internal::padd(pamed, internal::pandnot(ax,maskMed));
172*bf2c3715SXin Li 
173*bf2c3715SXin Li 
174*bf2c3715SXin Li     pabig = internal::padd(pabig, internal::pand(maskBig, internal::pmul(ax_s2m,ax_s2m)));
175*bf2c3715SXin Li     pasml = internal::padd(pasml, internal::pand(maskSml, internal::pmul(ax_s1m,ax_s1m)));
176*bf2c3715SXin Li     pamed = internal::padd(pamed, internal::pandnot(internal::pmul(ax,ax),internal::pand(maskSml,maskBig)));
177*bf2c3715SXin Li   }
178*bf2c3715SXin Li   Scalar abig = internal::predux(pabig);
179*bf2c3715SXin Li   Scalar asml = internal::predux(pasml);
180*bf2c3715SXin Li   Scalar amed = internal::predux(pamed);
181*bf2c3715SXin Li   if(abig > Scalar(0))
182*bf2c3715SXin Li   {
183*bf2c3715SXin Li     abig = std::sqrt(abig);
184*bf2c3715SXin Li     if(abig > overfl)
185*bf2c3715SXin Li     {
186*bf2c3715SXin Li       eigen_assert(false && "overflow");
187*bf2c3715SXin Li       return rbig;
188*bf2c3715SXin Li     }
189*bf2c3715SXin Li     if(amed > Scalar(0))
190*bf2c3715SXin Li     {
191*bf2c3715SXin Li       abig = abig/s2m;
192*bf2c3715SXin Li       amed = std::sqrt(amed);
193*bf2c3715SXin Li     }
194*bf2c3715SXin Li     else
195*bf2c3715SXin Li     {
196*bf2c3715SXin Li       return abig/s2m;
197*bf2c3715SXin Li     }
198*bf2c3715SXin Li 
199*bf2c3715SXin Li   }
200*bf2c3715SXin Li   else if(asml > Scalar(0))
201*bf2c3715SXin Li   {
202*bf2c3715SXin Li     if (amed > Scalar(0))
203*bf2c3715SXin Li     {
204*bf2c3715SXin Li       abig = std::sqrt(amed);
205*bf2c3715SXin Li       amed = std::sqrt(asml) / s1m;
206*bf2c3715SXin Li     }
207*bf2c3715SXin Li     else
208*bf2c3715SXin Li     {
209*bf2c3715SXin Li       return std::sqrt(asml)/s1m;
210*bf2c3715SXin Li     }
211*bf2c3715SXin Li   }
212*bf2c3715SXin Li   else
213*bf2c3715SXin Li   {
214*bf2c3715SXin Li     return std::sqrt(amed);
215*bf2c3715SXin Li   }
216*bf2c3715SXin Li   asml = std::min(abig, amed);
217*bf2c3715SXin Li   abig = std::max(abig, amed);
218*bf2c3715SXin Li   if(asml <= abig*relerr)
219*bf2c3715SXin Li     return abig;
220*bf2c3715SXin Li   else
221*bf2c3715SXin Li     return abig * std::sqrt(Scalar(1) + numext::abs2(asml/abig));
222*bf2c3715SXin Li   #endif
223*bf2c3715SXin Li }
224*bf2c3715SXin Li 
// BENCH_PERF(NRM): times the norm function NRM on the float vector `vf` and
// the double vector `vd`, then prints one line to std::cout:
//   <name> TAB <float timer value>   <double timer value>    <complex timer value>
//
// Expects `tries`, `iters`, `vf`, `vd` (and `vcf` for the disabled part) to be
// visible at the expansion site. Results are accumulated into af/ad so the
// calls have a consumer. The complex-float pass is commented out, so the
// third column only reports the value of a timer that was reset but never run.
//
// Wrapped in do { } while (0) so that `BENCH_PERF(x);` is a single statement.
#define BENCH_PERF(NRM) do { \
  float af = 0; double ad = 0; std::complex<float> ac = 0; \
  Eigen::BenchTimer tf, td, tcf; tf.reset(); td.reset(); tcf.reset();\
  for (int k=0; k<tries; ++k) { \
    tf.start(); \
    for (int i=0; i<iters; ++i) { af += NRM(vf); } \
    tf.stop(); \
  } \
  for (int k=0; k<tries; ++k) { \
    td.start(); \
    for (int i=0; i<iters; ++i) { ad += NRM(vd); } \
    td.stop(); \
  } \
  /*for (int k=0; k<std::max(1,tries/3); ++k) { \
    tcf.start(); \
    for (int i=0; i<iters; ++i) { ac += NRM(vcf); } \
    tcf.stop(); \
  } */\
  std::cout << #NRM << "\t" << tf.value() << "   " << td.value() <<  "    " << tcf.value() << "\n"; \
} while (0)
245*bf2c3715SXin Li 
check_accuracy(double basef,double based,int s)246*bf2c3715SXin Li void check_accuracy(double basef, double based, int s)
247*bf2c3715SXin Li {
248*bf2c3715SXin Li   double yf = basef * std::abs(internal::random<double>());
249*bf2c3715SXin Li   double yd = based * std::abs(internal::random<double>());
250*bf2c3715SXin Li   VectorXf vf = VectorXf::Ones(s) * yf;
251*bf2c3715SXin Li   VectorXd vd = VectorXd::Ones(s) * yd;
252*bf2c3715SXin Li 
253*bf2c3715SXin Li   std::cout << "reference\t" << std::sqrt(double(s))*yf << "\t" << std::sqrt(double(s))*yd << "\n";
254*bf2c3715SXin Li   std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\n";
255*bf2c3715SXin Li   std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\n";
256*bf2c3715SXin Li   std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\n";
257*bf2c3715SXin Li   std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\n";
258*bf2c3715SXin Li   std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\n";
259*bf2c3715SXin Li   std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\n";
260*bf2c3715SXin Li   std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\n";
261*bf2c3715SXin Li }
262*bf2c3715SXin Li 
check_accuracy_var(int ef0,int ef1,int ed0,int ed1,int s)263*bf2c3715SXin Li void check_accuracy_var(int ef0, int ef1, int ed0, int ed1, int s)
264*bf2c3715SXin Li {
265*bf2c3715SXin Li   VectorXf vf(s);
266*bf2c3715SXin Li   VectorXd vd(s);
267*bf2c3715SXin Li   for (int i=0; i<s; ++i)
268*bf2c3715SXin Li   {
269*bf2c3715SXin Li     vf[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ef0,ef1));
270*bf2c3715SXin Li     vd[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ed0,ed1));
271*bf2c3715SXin Li   }
272*bf2c3715SXin Li 
273*bf2c3715SXin Li   //std::cout << "reference\t" << internal::sqrt(double(s))*yf << "\t" << internal::sqrt(double(s))*yd << "\n";
274*bf2c3715SXin Li   std::cout << "sqsumNorm\t"  << sqsumNorm(vf)  << "\t" << sqsumNorm(vd)  << "\t" << sqsumNorm(vf.cast<long double>()) << "\t" << sqsumNorm(vd.cast<long double>()) << "\n";
275*bf2c3715SXin Li   std::cout << "hypotNorm\t"  << hypotNorm(vf)  << "\t" << hypotNorm(vd)  << "\t" << hypotNorm(vf.cast<long double>()) << "\t" << hypotNorm(vd.cast<long double>()) << "\n";
276*bf2c3715SXin Li   std::cout << "blueNorm\t"   << blueNorm(vf)   << "\t" << blueNorm(vd)   << "\t" << blueNorm(vf.cast<long double>()) << "\t" << blueNorm(vd.cast<long double>()) << "\n";
277*bf2c3715SXin Li   std::cout << "pblueNorm\t"  << pblueNorm(vf)  << "\t" << pblueNorm(vd)  << "\t" << blueNorm(vf.cast<long double>()) << "\t" << blueNorm(vd.cast<long double>()) << "\n";
278*bf2c3715SXin Li   std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\t" << lapackNorm(vf.cast<long double>()) << "\t" << lapackNorm(vd.cast<long double>()) << "\n";
279*bf2c3715SXin Li   std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\t" << twopassNorm(vf.cast<long double>()) << "\t" << twopassNorm(vd.cast<long double>()) << "\n";
280*bf2c3715SXin Li //   std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\t" << bl2passNorm(vf.cast<long double>()) << "\t" << bl2passNorm(vd.cast<long double>()) << "\n";
281*bf2c3715SXin Li }
282*bf2c3715SXin Li 
main(int argc,char ** argv)283*bf2c3715SXin Li int main(int argc, char** argv)
284*bf2c3715SXin Li {
285*bf2c3715SXin Li   int tries = 10;
286*bf2c3715SXin Li   int iters = 100000;
287*bf2c3715SXin Li   double y = 1.1345743233455785456788e12 * internal::random<double>();
288*bf2c3715SXin Li   VectorXf v = VectorXf::Ones(1024) * y;
289*bf2c3715SXin Li 
290*bf2c3715SXin Li // return 0;
291*bf2c3715SXin Li   int s = 10000;
292*bf2c3715SXin Li   double basef_ok = 1.1345743233455785456788e15;
293*bf2c3715SXin Li   double based_ok = 1.1345743233455785456788e95;
294*bf2c3715SXin Li 
295*bf2c3715SXin Li   double basef_under = 1.1345743233455785456788e-27;
296*bf2c3715SXin Li   double based_under = 1.1345743233455785456788e-303;
297*bf2c3715SXin Li 
298*bf2c3715SXin Li   double basef_over = 1.1345743233455785456788e+27;
299*bf2c3715SXin Li   double based_over = 1.1345743233455785456788e+302;
300*bf2c3715SXin Li 
301*bf2c3715SXin Li   std::cout.precision(20);
302*bf2c3715SXin Li 
303*bf2c3715SXin Li   std::cerr << "\nNo under/overflow:\n";
304*bf2c3715SXin Li   check_accuracy(basef_ok, based_ok, s);
305*bf2c3715SXin Li 
306*bf2c3715SXin Li   std::cerr << "\nUnderflow:\n";
307*bf2c3715SXin Li   check_accuracy(basef_under, based_under, s);
308*bf2c3715SXin Li 
309*bf2c3715SXin Li   std::cerr << "\nOverflow:\n";
310*bf2c3715SXin Li   check_accuracy(basef_over, based_over, s);
311*bf2c3715SXin Li 
312*bf2c3715SXin Li   std::cerr << "\nVarying (over):\n";
313*bf2c3715SXin Li   for (int k=0; k<1; ++k)
314*bf2c3715SXin Li   {
315*bf2c3715SXin Li     check_accuracy_var(20,27,190,302,s);
316*bf2c3715SXin Li     std::cout << "\n";
317*bf2c3715SXin Li   }
318*bf2c3715SXin Li 
319*bf2c3715SXin Li   std::cerr << "\nVarying (under):\n";
320*bf2c3715SXin Li   for (int k=0; k<1; ++k)
321*bf2c3715SXin Li   {
322*bf2c3715SXin Li     check_accuracy_var(-27,20,-302,-190,s);
323*bf2c3715SXin Li     std::cout << "\n";
324*bf2c3715SXin Li   }
325*bf2c3715SXin Li 
326*bf2c3715SXin Li   y = 1;
327*bf2c3715SXin Li   std::cout.precision(4);
328*bf2c3715SXin Li   int s1 = 1024*1024*32;
329*bf2c3715SXin Li   std::cerr << "Performance (out of cache, " << s1 << "):\n";
330*bf2c3715SXin Li   {
331*bf2c3715SXin Li     int iters = 1;
332*bf2c3715SXin Li     VectorXf vf = VectorXf::Random(s1) * y;
333*bf2c3715SXin Li     VectorXd vd = VectorXd::Random(s1) * y;
334*bf2c3715SXin Li     VectorXcf vcf = VectorXcf::Random(s1) * y;
335*bf2c3715SXin Li     BENCH_PERF(sqsumNorm);
336*bf2c3715SXin Li     BENCH_PERF(stableNorm);
337*bf2c3715SXin Li     BENCH_PERF(blueNorm);
338*bf2c3715SXin Li     BENCH_PERF(pblueNorm);
339*bf2c3715SXin Li     BENCH_PERF(lapackNorm);
340*bf2c3715SXin Li     BENCH_PERF(hypotNorm);
341*bf2c3715SXin Li     BENCH_PERF(twopassNorm);
342*bf2c3715SXin Li     BENCH_PERF(bl2passNorm);
343*bf2c3715SXin Li   }
344*bf2c3715SXin Li 
345*bf2c3715SXin Li   std::cerr << "\nPerformance (in cache, " << 512 << "):\n";
346*bf2c3715SXin Li   {
347*bf2c3715SXin Li     int iters = 100000;
348*bf2c3715SXin Li     VectorXf vf = VectorXf::Random(512) * y;
349*bf2c3715SXin Li     VectorXd vd = VectorXd::Random(512) * y;
350*bf2c3715SXin Li     VectorXcf vcf = VectorXcf::Random(512) * y;
351*bf2c3715SXin Li     BENCH_PERF(sqsumNorm);
352*bf2c3715SXin Li     BENCH_PERF(stableNorm);
353*bf2c3715SXin Li     BENCH_PERF(blueNorm);
354*bf2c3715SXin Li     BENCH_PERF(pblueNorm);
355*bf2c3715SXin Li     BENCH_PERF(lapackNorm);
356*bf2c3715SXin Li     BENCH_PERF(hypotNorm);
357*bf2c3715SXin Li     BENCH_PERF(twopassNorm);
358*bf2c3715SXin Li     BENCH_PERF(bl2passNorm);
359*bf2c3715SXin Li   }
360*bf2c3715SXin Li }
361