1*bf2c3715SXin Li // g++ -DNDEBUG -O3 -I.. benchCholesky.cpp -o benchCholesky && ./benchCholesky
2*bf2c3715SXin Li // options:
3*bf2c3715SXin Li // -DBENCH_GSL -lgsl /usr/lib/libcblas.so.3
4*bf2c3715SXin Li // -DEIGEN_DONT_VECTORIZE
5*bf2c3715SXin Li // -msse2
6*bf2c3715SXin Li // -DREPEAT=100
7*bf2c3715SXin Li // -DTRIES=10
8*bf2c3715SXin Li // -DSCALAR=double
9*bf2c3715SXin Li
10*bf2c3715SXin Li #include <iostream>
11*bf2c3715SXin Li
12*bf2c3715SXin Li #include <Eigen/Core>
13*bf2c3715SXin Li #include <Eigen/Cholesky>
14*bf2c3715SXin Li #include <bench/BenchUtil.h>
15*bf2c3715SXin Li using namespace Eigen;
16*bf2c3715SXin Li
17*bf2c3715SXin Li #ifndef REPEAT
18*bf2c3715SXin Li #define REPEAT 10000
19*bf2c3715SXin Li #endif
20*bf2c3715SXin Li
21*bf2c3715SXin Li #ifndef TRIES
22*bf2c3715SXin Li #define TRIES 10
23*bf2c3715SXin Li #endif
24*bf2c3715SXin Li
25*bf2c3715SXin Li typedef float Scalar;
26*bf2c3715SXin Li
27*bf2c3715SXin Li template <typename MatrixType>
benchLLT(const MatrixType & m)28*bf2c3715SXin Li __attribute__ ((noinline)) void benchLLT(const MatrixType& m)
29*bf2c3715SXin Li {
30*bf2c3715SXin Li int rows = m.rows();
31*bf2c3715SXin Li int cols = m.cols();
32*bf2c3715SXin Li
33*bf2c3715SXin Li double cost = 0;
34*bf2c3715SXin Li for (int j=0; j<rows; ++j)
35*bf2c3715SXin Li {
36*bf2c3715SXin Li int r = std::max(rows - j -1,0);
37*bf2c3715SXin Li cost += 2*(r*j+r+j);
38*bf2c3715SXin Li }
39*bf2c3715SXin Li
40*bf2c3715SXin Li int repeats = (REPEAT*1000)/(rows*rows);
41*bf2c3715SXin Li
42*bf2c3715SXin Li typedef typename MatrixType::Scalar Scalar;
43*bf2c3715SXin Li typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;
44*bf2c3715SXin Li
45*bf2c3715SXin Li MatrixType a = MatrixType::Random(rows,cols);
46*bf2c3715SXin Li SquareMatrixType covMat = a * a.adjoint();
47*bf2c3715SXin Li
48*bf2c3715SXin Li BenchTimer timerNoSqrt, timerSqrt;
49*bf2c3715SXin Li
50*bf2c3715SXin Li Scalar acc = 0;
51*bf2c3715SXin Li int r = internal::random<int>(0,covMat.rows()-1);
52*bf2c3715SXin Li int c = internal::random<int>(0,covMat.cols()-1);
53*bf2c3715SXin Li for (int t=0; t<TRIES; ++t)
54*bf2c3715SXin Li {
55*bf2c3715SXin Li timerNoSqrt.start();
56*bf2c3715SXin Li for (int k=0; k<repeats; ++k)
57*bf2c3715SXin Li {
58*bf2c3715SXin Li LDLT<SquareMatrixType> cholnosqrt(covMat);
59*bf2c3715SXin Li acc += cholnosqrt.matrixL().coeff(r,c);
60*bf2c3715SXin Li }
61*bf2c3715SXin Li timerNoSqrt.stop();
62*bf2c3715SXin Li }
63*bf2c3715SXin Li
64*bf2c3715SXin Li for (int t=0; t<TRIES; ++t)
65*bf2c3715SXin Li {
66*bf2c3715SXin Li timerSqrt.start();
67*bf2c3715SXin Li for (int k=0; k<repeats; ++k)
68*bf2c3715SXin Li {
69*bf2c3715SXin Li LLT<SquareMatrixType> chol(covMat);
70*bf2c3715SXin Li acc += chol.matrixL().coeff(r,c);
71*bf2c3715SXin Li }
72*bf2c3715SXin Li timerSqrt.stop();
73*bf2c3715SXin Li }
74*bf2c3715SXin Li
75*bf2c3715SXin Li if (MatrixType::RowsAtCompileTime==Dynamic)
76*bf2c3715SXin Li std::cout << "dyn ";
77*bf2c3715SXin Li else
78*bf2c3715SXin Li std::cout << "fixed ";
79*bf2c3715SXin Li std::cout << covMat.rows() << " \t"
80*bf2c3715SXin Li << (timerNoSqrt.best()) / repeats << "s "
81*bf2c3715SXin Li << "(" << 1e-9 * cost*repeats/timerNoSqrt.best() << " GFLOPS)\t"
82*bf2c3715SXin Li << (timerSqrt.best()) / repeats << "s "
83*bf2c3715SXin Li << "(" << 1e-9 * cost*repeats/timerSqrt.best() << " GFLOPS)\n";
84*bf2c3715SXin Li
85*bf2c3715SXin Li
86*bf2c3715SXin Li #ifdef BENCH_GSL
87*bf2c3715SXin Li if (MatrixType::RowsAtCompileTime==Dynamic)
88*bf2c3715SXin Li {
89*bf2c3715SXin Li timerSqrt.reset();
90*bf2c3715SXin Li
91*bf2c3715SXin Li gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(),covMat.cols());
92*bf2c3715SXin Li gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(),covMat.cols());
93*bf2c3715SXin Li
94*bf2c3715SXin Li eiToGsl(covMat, &gslCovMat);
95*bf2c3715SXin Li for (int t=0; t<TRIES; ++t)
96*bf2c3715SXin Li {
97*bf2c3715SXin Li timerSqrt.start();
98*bf2c3715SXin Li for (int k=0; k<repeats; ++k)
99*bf2c3715SXin Li {
100*bf2c3715SXin Li gsl_matrix_memcpy(gslCopy,gslCovMat);
101*bf2c3715SXin Li gsl_linalg_cholesky_decomp(gslCopy);
102*bf2c3715SXin Li acc += gsl_matrix_get(gslCopy,r,c);
103*bf2c3715SXin Li }
104*bf2c3715SXin Li timerSqrt.stop();
105*bf2c3715SXin Li }
106*bf2c3715SXin Li
107*bf2c3715SXin Li std::cout << " | \t"
108*bf2c3715SXin Li << timerSqrt.value() * REPEAT / repeats << "s";
109*bf2c3715SXin Li
110*bf2c3715SXin Li gsl_matrix_free(gslCovMat);
111*bf2c3715SXin Li }
112*bf2c3715SXin Li #endif
113*bf2c3715SXin Li std::cout << "\n";
114*bf2c3715SXin Li // make sure the compiler does not optimize too much
115*bf2c3715SXin Li if (acc==123)
116*bf2c3715SXin Li std::cout << acc;
117*bf2c3715SXin Li }
118*bf2c3715SXin Li
main(int argc,char * argv[])119*bf2c3715SXin Li int main(int argc, char* argv[])
120*bf2c3715SXin Li {
121*bf2c3715SXin Li const int dynsizes[] = {4,6,8,16,24,32,49,64,128,256,512,900,1500,0};
122*bf2c3715SXin Li std::cout << "size LDLT LLT";
123*bf2c3715SXin Li // #ifdef BENCH_GSL
124*bf2c3715SXin Li // std::cout << " GSL (standard + double + ATLAS) ";
125*bf2c3715SXin Li // #endif
126*bf2c3715SXin Li std::cout << "\n";
127*bf2c3715SXin Li for (int i=0; dynsizes[i]>0; ++i)
128*bf2c3715SXin Li benchLLT(Matrix<Scalar,Dynamic,Dynamic>(dynsizes[i],dynsizes[i]));
129*bf2c3715SXin Li
130*bf2c3715SXin Li benchLLT(Matrix<Scalar,2,2>());
131*bf2c3715SXin Li benchLLT(Matrix<Scalar,3,3>());
132*bf2c3715SXin Li benchLLT(Matrix<Scalar,4,4>());
133*bf2c3715SXin Li benchLLT(Matrix<Scalar,5,5>());
134*bf2c3715SXin Li benchLLT(Matrix<Scalar,6,6>());
135*bf2c3715SXin Li benchLLT(Matrix<Scalar,7,7>());
136*bf2c3715SXin Li benchLLT(Matrix<Scalar,8,8>());
137*bf2c3715SXin Li benchLLT(Matrix<Scalar,12,12>());
138*bf2c3715SXin Li benchLLT(Matrix<Scalar,16,16>());
139*bf2c3715SXin Li return 0;
140*bf2c3715SXin Li }
141*bf2c3715SXin Li
142