// xref: /aosp_15_r20/external/eigen/bench/benchBlasGemm.cpp (revision bf2c37156dfe67e5dfebd6d394bad8b2ab5804d4)
// g++ -O3 -DNDEBUG -I.. -L /usr/lib64/atlas/ benchBlasGemm.cpp -o benchBlasGemm -lrt -lcblas
// possible options:
//    -DEIGEN_DONT_VECTORIZE
//    -msse2
5*bf2c3715SXin Li 
// Uncomment to make Eigen matrices row-major by default. Eigen reads this
// macro while its headers are parsed, so it has to stay above the includes.
// #define EIGEN_DEFAULT_TO_ROW_MAJOR

// Selects single precision: when _FLOAT is defined the benchmark uses
// float / cblas_sgemm, otherwise double / cblas_dgemm (see the #ifdef _FLOAT
// section further down). Comment this line out to benchmark double precision.
// NOTE(review): identifiers starting with '_' + uppercase are reserved in C++;
// the name is kept because the #ifdef below tests it.
#define _FLOAT
8*bf2c3715SXin Li 
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <string>

#include <Eigen/Core>
#include "BenchTimer.h"

// include the BLAS headers
extern "C" {
#include <cblas.h>
}
19*bf2c3715SXin Li 
// Scalar type under test and the CBLAS GEMM entry point that matches it:
// single precision when _FLOAT is defined, double precision otherwise.
#if defined(_FLOAT)
typedef float Scalar;
#define CBLAS_GEMM cblas_sgemm
#else
typedef double Scalar;
#define CBLAS_GEMM cblas_dgemm
#endif
27*bf2c3715SXin Li 
28*bf2c3715SXin Li 
29*bf2c3715SXin Li typedef Eigen::Matrix<Scalar,Eigen::Dynamic,Eigen::Dynamic> MyMatrix;
30*bf2c3715SXin Li void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops);
31*bf2c3715SXin Li void check_product(int M, int N, int K);
32*bf2c3715SXin Li void check_product(void);
33*bf2c3715SXin Li 
main(int argc,char * argv[])34*bf2c3715SXin Li int main(int argc, char *argv[])
35*bf2c3715SXin Li {
36*bf2c3715SXin Li   // disable SSE exceptions
37*bf2c3715SXin Li   #ifdef __GNUC__
38*bf2c3715SXin Li   {
39*bf2c3715SXin Li     int aux;
40*bf2c3715SXin Li     asm(
41*bf2c3715SXin Li     "stmxcsr   %[aux]           \n\t"
42*bf2c3715SXin Li     "orl       $32832, %[aux]   \n\t"
43*bf2c3715SXin Li     "ldmxcsr   %[aux]           \n\t"
44*bf2c3715SXin Li     : : [aux] "m" (aux));
45*bf2c3715SXin Li   }
46*bf2c3715SXin Li   #endif
47*bf2c3715SXin Li 
48*bf2c3715SXin Li   int nbtries=1, nbloops=1, M, N, K;
49*bf2c3715SXin Li 
50*bf2c3715SXin Li   if (argc==2)
51*bf2c3715SXin Li   {
52*bf2c3715SXin Li     if (std::string(argv[1])=="check")
53*bf2c3715SXin Li       check_product();
54*bf2c3715SXin Li     else
55*bf2c3715SXin Li       M = N = K = atoi(argv[1]);
56*bf2c3715SXin Li   }
57*bf2c3715SXin Li   else if ((argc==3) && (std::string(argv[1])=="auto"))
58*bf2c3715SXin Li   {
59*bf2c3715SXin Li     M = N = K = atoi(argv[2]);
60*bf2c3715SXin Li     nbloops = 1000000000/(M*M*M);
61*bf2c3715SXin Li     if (nbloops<1)
62*bf2c3715SXin Li       nbloops = 1;
63*bf2c3715SXin Li     nbtries = 6;
64*bf2c3715SXin Li   }
65*bf2c3715SXin Li   else if (argc==4)
66*bf2c3715SXin Li   {
67*bf2c3715SXin Li     M = N = K = atoi(argv[1]);
68*bf2c3715SXin Li     nbloops = atoi(argv[2]);
69*bf2c3715SXin Li     nbtries = atoi(argv[3]);
70*bf2c3715SXin Li   }
71*bf2c3715SXin Li   else if (argc==6)
72*bf2c3715SXin Li   {
73*bf2c3715SXin Li     M = atoi(argv[1]);
74*bf2c3715SXin Li     N = atoi(argv[2]);
75*bf2c3715SXin Li     K = atoi(argv[3]);
76*bf2c3715SXin Li     nbloops = atoi(argv[4]);
77*bf2c3715SXin Li     nbtries = atoi(argv[5]);
78*bf2c3715SXin Li   }
79*bf2c3715SXin Li   else
80*bf2c3715SXin Li   {
81*bf2c3715SXin Li     std::cout << "Usage: " << argv[0] << " size  \n";
82*bf2c3715SXin Li     std::cout << "Usage: " << argv[0] << " auto size\n";
83*bf2c3715SXin Li     std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n";
84*bf2c3715SXin Li     std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n";
85*bf2c3715SXin Li     std::cout << "Usage: " << argv[0] << " check\n";
86*bf2c3715SXin Li     std::cout << "Options:\n";
87*bf2c3715SXin Li     std::cout << "    size       unique size of the 2 matrices (integer)\n";
88*bf2c3715SXin Li     std::cout << "    auto       automatically set the number of repetitions and tries\n";
89*bf2c3715SXin Li     std::cout << "    nbloops    number of times the GEMM routines is executed\n";
90*bf2c3715SXin Li     std::cout << "    nbtries    number of times the loop is benched (return the best try)\n";
91*bf2c3715SXin Li     std::cout << "    M N K      sizes of the matrices: MxN  =  MxK * KxN (integers)\n";
92*bf2c3715SXin Li     std::cout << "    check      check eigen product using cblas as a reference\n";
93*bf2c3715SXin Li     exit(1);
94*bf2c3715SXin Li   }
95*bf2c3715SXin Li 
96*bf2c3715SXin Li   double nbmad = double(M) * double(N) * double(K) * double(nbloops);
97*bf2c3715SXin Li 
98*bf2c3715SXin Li   if (!(std::string(argv[1])=="auto"))
99*bf2c3715SXin Li     std::cout << M << " x " << N << " x " << K << "\n";
100*bf2c3715SXin Li 
101*bf2c3715SXin Li   Scalar alpha, beta;
102*bf2c3715SXin Li   MyMatrix ma(M,K), mb(K,N), mc(M,N);
103*bf2c3715SXin Li   ma = MyMatrix::Random(M,K);
104*bf2c3715SXin Li   mb = MyMatrix::Random(K,N);
105*bf2c3715SXin Li   mc = MyMatrix::Random(M,N);
106*bf2c3715SXin Li 
107*bf2c3715SXin Li   Eigen::BenchTimer timer;
108*bf2c3715SXin Li 
109*bf2c3715SXin Li   // we simply compute c += a*b, so:
110*bf2c3715SXin Li   alpha = 1;
111*bf2c3715SXin Li   beta = 1;
112*bf2c3715SXin Li 
113*bf2c3715SXin Li   // bench cblas
114*bf2c3715SXin Li   // ROWS_A, COLS_B, COLS_A, 1.0,  A, COLS_A, B, COLS_B, 0.0, C, COLS_B);
115*bf2c3715SXin Li   if (!(std::string(argv[1])=="auto"))
116*bf2c3715SXin Li   {
117*bf2c3715SXin Li     timer.reset();
118*bf2c3715SXin Li     for (uint k=0 ; k<nbtries ; ++k)
119*bf2c3715SXin Li     {
120*bf2c3715SXin Li         timer.start();
121*bf2c3715SXin Li         for (uint j=0 ; j<nbloops ; ++j)
122*bf2c3715SXin Li               #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
123*bf2c3715SXin Li               CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta, mc.data(), N);
124*bf2c3715SXin Li               #else
125*bf2c3715SXin Li               CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta, mc.data(), M);
126*bf2c3715SXin Li               #endif
127*bf2c3715SXin Li         timer.stop();
128*bf2c3715SXin Li     }
129*bf2c3715SXin Li     if (!(std::string(argv[1])=="auto"))
130*bf2c3715SXin Li       std::cout << "cblas: " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
131*bf2c3715SXin Li     else
132*bf2c3715SXin Li         std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
133*bf2c3715SXin Li   }
134*bf2c3715SXin Li 
135*bf2c3715SXin Li   // clear
136*bf2c3715SXin Li   ma = MyMatrix::Random(M,K);
137*bf2c3715SXin Li   mb = MyMatrix::Random(K,N);
138*bf2c3715SXin Li   mc = MyMatrix::Random(M,N);
139*bf2c3715SXin Li 
140*bf2c3715SXin Li   // eigen
141*bf2c3715SXin Li //   if (!(std::string(argv[1])=="auto"))
142*bf2c3715SXin Li   {
143*bf2c3715SXin Li       timer.reset();
144*bf2c3715SXin Li       for (uint k=0 ; k<nbtries ; ++k)
145*bf2c3715SXin Li       {
146*bf2c3715SXin Li           timer.start();
147*bf2c3715SXin Li           bench_eigengemm(mc, ma, mb, nbloops);
148*bf2c3715SXin Li           timer.stop();
149*bf2c3715SXin Li       }
150*bf2c3715SXin Li       if (!(std::string(argv[1])=="auto"))
151*bf2c3715SXin Li         std::cout << "eigen : " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
152*bf2c3715SXin Li       else
153*bf2c3715SXin Li         std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
154*bf2c3715SXin Li   }
155*bf2c3715SXin Li 
156*bf2c3715SXin Li   std::cout << "l1: " << Eigen::l1CacheSize() << std::endl;
157*bf2c3715SXin Li   std::cout << "l2: " << Eigen::l2CacheSize() << std::endl;
158*bf2c3715SXin Li 
159*bf2c3715SXin Li 
160*bf2c3715SXin Li   return 0;
161*bf2c3715SXin Li }
162*bf2c3715SXin Li 
163*bf2c3715SXin Li using namespace Eigen;
164*bf2c3715SXin Li 
bench_eigengemm(MyMatrix & mc,const MyMatrix & ma,const MyMatrix & mb,int nbloops)165*bf2c3715SXin Li void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops)
166*bf2c3715SXin Li {
167*bf2c3715SXin Li   for (uint j=0 ; j<nbloops ; ++j)
168*bf2c3715SXin Li       mc.noalias() += ma * mb;
169*bf2c3715SXin Li }
170*bf2c3715SXin Li 
// Prints "FAIL: <MSG>" to stdout when condition A is false; does nothing else
// (no abort, no exit status change).
// Wrapped in do { } while (0) so that `MYVERIFY(...);` is a single statement
// and can be used safely as the body of an if/else.
// MSG is inserted into the stream unparenthesised on purpose, so callers may
// pass a chain such as  "x = " << x.
#define MYVERIFY(A, MSG)                      \
  do {                                        \
    if (!(A)) {                               \
      std::cout << "FAIL: " << MSG << "\n";   \
    }                                         \
  } while (0)
check_product(int M,int N,int K)174*bf2c3715SXin Li void check_product(int M, int N, int K)
175*bf2c3715SXin Li {
176*bf2c3715SXin Li   MyMatrix ma(M,K), mb(K,N), mc(M,N), maT(K,M), mbT(N,K), meigen(M,N), mref(M,N);
177*bf2c3715SXin Li   ma = MyMatrix::Random(M,K);
178*bf2c3715SXin Li   mb = MyMatrix::Random(K,N);
179*bf2c3715SXin Li   maT = ma.transpose();
180*bf2c3715SXin Li   mbT = mb.transpose();
181*bf2c3715SXin Li   mc = MyMatrix::Random(M,N);
182*bf2c3715SXin Li 
183*bf2c3715SXin Li   MyMatrix::Scalar eps = 1e-4;
184*bf2c3715SXin Li 
185*bf2c3715SXin Li   meigen = mref = mc;
186*bf2c3715SXin Li   CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(), M, mb.data(), K, 1, mref.data(), M);
187*bf2c3715SXin Li   meigen += ma * mb;
188*bf2c3715SXin Li   MYVERIFY(meigen.isApprox(mref, eps),". * .");
189*bf2c3715SXin Li 
190*bf2c3715SXin Li   meigen = mref = mc;
191*bf2c3715SXin Li   CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M);
192*bf2c3715SXin Li   meigen += maT.transpose() * mb;
193*bf2c3715SXin Li   MYVERIFY(meigen.isApprox(mref, eps),"T * .");
194*bf2c3715SXin Li 
195*bf2c3715SXin Li   meigen = mref = mc;
196*bf2c3715SXin Li   CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M);
197*bf2c3715SXin Li   meigen += (maT.transpose()) * (mbT.transpose());
198*bf2c3715SXin Li   MYVERIFY(meigen.isApprox(mref, eps),"T * T");
199*bf2c3715SXin Li 
200*bf2c3715SXin Li   meigen = mref = mc;
201*bf2c3715SXin Li   CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M);
202*bf2c3715SXin Li   meigen += ma * mbT.transpose();
203*bf2c3715SXin Li   MYVERIFY(meigen.isApprox(mref, eps),". * T");
204*bf2c3715SXin Li }
205*bf2c3715SXin Li 
check_product(void)206*bf2c3715SXin Li void check_product(void)
207*bf2c3715SXin Li {
208*bf2c3715SXin Li   int M, N, K;
209*bf2c3715SXin Li   for (uint i=0; i<1000; ++i)
210*bf2c3715SXin Li   {
211*bf2c3715SXin Li     M = internal::random<int>(1,64);
212*bf2c3715SXin Li     N = internal::random<int>(1,768);
213*bf2c3715SXin Li     K = internal::random<int>(1,768);
214*bf2c3715SXin Li     M = (0 + M) * 1;
215*bf2c3715SXin Li     std::cout << M << " x " << N << " x " << K << "\n";
216*bf2c3715SXin Li     check_product(M, N, K);
217*bf2c3715SXin Li   }
218*bf2c3715SXin Li }
219*bf2c3715SXin Li 
220