// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Gael Guennebaud <[email protected]>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
#define EIGEN_SELFADJOINT_MATRIX_MATRIX_H

namespace Eigen {

namespace internal {

// pack a selfadjoint block diagonal for use with the gebp_kernel
template<typename Scalar, typename Index, int Pack1, int Pack2_dummy, int StorageOrder>
struct symm_pack_lhs
{
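  // Packs one panel of BlockRows rows starting at row i, for all `cols`
  // columns, into blockA. Only coefficients on or below the diagonal of the
  // selfadjoint lhs are referenced: the part left of the diagonal block is
  // copied directly, the diagonal block mixes direct, real-diagonal and
  // conjugated reads, and the part right of it is recovered as the
  // conjugate transpose of the stored triangle.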
  template<int BlockRows> inline
  void pack(Scalar* blockA, const const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
  {
    // normal copy
    for(Index k=0; k<i; k++)
      for(Index w=0; w<BlockRows; w++)
        blockA[count++] = lhs(i+w,k);           // normal
    // symmetric copy
    Index h = 0;
    for(Index k=i; k<i+BlockRows; k++)
    {
      for(Index w=0; w<h; w++)
        blockA[count++] = numext::conj(lhs(k, i+w)); // transposed

      blockA[count++] = numext::real(lhs(k,k));   // real (diagonal)

      for(Index w=h+1; w<BlockRows; w++)
        blockA[count++] = lhs(i+w, k);          // normal
      ++h;
    }
    // transposed copy
    for(Index k=i+BlockRows; k<cols; k++)
      for(Index w=0; w<BlockRows; w++)
        blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
  }
  void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
  {
    typedef typename unpacket_traits<typename packet_traits<Scalar>::type>::half HalfPacket;
    typedef typename unpacket_traits<typename unpacket_traits<typename packet_traits<Scalar>::type>::half>::half QuarterPacket;
    enum { PacketSize = packet_traits<Scalar>::size,
           HalfPacketSize = unpacket_traits<HalfPacket>::size,
           QuarterPacketSize = unpacket_traits<QuarterPacket>::size,
           HasHalf = (int)HalfPacketSize < (int)PacketSize,
           HasQuarter = (int)QuarterPacketSize < (int)HalfPacketSize};

    const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
    Index count = 0;
    //Index peeled_mc3 = (rows/Pack1)*Pack1;

    const Index peeled_mc3 = Pack1>=3*PacketSize ? (rows/(3*PacketSize))*(3*PacketSize) : 0;
    const Index peeled_mc2 = Pack1>=2*PacketSize ? peeled_mc3+((rows-peeled_mc3)/(2*PacketSize))*(2*PacketSize) : 0;
    const Index peeled_mc1 = Pack1>=1*PacketSize ? peeled_mc2+((rows-peeled_mc2)/(1*PacketSize))*(1*PacketSize) : 0;
    const Index peeled_mc_half = Pack1>=HalfPacketSize ? peeled_mc1+((rows-peeled_mc1)/(HalfPacketSize))*(HalfPacketSize) : 0;
    const Index peeled_mc_quarter = Pack1>=QuarterPacketSize ? peeled_mc_half+((rows-peeled_mc_half)/(QuarterPacketSize))*(QuarterPacketSize) : 0;

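    // Peel the rows with decreasing granularity: each peeled_mc_* above
    // marks how far the rows can be processed with blocks of 3, 2 and 1
    // full packets, then half and quarter packets where the architecture
    // provides them; the leftover rows are packed one at a time below.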
    if(Pack1>=3*PacketSize)
      for(Index i=0; i<peeled_mc3; i+=3*PacketSize)
        pack<3*PacketSize>(blockA, lhs, cols, i, count);

    if(Pack1>=2*PacketSize)
      for(Index i=peeled_mc3; i<peeled_mc2; i+=2*PacketSize)
        pack<2*PacketSize>(blockA, lhs, cols, i, count);

    if(Pack1>=1*PacketSize)
      for(Index i=peeled_mc2; i<peeled_mc1; i+=1*PacketSize)
        pack<1*PacketSize>(blockA, lhs, cols, i, count);

    if(HasHalf && Pack1>=HalfPacketSize)
      for(Index i=peeled_mc1; i<peeled_mc_half; i+=HalfPacketSize)
        pack<HalfPacketSize>(blockA, lhs, cols, i, count);

    if(HasQuarter && Pack1>=QuarterPacketSize)
      for(Index i=peeled_mc_half; i<peeled_mc_quarter; i+=QuarterPacketSize)
        pack<QuarterPacketSize>(blockA, lhs, cols, i, count);

    // do the same with mr==1
    for(Index i=peeled_mc_quarter; i<rows; i++)
    {
      for(Index k=0; k<i; k++)
        blockA[count++] = lhs(i, k);                   // normal

      blockA[count++] = numext::real(lhs(i, i));       // real (diagonal)

      for(Index k=i+1; k<cols; k++)
        blockA[count++] = numext::conj(lhs(k, i));     // transposed
    }
  }
};

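// Pack a rows x cols panel of the selfadjoint rhs, whose row range is
// [k2,k2+rows), for use with the gebp_kernel. Relative to that row range
// the columns fall into three regions: columns entirely left of the
// diagonal block are copied directly, columns crossing it mix direct,
// real-diagonal and conjugate-transposed reads, and columns right of it
// are read as the conjugate transpose of the stored (lower) triangle.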
template<typename Scalar, typename Index, int nr, int StorageOrder>
struct symm_pack_rhs
{
  enum { PacketSize = packet_traits<Scalar>::size };
  void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
  {
    Index end_k = k2 + rows;
    Index count = 0;
    const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);
    Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;
    Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;

    // first part: normal case
    for(Index j2=0; j2<k2; j2+=nr)
    {
      for(Index k=k2; k<end_k; k++)
      {
        blockB[count+0] = rhs(k,j2+0);
        blockB[count+1] = rhs(k,j2+1);
        if (nr>=4)
        {
          blockB[count+2] = rhs(k,j2+2);
          blockB[count+3] = rhs(k,j2+3);
        }
        if (nr>=8)
        {
          blockB[count+4] = rhs(k,j2+4);
          blockB[count+5] = rhs(k,j2+5);
          blockB[count+6] = rhs(k,j2+6);
          blockB[count+7] = rhs(k,j2+7);
        }
        count += nr;
      }
    }

    // second part: diagonal block
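    // columns [k2, min(k2+rows,cols)) cross the diagonal; end8 marks how far
    // they can be handled with the 8-column packing before falling back to
    // the 4-column variant below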
    Index end8 = nr>=8 ? (std::min)(k2+rows,packet_cols8) : k2;
    if(nr>=8)
    {
      for(Index j2=k2; j2<end8; j2+=8)
      {
        // again we can split vertically in three different parts (transpose, symmetric, normal)
        // transpose
        for(Index k=k2; k<j2; k++)
        {
          blockB[count+0] = numext::conj(rhs(j2+0,k));
          blockB[count+1] = numext::conj(rhs(j2+1,k));
          blockB[count+2] = numext::conj(rhs(j2+2,k));
          blockB[count+3] = numext::conj(rhs(j2+3,k));
          blockB[count+4] = numext::conj(rhs(j2+4,k));
          blockB[count+5] = numext::conj(rhs(j2+5,k));
          blockB[count+6] = numext::conj(rhs(j2+6,k));
          blockB[count+7] = numext::conj(rhs(j2+7,k));
          count += 8;
        }
        // symmetric
        Index h = 0;
        for(Index k=j2; k<j2+8; k++)
        {
          // normal
          for (Index w=0 ; w<h; ++w)
            blockB[count+w] = rhs(k,j2+w);

          blockB[count+h] = numext::real(rhs(k,k));

          // transpose
          for (Index w=h+1 ; w<8; ++w)
            blockB[count+w] = numext::conj(rhs(j2+w,k));
          count += 8;
          ++h;
        }
        // normal
        for(Index k=j2+8; k<end_k; k++)
        {
          blockB[count+0] = rhs(k,j2+0);
          blockB[count+1] = rhs(k,j2+1);
          blockB[count+2] = rhs(k,j2+2);
          blockB[count+3] = rhs(k,j2+3);
          blockB[count+4] = rhs(k,j2+4);
          blockB[count+5] = rhs(k,j2+5);
          blockB[count+6] = rhs(k,j2+6);
          blockB[count+7] = rhs(k,j2+7);
          count += 8;
        }
      }
    }
    if(nr>=4)
    {
      for(Index j2=end8; j2<(std::min)(k2+rows,packet_cols4); j2+=4)
      {
        // again we can split vertically in three different parts (transpose, symmetric, normal)
        // transpose
        for(Index k=k2; k<j2; k++)
        {
          blockB[count+0] = numext::conj(rhs(j2+0,k));
          blockB[count+1] = numext::conj(rhs(j2+1,k));
          blockB[count+2] = numext::conj(rhs(j2+2,k));
          blockB[count+3] = numext::conj(rhs(j2+3,k));
          count += 4;
        }
        // symmetric
        Index h = 0;
        for(Index k=j2; k<j2+4; k++)
        {
          // normal
          for (Index w=0 ; w<h; ++w)
            blockB[count+w] = rhs(k,j2+w);

          blockB[count+h] = numext::real(rhs(k,k));

          // transpose
          for (Index w=h+1 ; w<4; ++w)
            blockB[count+w] = numext::conj(rhs(j2+w,k));
          count += 4;
          ++h;
        }
        // normal
        for(Index k=j2+4; k<end_k; k++)
        {
          blockB[count+0] = rhs(k,j2+0);
          blockB[count+1] = rhs(k,j2+1);
          blockB[count+2] = rhs(k,j2+2);
          blockB[count+3] = rhs(k,j2+3);
          count += 4;
        }
      }
    }

    // third part: transposed
    if(nr>=8)
    {
      for(Index j2=k2+rows; j2<packet_cols8; j2+=8)
      {
        for(Index k=k2; k<end_k; k++)
        {
          blockB[count+0] = numext::conj(rhs(j2+0,k));
          blockB[count+1] = numext::conj(rhs(j2+1,k));
          blockB[count+2] = numext::conj(rhs(j2+2,k));
          blockB[count+3] = numext::conj(rhs(j2+3,k));
          blockB[count+4] = numext::conj(rhs(j2+4,k));
          blockB[count+5] = numext::conj(rhs(j2+5,k));
          blockB[count+6] = numext::conj(rhs(j2+6,k));
          blockB[count+7] = numext::conj(rhs(j2+7,k));
          count += 8;
        }
      }
    }
    if(nr>=4)
    {
      for(Index j2=(std::max)(packet_cols8,k2+rows); j2<packet_cols4; j2+=4)
      {
        for(Index k=k2; k<end_k; k++)
        {
          blockB[count+0] = numext::conj(rhs(j2+0,k));
          blockB[count+1] = numext::conj(rhs(j2+1,k));
          blockB[count+2] = numext::conj(rhs(j2+2,k));
          blockB[count+3] = numext::conj(rhs(j2+3,k));
          count += 4;
        }
      }
    }

    // copy the remaining columns one at a time (=> the same with nr==1)
    for(Index j2=packet_cols4; j2<cols; ++j2)
    {
      // transpose
      Index half = (std::min)(end_k,j2);
      for(Index k=k2; k<half; k++)
      {
        blockB[count] = numext::conj(rhs(j2,k));
        count += 1;
      }

      if(half==j2 && half<k2+rows)
      {
        blockB[count] = numext::real(rhs(j2,j2));
        count += 1;
      }
      else
        half--;

      // normal
      for(Index k=half+1; k<k2+rows; k++)
      {
        blockB[count] = rhs(k,j2);
        count += 1;
      }
    }
  }
};

/* Optimized selfadjoint matrix * matrix (_SYMM) product built on top of
 * the general matrix matrix product.
 */
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
          int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
          int ResStorageOrder, int ResInnerStride>
struct product_selfadjoint_matrix;

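// Only specializations are defined: a row-major result is reduced to the
// col-major case by transposing the whole product, and the two col-major
// specializations below handle a selfadjoint lhs with a general rhs and a
// general lhs with a selfadjoint rhs, respectively.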
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
          int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
          int ResInnerStride>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor,ResInnerStride>
{

  static EIGEN_STRONG_INLINE void run(
    Index rows, Index cols,
    const Scalar* lhs, Index lhsStride,
    const Scalar* rhs, Index rhsStride,
    Scalar* res,       Index resIncr, Index resStride,
    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
  {
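    // Evaluate the row-major result through the identity (A*B)^T = B^T*A^T:
    // swap the operands and transpose both. Transposing a selfadjoint
    // operand amounts to conjugating it (hence the IsComplex conjugation
    // toggles below), while a general operand has its storage order flipped.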
    product_selfadjoint_matrix<Scalar, Index,
      EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
      RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
      EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
      LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
      ColMajor,ResInnerStride>
      ::run(cols, rows,  rhs, rhsStride,  lhs, lhsStride,  res, resIncr, resStride,  alpha, blocking);
  }
};

template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs,
          int ResInnerStride>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor,ResInnerStride>
{

  static EIGEN_DONT_INLINE void run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res,        Index resIncr, Index resStride,
    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
};

template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs,
          int ResInnerStride>
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor,ResInnerStride>::run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* _res,       Index resIncr, Index resStride,
    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
  {
    Index size = rows;

    typedef gebp_traits<Scalar,Scalar> Traits;

    typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
    typedef const_blas_data_mapper<Scalar, Index, (LhsStorageOrder == RowMajor) ? ColMajor : RowMajor> LhsTransposeMapper;
    typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
    typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
    LhsMapper lhs(_lhs,lhsStride);
    LhsTransposeMapper lhs_transpose(_lhs,lhsStride);
    RhsMapper rhs(_rhs,rhsStride);
    ResMapper res(_res, resStride, resIncr);

    Index kc = blocking.kc();                   // cache block size along the K direction
    Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
    // kc must not exceed mc: the diagonal block packed below is kc x kc and
    // has to fit into blockA, which holds mc*kc scalars
    kc = (std::min)(kc,mc);
    std::size_t sizeA = kc*mc;
    std::size_t sizeB = kc*cols;
    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());

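    // gebp micro-kernel plus the three packers: the symmetric lhs packer
    // for the diagonal block, the generic rhs packer, and a transposed lhs
    // packer for the panel above the diagonal.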
    gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
    symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
    gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;
    gemm_pack_lhs<Scalar, Index, LhsTransposeMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;

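    // march over the K dimension in cache-sized chunks; each chunk selects
    // a kc-wide column panel of the selfadjoint lhs, which necessarily
    // intersects the diagonal and is therefore packed in three parts below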
    for(Index k2=0; k2<size; k2+=kc)
    {
      const Index actual_kc = (std::min)(k2+kc,size)-k2;

      // we have selected one row panel of rhs and one column panel of lhs
      // pack rhs's panel into a sequential chunk of memory
      // and expand each coeff to a constant packet for further reuse
      pack_rhs(blockB, rhs.getSubMapper(k2,0), actual_kc, cols);

      // the selected lhs panel has to be split in three different parts:
      //  1 - the transposed panel above the diagonal block => transposed packed copy
      //  2 - the diagonal block => special packed copy
      //  3 - the panel below the diagonal block => generic packed copy
      for(Index i2=0; i2<k2; i2+=mc)
      {
        const Index actual_mc = (std::min)(i2+mc,k2)-i2;
        // transposed packed copy
        pack_lhs_transposed(blockA, lhs_transpose.getSubMapper(i2, k2), actual_kc, actual_mc);

        gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
      // the block diagonal
      {
        const Index actual_mc = (std::min)(k2+kc,size)-k2;
        // symmetric packed copy
        pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res.getSubMapper(k2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }

      for(Index i2=k2+kc; i2<size; i2+=mc)
      {
        const Index actual_mc = (std::min)(i2+mc,size)-i2;
        gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder,false>()
          (blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);

        gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
    }
  }

// matrix * selfadjoint product
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs,
          int ResInnerStride>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor,ResInnerStride>
{

  static EIGEN_DONT_INLINE void run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res,        Index resIncr, Index resStride,
    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
};

template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs,
          int ResInnerStride>
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor,ResInnerStride>::run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* _res,       Index resIncr, Index resStride,
    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
  {
    Index size = cols;

    typedef gebp_traits<Scalar,Scalar> Traits;

    typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
    typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
    LhsMapper lhs(_lhs,lhsStride);
    ResMapper res(_res,resStride, resIncr);

    Index kc = blocking.kc();                   // cache block size along the K direction
    Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
    std::size_t sizeA = kc*mc;
    std::size_t sizeB = kc*cols;
    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());

    gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
    gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder> pack_lhs;
    symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;

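    // classic blocked GEMM traversal: the selfadjointness is handled
    // entirely by symm_pack_rhs, which expands the stored triangle into a
    // full kc x cols panel, so the lhs side uses the generic packing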
    for(Index k2=0; k2<size; k2+=kc)
    {
      const Index actual_kc = (std::min)(k2+kc,size)-k2;

      pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);

      // => GEPP
      for(Index i2=0; i2<rows; i2+=mc)
      {
        const Index actual_mc = (std::min)(i2+mc,rows)-i2;
        pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);

        gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
    }
  }

} // end namespace internal

/***************************************************************************
* Wrapper to product_selfadjoint_matrix
***************************************************************************/

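// Illustrative example (not part of this header): the kernels above are
// typically reached through a SelfadjointView product such as
//   Eigen::MatrixXcd A(n,n), B(n,m), C(n,m);
//   C.noalias() = A.selfadjointView<Eigen::Lower>() * B;
// which dispatches to selfadjoint_product_impl below.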
namespace internal {

template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false>
{
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

  typedef internal::blas_traits<Lhs> LhsBlasTraits;
  typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
  typedef internal::blas_traits<Rhs> RhsBlasTraits;
  typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;

  enum {
    LhsIsUpper = (LhsMode&(Upper|Lower))==Upper,
    LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint,
    RhsIsUpper = (RhsMode&(Upper|Lower))==Upper,
    RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
  };

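  // extract() strips transpose/conjugate/scalar-multiple wrappers from the
  // operand expressions so that the low-level routine can work directly on
  // raw pointers and strides.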
  template<typename Dest>
  static void run(Dest &dst, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)
  {
    eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());

    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);

    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
                               * RhsBlasTraits::extractScalarFactor(a_rhs);
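    // the operands' implicit scalar factors are folded into a single
    // actualAlpha so that the kernel applies them exactly once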

    typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
              Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,1> BlockingType;

    BlockingType blocking(lhs.rows(), rhs.cols(), lhs.cols(), 1, false);

    internal::product_selfadjoint_matrix<Scalar, Index,
      EIGEN_LOGICAL_XOR(LhsIsUpper,internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
      NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
      EIGEN_LOGICAL_XOR(RhsIsUpper,internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
      NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
      internal::traits<Dest>::Flags&RowMajorBit  ? RowMajor : ColMajor,
      Dest::InnerStrideAtCompileTime>
      ::run(
        lhs.rows(), rhs.cols(),                 // sizes
        &lhs.coeffRef(0,0), lhs.outerStride(),  // lhs info
        &rhs.coeffRef(0,0), rhs.outerStride(),  // rhs info
        &dst.coeffRef(0,0), dst.innerStride(), dst.outerStride(),  // result info
        actualAlpha, blocking                   // alpha
      );
  }
};

} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H