// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Gael Guennebaud <[email protected]>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
#define EIGEN_SELFADJOINT_MATRIX_MATRIX_H

namespace Eigen {

namespace internal {

// pack a selfadjoint block diagonal for use with the gebp_kernel
template<typename Scalar, typename Index, int Pack1, int Pack2_dummy, int StorageOrder>
struct symm_pack_lhs
{
  template<int BlockRows> inline
  void pack(Scalar* blockA, const const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
  {
    // normal copy
    for(Index k=0; k<i; k++)
      for(Index w=0; w<BlockRows; w++)
        blockA[count++] = lhs(i+w,k);                // normal
    // symmetric copy
    Index h = 0;
    for(Index k=i; k<i+BlockRows; k++)
    {
      for(Index w=0; w<h; w++)
        blockA[count++] = numext::conj(lhs(k, i+w)); // transposed

      blockA[count++] = numext::real(lhs(k,k));      // real (diagonal)

      for(Index w=h+1; w<BlockRows; w++)
        blockA[count++] = lhs(i+w, k);               // normal
      ++h;
    }
    // transposed copy
    for(Index k=i+BlockRows; k<cols; k++)
      for(Index w=0; w<BlockRows; w++)
        blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
  }

  void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
  {
    typedef typename unpacket_traits<typename packet_traits<Scalar>::type>::half HalfPacket;
    typedef typename unpacket_traits<typename unpacket_traits<typename packet_traits<Scalar>::type>::half>::half QuarterPacket;
    enum { PacketSize = packet_traits<Scalar>::size,
           HalfPacketSize = unpacket_traits<HalfPacket>::size,
           QuarterPacketSize = unpacket_traits<QuarterPacket>::size,
           HasHalf = (int)HalfPacketSize < (int)PacketSize,
           HasQuarter = (int)QuarterPacketSize < (int)HalfPacketSize};

    const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
    Index count = 0;
    //Index peeled_mc3 = (rows/Pack1)*Pack1;

    const Index peeled_mc3 = Pack1>=3*PacketSize ? (rows/(3*PacketSize))*(3*PacketSize) : 0;
    const Index peeled_mc2 = Pack1>=2*PacketSize ? peeled_mc3+((rows-peeled_mc3)/(2*PacketSize))*(2*PacketSize) : 0;
    const Index peeled_mc1 = Pack1>=1*PacketSize ? peeled_mc2+((rows-peeled_mc2)/(1*PacketSize))*(1*PacketSize) : 0;
    const Index peeled_mc_half = Pack1>=HalfPacketSize ? peeled_mc1+((rows-peeled_mc1)/(HalfPacketSize))*(HalfPacketSize) : 0;
    const Index peeled_mc_quarter = Pack1>=QuarterPacketSize ? peeled_mc_half+((rows-peeled_mc_half)/(QuarterPacketSize))*(QuarterPacketSize) : 0;
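
    // Illustration of the peeling cascade above: each boundary extends the previous
    // one by as many full panels of the next (smaller) width as still fit. For
    // example, assuming PacketSize==8, HalfPacketSize==4, QuarterPacketSize==2
    // (example values only; the actual sizes depend on Scalar and the SIMD target),
    // Pack1>=24 and rows==61, this yields peeled_mc3==48, peeled_mc2==48,
    // peeled_mc1==56, peeled_mc_half==60 and peeled_mc_quarter==60: two 24-row
    // panels, one 8-row panel, one 4-row panel, and the last row is left to the
    // scalar (mr==1) loop at the end of this function.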

    if(Pack1>=3*PacketSize)
      for(Index i=0; i<peeled_mc3; i+=3*PacketSize)
        pack<3*PacketSize>(blockA, lhs, cols, i, count);

    if(Pack1>=2*PacketSize)
      for(Index i=peeled_mc3; i<peeled_mc2; i+=2*PacketSize)
        pack<2*PacketSize>(blockA, lhs, cols, i, count);

    if(Pack1>=1*PacketSize)
      for(Index i=peeled_mc2; i<peeled_mc1; i+=1*PacketSize)
        pack<1*PacketSize>(blockA, lhs, cols, i, count);

    if(HasHalf && Pack1>=HalfPacketSize)
      for(Index i=peeled_mc1; i<peeled_mc_half; i+=HalfPacketSize)
        pack<HalfPacketSize>(blockA, lhs, cols, i, count);

    if(HasQuarter && Pack1>=QuarterPacketSize)
      for(Index i=peeled_mc_half; i<peeled_mc_quarter; i+=QuarterPacketSize)
        pack<QuarterPacketSize>(blockA, lhs, cols, i, count);

    // do the same with mr==1
    for(Index i=peeled_mc_quarter; i<rows; i++)
    {
      for(Index k=0; k<i; k++)
        blockA[count++] = lhs(i, k);               // normal

      blockA[count++] = numext::real(lhs(i, i));   // real (diagonal)

      for(Index k=i+1; k<cols; k++)
        blockA[count++] = numext::conj(lhs(k, i)); // transposed
    }
  }
};

template<typename Scalar, typename Index, int nr, int StorageOrder>
struct symm_pack_rhs
{
  enum { PacketSize = packet_traits<Scalar>::size };
  void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
  {
    Index end_k = k2 + rows;
    Index count = 0;
    const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);
    Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;
    Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;

    // first part: normal case
    for(Index j2=0; j2<k2; j2+=nr)
    {
      for(Index k=k2; k<end_k; k++)
      {
        blockB[count+0] = rhs(k,j2+0);
        blockB[count+1] = rhs(k,j2+1);
        if (nr>=4)
        {
          blockB[count+2] = rhs(k,j2+2);
          blockB[count+3] = rhs(k,j2+3);
        }
        if (nr>=8)
        {
          blockB[count+4] = rhs(k,j2+4);
          blockB[count+5] = rhs(k,j2+5);
          blockB[count+6] = rhs(k,j2+6);
          blockB[count+7] = rhs(k,j2+7);
        }
        count += nr;
      }
    }
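
    // Layout of the packed band (illustrative summary of the three parts handled by
    // this function): rows [k2, k2+rows) of the selfadjoint rhs are packed for every
    // column, split by column range:
    //   columns [0, k2)         -> "normal" copy (loop above): rhs(k,j2+w) read as is
    //   columns [k2, k2+rows)   -> diagonal block (below): each group of nr columns
    //                              is itself split vertically into a transposed part
    //                              (k < j2), the symmetric nr x nr diagonal
    //                              sub-block, and a normal part (k >= j2+nr)
    //   columns [k2+rows, cols) -> transposed copy (below): numext::conj(rhs(j2+w,k))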

    // second part: diagonal block
    Index end8 = nr>=8 ? (std::min)(k2+rows,packet_cols8) : k2;
    if(nr>=8)
    {
      for(Index j2=k2; j2<end8; j2+=8)
      {
        // again we can split vertically in three different parts (transpose, symmetric, normal)
        // transpose
        for(Index k=k2; k<j2; k++)
        {
          blockB[count+0] = numext::conj(rhs(j2+0,k));
          blockB[count+1] = numext::conj(rhs(j2+1,k));
          blockB[count+2] = numext::conj(rhs(j2+2,k));
          blockB[count+3] = numext::conj(rhs(j2+3,k));
          blockB[count+4] = numext::conj(rhs(j2+4,k));
          blockB[count+5] = numext::conj(rhs(j2+5,k));
          blockB[count+6] = numext::conj(rhs(j2+6,k));
          blockB[count+7] = numext::conj(rhs(j2+7,k));
          count += 8;
        }
        // symmetric
        Index h = 0;
        for(Index k=j2; k<j2+8; k++)
        {
          // normal
          for (Index w=0 ; w<h; ++w)
            blockB[count+w] = rhs(k,j2+w);

          blockB[count+h] = numext::real(rhs(k,k));

          // transpose
          for (Index w=h+1 ; w<8; ++w)
            blockB[count+w] = numext::conj(rhs(j2+w,k));
          count += 8;
          ++h;
        }
        // normal
        for(Index k=j2+8; k<end_k; k++)
        {
          blockB[count+0] = rhs(k,j2+0);
          blockB[count+1] = rhs(k,j2+1);
          blockB[count+2] = rhs(k,j2+2);
          blockB[count+3] = rhs(k,j2+3);
          blockB[count+4] = rhs(k,j2+4);
          blockB[count+5] = rhs(k,j2+5);
          blockB[count+6] = rhs(k,j2+6);
          blockB[count+7] = rhs(k,j2+7);
          count += 8;
        }
      }
    }
    if(nr>=4)
    {
      for(Index j2=end8; j2<(std::min)(k2+rows,packet_cols4); j2+=4)
      {
        // again we can split vertically in three different parts (transpose, symmetric, normal)
        // transpose
        for(Index k=k2; k<j2; k++)
        {
          blockB[count+0] = numext::conj(rhs(j2+0,k));
          blockB[count+1] = numext::conj(rhs(j2+1,k));
          blockB[count+2] = numext::conj(rhs(j2+2,k));
          blockB[count+3] = numext::conj(rhs(j2+3,k));
          count += 4;
        }
        // symmetric
        Index h = 0;
        for(Index k=j2; k<j2+4; k++)
        {
          // normal
          for (Index w=0 ; w<h; ++w)
            blockB[count+w] = rhs(k,j2+w);

          blockB[count+h] = numext::real(rhs(k,k));

          // transpose
          for (Index w=h+1 ; w<4; ++w)
            blockB[count+w] = numext::conj(rhs(j2+w,k));
          count += 4;
          ++h;
        }
        // normal
        for(Index k=j2+4; k<end_k; k++)
        {
          blockB[count+0] = rhs(k,j2+0);
          blockB[count+1] = rhs(k,j2+1);
          blockB[count+2] = rhs(k,j2+2);
          blockB[count+3] = rhs(k,j2+3);
          count += 4;
        }
      }
    }
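
    // Worked example for the symmetric sub-block above (nr==4 case): for the second
    // row of the sub-block, i.e. k == j2+1 and h == 1, the four packed values are
    //   rhs(k,j2+0), numext::real(rhs(k,k)), numext::conj(rhs(j2+2,k)), numext::conj(rhs(j2+3,k))
    // entries left of the diagonal are copied as is, the diagonal entry is taken
    // real, and entries right of the diagonal are fetched from the mirrored
    // position and conjugated.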

    // third part: transposed
    if(nr>=8)
    {
      for(Index j2=k2+rows; j2<packet_cols8; j2+=8)
      {
        for(Index k=k2; k<end_k; k++)
        {
          blockB[count+0] = numext::conj(rhs(j2+0,k));
          blockB[count+1] = numext::conj(rhs(j2+1,k));
          blockB[count+2] = numext::conj(rhs(j2+2,k));
          blockB[count+3] = numext::conj(rhs(j2+3,k));
          blockB[count+4] = numext::conj(rhs(j2+4,k));
          blockB[count+5] = numext::conj(rhs(j2+5,k));
          blockB[count+6] = numext::conj(rhs(j2+6,k));
          blockB[count+7] = numext::conj(rhs(j2+7,k));
          count += 8;
        }
      }
    }
    if(nr>=4)
    {
      for(Index j2=(std::max)(packet_cols8,k2+rows); j2<packet_cols4; j2+=4)
      {
        for(Index k=k2; k<end_k; k++)
        {
          blockB[count+0] = numext::conj(rhs(j2+0,k));
          blockB[count+1] = numext::conj(rhs(j2+1,k));
          blockB[count+2] = numext::conj(rhs(j2+2,k));
          blockB[count+3] = numext::conj(rhs(j2+3,k));
          count += 4;
        }
      }
    }

    // copy the remaining columns one at a time (=> the same with nr==1)
    for(Index j2=packet_cols4; j2<cols; ++j2)
    {
      // transpose
      Index half = (std::min)(end_k,j2);
      for(Index k=k2; k<half; k++)
      {
        blockB[count] = numext::conj(rhs(j2,k));
        count += 1;
      }

      if(half==j2 && half<k2+rows)
      {
        blockB[count] = numext::real(rhs(j2,j2));
        count += 1;
      }
      else
        half--;

      // normal
      for(Index k=half+1; k<k2+rows; k++)
      {
        blockB[count] = rhs(k,j2);
        count += 1;
      }
    }
  }
};

/* Optimized selfadjoint matrix * matrix (_SYMM) product built on top of
 * the general matrix matrix product.
 */
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
          int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
          int ResStorageOrder, int ResInnerStride>
struct product_selfadjoint_matrix;

template <typename Scalar, typename Index,
          int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
          int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
          int ResInnerStride>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor,ResInnerStride>
{

  static EIGEN_STRONG_INLINE void run(
    Index rows, Index cols,
    const Scalar* lhs, Index lhsStride,
    const Scalar* rhs, Index rhsStride,
    Scalar* res,       Index resIncr, Index resStride,
    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
  {
    product_selfadjoint_matrix<Scalar, Index,
      EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
      RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
      EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
      LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
      ColMajor,ResInnerStride>
      ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking);
  }
};
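
// The row-major specialization above reduces to the column-major case through the
// identity (A*B)^T = B^T * A^T: sizes and operands are swapped, and the storage
// orders and conjugation flags are adjusted accordingly, so only the column-major
// implementations below need to be written.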

template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs,
          int ResInnerStride>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor,ResInnerStride>
{

  static EIGEN_DONT_INLINE void run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res,        Index resIncr, Index resStride,
    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
};

template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs,
          int ResInnerStride>
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor,ResInnerStride>::run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* _res,       Index resIncr, Index resStride,
    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
  Index size = rows;

  typedef gebp_traits<Scalar,Scalar> Traits;

  typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
  typedef const_blas_data_mapper<Scalar, Index, (LhsStorageOrder == RowMajor) ? ColMajor : RowMajor> LhsTransposeMapper;
  typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
  typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
  LhsMapper lhs(_lhs,lhsStride);
  LhsTransposeMapper lhs_transpose(_lhs,lhsStride);
  RhsMapper rhs(_rhs,rhsStride);
  ResMapper res(_res, resStride, resIncr);

  Index kc = blocking.kc();                   // cache block size along the K direction
  Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
  // kc must be smaller than mc
  kc = (std::min)(kc,mc);
  std::size_t sizeA = kc*mc;
  std::size_t sizeB = kc*cols;
  ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
  ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());

  gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
  symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
  gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;
  gemm_pack_lhs<Scalar, Index, LhsTransposeMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;

  for(Index k2=0; k2<size; k2+=kc)
  {
    const Index actual_kc = (std::min)(k2+kc,size)-k2;

    // we have selected one row panel of rhs and one column panel of lhs
    // pack rhs's panel into a sequential chunk of memory
    // and expand each coeff to a constant packet for further reuse
    pack_rhs(blockB, rhs.getSubMapper(k2,0), actual_kc, cols);

    // the selected lhs panel has to be split into three different parts:
    //  1 - the transposed panel above the diagonal block => transposed packed copy
    //  2 - the diagonal block => special packed copy
    //  3 - the panel below the diagonal block => generic packed copy
    for(Index i2=0; i2<k2; i2+=mc)
    {
      const Index actual_mc = (std::min)(i2+mc,k2)-i2;
      // transposed packed copy
      pack_lhs_transposed(blockA, lhs_transpose.getSubMapper(i2, k2), actual_kc, actual_mc);

      gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
    }
    // the block diagonal
    {
      const Index actual_mc = (std::min)(k2+kc,size)-k2;
      // symmetric packed copy
      pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);

      gebp_kernel(res.getSubMapper(k2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
    }

    for(Index i2=k2+kc; i2<size; i2+=mc)
    {
      const Index actual_mc = (std::min)(i2+mc,size)-i2;
      gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder,false>()
        (blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);

      gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
    }
  }
}

// matrix * selfadjoint product
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs,
          int ResInnerStride>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor,ResInnerStride>
{

  static EIGEN_DONT_INLINE void run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res,        Index resIncr, Index resStride,
    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
};

template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs,
          int ResInnerStride>
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor,ResInnerStride>::run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* _res,       Index resIncr, Index resStride,
    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
  Index size = cols;

  typedef gebp_traits<Scalar,Scalar> Traits;

  typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
  typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
  LhsMapper lhs(_lhs,lhsStride);
  ResMapper res(_res,resStride, resIncr);

  Index kc = blocking.kc();                   // cache block size along the K direction
  Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
  std::size_t sizeA = kc*mc;
  std::size_t sizeB = kc*cols;
  ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
  ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());

  gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
  gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder> pack_lhs;
  symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;

  for(Index k2=0; k2<size; k2+=kc)
  {
    const Index actual_kc = (std::min)(k2+kc,size)-k2;

    pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);

    // => GEPP
    for(Index i2=0; i2<rows; i2+=mc)
    {
      const Index actual_mc = (std::min)(i2+mc,rows)-i2;
      pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);

      gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
    }
  }
}

} // end namespace internal

/***************************************************************************
* Wrapper to product_selfadjoint_matrix
***************************************************************************/
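
// Usage sketch (illustrative): a product such as
//   dst.noalias() = alpha * A.selfadjointView<Lower>() * B;
// is typically evaluated through the selfadjoint_product_impl specialization
// below, which extracts the scalar factors of both operands and forwards the
// bare matrices to internal::product_selfadjoint_matrix::run().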

namespace internal {

template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false>
{
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

  typedef internal::blas_traits<Lhs> LhsBlasTraits;
  typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
  typedef internal::blas_traits<Rhs> RhsBlasTraits;
  typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;

  enum {
    LhsIsUpper = (LhsMode&(Upper|Lower))==Upper,
    LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint,
    RhsIsUpper = (RhsMode&(Upper|Lower))==Upper,
    RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
  };

  template<typename Dest>
  static void run(Dest &dst, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)
  {
    eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());

    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);

    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
                               * RhsBlasTraits::extractScalarFactor(a_rhs);

    typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
              Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,1> BlockingType;

    BlockingType blocking(lhs.rows(), rhs.cols(), lhs.cols(), 1, false);

    internal::product_selfadjoint_matrix<Scalar, Index,
      EIGEN_LOGICAL_XOR(LhsIsUpper,internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
      NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
      EIGEN_LOGICAL_XOR(RhsIsUpper,internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
      NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
      internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor,
      Dest::InnerStrideAtCompileTime>
      ::run(
        lhs.rows(), rhs.cols(),                                    // sizes
        &lhs.coeffRef(0,0), lhs.outerStride(),                     // lhs info
        &rhs.coeffRef(0,0), rhs.outerStride(),                     // rhs info
        &dst.coeffRef(0,0), dst.innerStride(), dst.outerStride(),  // result info
        actualAlpha, blocking                                      // alpha
      );
  }
};

} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H