// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2011 Benoit Jacob <[email protected]>
// Copyright (C) 2011-2014 Gael Guennebaud <[email protected]>
// Copyright (C) 2011-2012 Jitse Niesen <[email protected]>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_ASSIGN_EVALUATOR_H
#define EIGEN_ASSIGN_EVALUATOR_H

namespace Eigen {

// This implementation is based on Assign.h

namespace internal {

/***************************************************************************
* Part 1 : the logic deciding a strategy for traversal and unrolling       *
***************************************************************************/

// copy_using_evaluator_traits is based on assign_traits
//
// Given a destination evaluator, a source evaluator and an assignment functor,
// this traits class selects at compile time:
//  - Traversal: how the coefficients are visited (linear, inner-vectorized,
//    linearly-vectorized, slice-vectorized, or plain default traversal),
//  - Unrolling: whether the resulting loop(s) are meta-unrolled,
//  - PacketType: the SIMD packet type used when vectorizing.
// MaxPacketSize (default -1, i.e. unbounded) optionally caps the packet size;
// this is used by restricted_packet_dense_assignment_kernel below.
template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc, int MaxPacketSize = -1>
struct copy_using_evaluator_traits
{
  typedef typename DstEvaluator::XprType Dst;
  typedef typename Dst::Scalar DstScalar;

  enum {
    DstFlags = DstEvaluator::Flags,
    SrcFlags = SrcEvaluator::Flags
  };

public:
  enum {
    DstAlignment = DstEvaluator::Alignment,
    SrcAlignment = SrcEvaluator::Alignment,
    DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit,
    // Alignment guaranteed by *both* sides, i.e. the usable joint alignment.
    JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
  };

private:
  enum {
    // Size of the inner dimension (the one traversed contiguously), at compile time.
    InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
              : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
              : int(Dst::RowsAtCompileTime),
    InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
              : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
              : int(Dst::MaxRowsAtCompileTime),
    // Sizes clamped by MaxPacketSize, used to pick a packet no larger than requested.
    RestrictedInnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(InnerSize,MaxPacketSize),
    RestrictedLinearSize = EIGEN_SIZE_MIN_PREFER_FIXED(Dst::SizeAtCompileTime,MaxPacketSize),
    OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
    MaxSizeAtCompileTime = Dst::SizeAtCompileTime
  };

  // TODO distinguish between linear traversal and inner-traversals
  typedef typename find_best_packet<DstScalar,RestrictedLinearSize>::type LinearPacketType;
  typedef typename find_best_packet<DstScalar,RestrictedInnerSize>::type InnerPacketType;

  enum {
    LinearPacketSize = unpacket_traits<LinearPacketType>::size,
    InnerPacketSize = unpacket_traits<InnerPacketType>::size
  };

public:
  enum {
    LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment,
    InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment
  };

private:
  enum {
    DstIsRowMajor = DstFlags&RowMajorBit,
    SrcIsRowMajor = SrcFlags&RowMajorBit,
    StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
    // Vectorization is only considered when both sides expose packet access,
    // storage orders agree, and the functor supports packets.
    MightVectorize = bool(StorageOrdersAgree)
                  && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
                  && bool(functor_traits<AssignFunc>::PacketAccess),
    // Inner vectorization requires the inner size and outer stride to be
    // compile-time multiples of the packet size (so every inner run is whole packets).
    MayInnerVectorize  = MightVectorize
                       && int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
                       && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
                       && (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)),
    MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
    MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess)
                       && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
      /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
         so it's only good for large enough sizes. */
    MaySliceVectorize  = bool(MightVectorize) && bool(DstHasDirectAccess)
                       && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize)))
      /* slice vectorization can be slow, so we only want it if the slices are big, which is
         indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
         in a fixed-size matrix
         However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
  };

public:
  enum {
    Traversal =  int(Dst::SizeAtCompileTime) == 0 ? int(AllAtOnceTraversal) // If compile-size is zero, traversing will fail at compile-time.
              : (int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize)) ? int(LinearVectorizedTraversal)
              : int(MayInnerVectorize)   ? int(InnerVectorizedTraversal)
              : int(MayLinearVectorize)  ? int(LinearVectorizedTraversal)
              : int(MaySliceVectorize)   ? int(SliceVectorizedTraversal)
              : int(MayLinearize)        ? int(LinearTraversal)
                                         : int(DefaultTraversal),
    Vectorized = int(Traversal) == InnerVectorizedTraversal
              || int(Traversal) == LinearVectorizedTraversal
              || int(Traversal) == SliceVectorizedTraversal
  };

  // Packet actually used by the selected traversal.
  typedef typename conditional<int(Traversal)==LinearVectorizedTraversal, LinearPacketType, InnerPacketType>::type PacketType;

private:
  enum {
    ActualPacketSize    = int(Traversal)==LinearVectorizedTraversal ? LinearPacketSize
                        : Vectorized ? InnerPacketSize
                        : 1,
    UnrollingLimit      = EIGEN_UNROLLING_LIMIT * ActualPacketSize,
    // Unrolling is allowed only when the total (resp. inner) cost, estimated as
    // size * (dst read cost + src read cost), stays below the unrolling limit.
    MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
                       && int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit),
    MayUnrollInner      = int(InnerSize) != Dynamic
                       && int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit)
  };

public:
  enum {
    Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
              ? (
                  int(MayUnrollCompletely) ? int(CompleteUnrolling)
                : int(MayUnrollInner)      ? int(InnerUnrolling)
                                           : int(NoUnrolling)
                )
              : int(Traversal) == int(LinearVectorizedTraversal)
                ? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)))
                          ? int(CompleteUnrolling)
                          : int(NoUnrolling) )
              : int(Traversal) == int(LinearTraversal)
                ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
                                              : int(NoUnrolling) )
#if EIGEN_UNALIGNED_VECTORIZE
              : int(Traversal) == int(SliceVectorizedTraversal)
                ? ( bool(MayUnrollInner) ? int(InnerUnrolling)
                                         : int(NoUnrolling) )
#endif
              : int(NoUnrolling)
  };

#ifdef EIGEN_DEBUG_ASSIGN
  // Dumps the full compile-time decision (flags, alignments, sizes, chosen
  // traversal and unrolling) to std::cerr; only compiled with EIGEN_DEBUG_ASSIGN.
  static void debug()
  {
    std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
    std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
    std::cerr.setf(std::ios::hex, std::ios::basefield);
    std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl;
    std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl;
    std::cerr.unsetf(std::ios::hex);
    EIGEN_DEBUG_VAR(DstAlignment)
    EIGEN_DEBUG_VAR(SrcAlignment)
    EIGEN_DEBUG_VAR(LinearRequiredAlignment)
    EIGEN_DEBUG_VAR(InnerRequiredAlignment)
    EIGEN_DEBUG_VAR(JointAlignment)
    EIGEN_DEBUG_VAR(InnerSize)
    EIGEN_DEBUG_VAR(InnerMaxSize)
    EIGEN_DEBUG_VAR(LinearPacketSize)
    EIGEN_DEBUG_VAR(InnerPacketSize)
    EIGEN_DEBUG_VAR(ActualPacketSize)
    EIGEN_DEBUG_VAR(StorageOrdersAgree)
    EIGEN_DEBUG_VAR(MightVectorize)
    EIGEN_DEBUG_VAR(MayLinearize)
    EIGEN_DEBUG_VAR(MayInnerVectorize)
    EIGEN_DEBUG_VAR(MayLinearVectorize)
    EIGEN_DEBUG_VAR(MaySliceVectorize)
    std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
    EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost)
    EIGEN_DEBUG_VAR(DstEvaluator::CoeffReadCost)
    EIGEN_DEBUG_VAR(Dst::SizeAtCompileTime)
    EIGEN_DEBUG_VAR(UnrollingLimit)
    EIGEN_DEBUG_VAR(MayUnrollCompletely)
    EIGEN_DEBUG_VAR(MayUnrollInner)
    std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
    std::cerr << std::endl;
  }
#endif
};

/***************************************************************************
* Part 2 : meta-unrollers                                                  *
***************************************************************************/
/************************
*** Default traversal ***
************************/

// Fully unrolls a coefficient-wise assignment: one assignCoeffByOuterInner call
// per flat index in [Index, Stop), recursing at compile time.
template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
{
  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
  typedef typename DstEvaluatorType::XprType DstXprType;

  enum {
    // Map the flat unrolling index to (outer, inner) coordinates.
    outer = Index / DstXprType::InnerSizeAtCompileTime,
    inner = Index % DstXprType::InnerSizeAtCompileTime
  };

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    kernel.assignCoeffByOuterInner(outer, inner);
    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
  }
};

// Recursion terminator (Index == Stop).
template<typename Kernel, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
};

// Unrolls one inner run only: assigns coefficients (outer, Index_) .. (outer, Stop-1).
template<typename Kernel, int Index_, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
  {
    kernel.assignCoeffByOuterInner(outer, Index_);
    copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_+1, Stop>::run(kernel, outer);
  }
};

// Recursion terminator.
template<typename Kernel, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { }
};

/***********************
*** Linear traversal ***
***********************/

// Fully unrolls a linear (flat-index) assignment over [Index, Stop).
template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel)
  {
    kernel.assignCoeff(Index);
    copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
  }
};

// Recursion terminator.
template<typename Kernel, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
};

/**************************
*** Inner vectorization ***
**************************/

// Fully unrolls a packet-wise assignment: advances by one packet per step,
// so Stop is expected to be a multiple of the packet size past Index.
template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling
{
  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
  typedef typename DstEvaluatorType::XprType DstXprType;
  typedef typename Kernel::PacketType PacketType;

  enum {
    outer = Index / DstXprType::InnerSizeAtCompileTime,
    inner = Index % DstXprType::InnerSizeAtCompileTime,
    SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
    DstAlignment = Kernel::AssignmentTraits::DstAlignment
  };

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
    enum { NextIndex = Index + unpacket_traits<PacketType>::size };
    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
  }
};

// Recursion terminator.
template<typename Kernel, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
};

// Packet-wise unrolling of a single inner run, with explicit alignment modes.
template<typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
struct copy_using_evaluator_innervec_InnerUnrolling
{
  typedef typename Kernel::PacketType PacketType;
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
  {
    kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
    enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
    copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel, outer);
  }
};

// Recursion terminator.
template<typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
};

/***************************************************************************
* Part 3 : implementation of all cases                                     *
***************************************************************************/

// dense_assignment_loop is based on assign_impl
//
// The primary template is only declared; one specialization per
// (Traversal, Unrolling) combination follows below.
template<typename Kernel,
         int Traversal = Kernel::AssignmentTraits::Traversal,
         int Unrolling = Kernel::AssignmentTraits::Unrolling>
struct dense_assignment_loop;

/************************
***** Special Cases *****
************************/

// Zero-sized assignment is a no-op.
template<typename Kernel, int Unrolling>
struct dense_assignment_loop<Kernel, AllAtOnceTraversal, Unrolling>
{
  EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel& /*kernel*/)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    // AllAtOnceTraversal is selected only for compile-time-zero-sized
    // destinations; anything else here indicates an internal bug.
    EIGEN_STATIC_ASSERT(int(DstXprType::SizeAtCompileTime) == 0,
                        EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT)
  }
};

/************************
*** Default traversal ***
************************/

// Plain double loop over (outer, inner), one coefficient at a time.
template<typename Kernel>
struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
{
  EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel)
  {
    for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
      for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
        kernel.assignCoeffByOuterInner(outer, inner);
      }
    }
  }
};

template<typename Kernel>
struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
  }
};

// Runtime loop over the outer dimension, unrolled inner runs.
template<typename Kernel>
struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;

    const Index outerSize = kernel.outerSize();
    for(Index outer = 0; outer < outerSize; ++outer)
      copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
  }
};

/***************************
*** Linear vectorization ***
***************************/


// The goal of unaligned_dense_assignment_loop is simply to factorize the handling
// of the non vectorizable beginning and ending parts

template <bool IsAligned = false>
struct unaligned_dense_assignment_loop
{
  // if IsAligned = true, then do nothing
  template <typename Kernel>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {}
};

template <>
struct unaligned_dense_assignment_loop<false>
{
  // MSVC must not inline this functions. If it does, it fails to optimize the
  // packet access path.
  // FIXME check which version exhibits this issue
#if EIGEN_COMP_MSVC
  template <typename Kernel>
  static EIGEN_DONT_INLINE void run(Kernel &kernel,
                                    Index start,
                                    Index end)
#else
  template <typename Kernel>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel,
                                                        Index start,
                                                        Index end)
#endif
  {
    // Scalar-by-scalar assignment over the unaligned head or tail [start, end).
    for (Index index = start; index < end; ++index)
      kernel.assignCoeff(index);
  }
};

// Linearly-vectorized loop: scalar head up to the first aligned index, then
// full packets, then a scalar tail.
template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    const Index size = kernel.size();
    typedef typename Kernel::Scalar Scalar;
    typedef typename Kernel::PacketType PacketType;
    enum {
      requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment,
      packetSize = unpacket_traits<PacketType>::size,
      dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
      dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
                                                            : int(Kernel::AssignmentTraits::DstAlignment),
      srcAlignment = Kernel::AssignmentTraits::JointAlignment
    };
    const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size);
    const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;

    unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);

    for(Index index = alignedStart; index < alignedEnd; index += packetSize)
      kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);

    unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
  }
};

// Fully unrolled variant: packets for the multiple-of-packetSize prefix,
// individual coefficients for the remainder.
template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    typedef typename Kernel::PacketType PacketType;

    enum { size = DstXprType::SizeAtCompileTime,
           packetSize =unpacket_traits<PacketType>::size,
           alignedSize = (int(size)/packetSize)*packetSize };

    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
  }
};

/**************************
*** Inner vectorization ***
**************************/

// Whole-packet traversal of each inner run; valid because InnerVectorizedTraversal
// guarantees innerSize is a multiple of the packet size.
template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
{
  typedef typename Kernel::PacketType PacketType;
  enum {
    SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
    DstAlignment = Kernel::AssignmentTraits::DstAlignment
  };
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    const Index innerSize = kernel.innerSize();
    const Index outerSize = kernel.outerSize();
    const Index packetSize = unpacket_traits<PacketType>::size;
    for(Index outer = 0; outer < outerSize; ++outer)
      for(Index inner = 0; inner < innerSize; inner+=packetSize)
        kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
  }
};

template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
  }
};

template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    typedef typename Kernel::AssignmentTraits Traits;
    const Index outerSize = kernel.outerSize();
    for(Index outer = 0; outer < outerSize; ++outer)
      copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime,
                                                   Traits::SrcAlignment, Traits::DstAlignment>::run(kernel, outer);
  }
};

/***********************
*** Linear traversal ***
***********************/

// Single flat-index loop, one coefficient at a time.
template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    const Index size = kernel.size();
    for(Index i = 0; i < size; ++i)
      kernel.assignCoeff(i);
  }
};

template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
  }
};

/**************************
*** Slice vectorization ***
***************************/
// Slice vectorization: each inner run is split into an unaligned scalar head,
// a vectorized middle, and a scalar tail; the aligned start shifts from one
// outer slice to the next according to the outer stride.
template<typename Kernel>
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::Scalar Scalar;
    typedef typename Kernel::PacketType PacketType;
    enum {
      packetSize = unpacket_traits<PacketType>::size,
      requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment),
      alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
      dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
      dstAlignment = alignable ? int(requestedAlignment)
                               : int(Kernel::AssignmentTraits::DstAlignment)
    };
    const Scalar *dst_ptr = kernel.dstDataPtr();
    if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
    {
      // the pointer is not aligned-on scalar, so alignment is not possible
      return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
    }
    const Index packetAlignedMask = packetSize - 1;
    const Index innerSize = kernel.innerSize();
    const Index outerSize = kernel.outerSize();
    const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
    Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0
                       : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);

    for(Index outer = 0; outer < outerSize; ++outer)
    {
      const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
      // do the non-vectorizable part of the assignment
      for(Index inner = 0; inner<alignedStart ; ++inner)
        kernel.assignCoeffByOuterInner(outer, inner);

      // do the vectorizable part of the assignment
      for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
        kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);

      // do the non-vectorizable part of the assignment
      for(Index inner = alignedEnd; inner<innerSize ; ++inner)
        kernel.assignCoeffByOuterInner(outer, inner);

      // Shift the aligned start for the next outer slice.
      alignedStart = numext::mini((alignedStart+alignedStep)%packetSize, innerSize);
    }
  }
};

#if EIGEN_UNALIGNED_VECTORIZE
// With unaligned vectorization enabled, each inner run can be unrolled:
// unaligned packets for the multiple-of-packetSize prefix, scalars for the rest.
template<typename Kernel>
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    typedef typename Kernel::PacketType PacketType;

    enum { innerSize = DstXprType::InnerSizeAtCompileTime,
           packetSize =unpacket_traits<PacketType>::size,
           vectorizableSize = (int(innerSize) / int(packetSize)) * int(packetSize),
           size = DstXprType::SizeAtCompileTime };

    for(Index outer = 0; outer < kernel.outerSize(); ++outer)
    {
      copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);
      copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, innerSize>::run(kernel, outer);
    }
  }
};
#endif


/***************************************************************************
* Part 4 : Generic dense assignment kernel                                 *
***************************************************************************/

// This class generalize the assignment of a coefficient (or packet) from one dense evaluator
// to another dense writable evaluator.
// It is parametrized by the two evaluators, and the actual assignment functor.
// This abstraction level permits to keep the evaluation loops as simple and as generic as possible.
// One can customize the assignment using this generic dense_assignment_kernel with different
// functors, or by completely overloading it, by-passing a functor.
template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
class generic_dense_assignment_kernel
{
protected:
  typedef typename DstEvaluatorTypeT::XprType DstXprType;
  typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
public:

  typedef DstEvaluatorTypeT DstEvaluatorType;
  typedef SrcEvaluatorTypeT SrcEvaluatorType;
  typedef typename DstEvaluatorType::Scalar Scalar;
  typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
  typedef typename AssignmentTraits::PacketType PacketType;


  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
    : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
  {
    #ifdef EIGEN_DEBUG_ASSIGN
    AssignmentTraits::debug();
    #endif
  }

  // All sizes/strides are those of the destination expression.
  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index size() const EIGEN_NOEXCEPT { return m_dstExpr.size(); }
  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index innerSize() const EIGEN_NOEXCEPT { return m_dstExpr.innerSize(); }
  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index outerSize() const EIGEN_NOEXCEPT { return m_dstExpr.outerSize(); }
  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_dstExpr.rows(); }
  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_dstExpr.cols(); }
  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index outerStride() const EIGEN_NOEXCEPT { return m_dstExpr.outerStride(); }

  EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() EIGEN_NOEXCEPT { return m_dst; }
  EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const EIGEN_NOEXCEPT { return m_src; }

  /// Assign src(row,col) to dst(row,col) through the assignment functor.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
  {
    m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
  }

  /// \sa assignCoeff(Index,Index)
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)
  {
    m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
  }

  /// \sa assignCoeff(Index,Index)
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)
  {
    Index row = rowIndexByOuterInner(outer, inner);
    Index col = colIndexByOuterInner(outer, inner);
    assignCoeff(row, col);
  }


  /// Assign one packet of src starting at (row,col) through the assignment functor.
  template<int StoreMode, int LoadMode, typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col)
  {
    m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
  }

  /// \sa assignPacket(Index,Index)
  template<int StoreMode, int LoadMode, typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index)
  {
    m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
  }

  /// \sa assignPacket(Index,Index)
  template<int StoreMode, int LoadMode, typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)
  {
    Index row = rowIndexByOuterInner(outer, inner);
    Index col = colIndexByOuterInner(outer, inner);
    assignPacket<StoreMode,LoadMode,PacketType>(row, col);
  }

  // Translate (outer, inner) coordinates to a row index, depending on the
  // destination's vector-ness and storage order.
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
  {
    typedef typename DstEvaluatorType::ExpressionTraits Traits;
    return int(Traits::RowsAtCompileTime) == 1 ? 0
      : int(Traits::ColsAtCompileTime) == 1 ? inner
      : int(DstEvaluatorType::Flags)&RowMajorBit ? outer
      : inner;
  }

  // Translate (outer, inner) coordinates to a column index.
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner)
  {
    typedef typename DstEvaluatorType::ExpressionTraits Traits;
    return int(Traits::ColsAtCompileTime) == 1 ? 0
      : int(Traits::RowsAtCompileTime) == 1 ? inner
      : int(DstEvaluatorType::Flags)&RowMajorBit ? inner
      : outer;
  }

  EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const
  {
    return m_dstExpr.data();
  }

protected:
  DstEvaluatorType& m_dst;
  const SrcEvaluatorType& m_src;
  const Functor &m_functor;
  // TODO find a way to avoid the needs of the original expression
  DstXprType& m_dstExpr;
};

// Special kernel used when computing small products whose operands have dynamic dimensions.  It ensures that the
// PacketSize used is no larger than 4, thereby increasing the chance that vectorized instructions will be used
// when computing the product.
template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor>
class restricted_packet_dense_assignment_kernel : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn>
{
protected:
  typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn> Base;
public:
  typedef typename Base::Scalar Scalar;
  typedef typename Base::DstXprType DstXprType;
  // Re-derive the traits with MaxPacketSize==4 so PacketType is capped accordingly.
  typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, 4> AssignmentTraits;
  typedef typename AssignmentTraits::PacketType PacketType;

  EIGEN_DEVICE_FUNC restricted_packet_dense_assignment_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, const Functor &func, DstXprType& dstExpr)
    : Base(dst, src, func, dstExpr)
  {
  }
};

/***************************************************************************
* Part 5 : Entry point for dense rectangular assignment
***************************************************************************/

// Generic functor: resizing is not allowed, only check that the sizes already match.
template<typename DstXprType,typename SrcXprType, typename Functor>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const Functor &/*func*/)
{
  EIGEN_ONLY_USED_FOR_DEBUG(dst);
  EIGEN_ONLY_USED_FOR_DEBUG(src);
  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
}

// Plain assignment (assign_op): the destination may be resized to match the source.
template<typename DstXprType,typename SrcXprType, typename T1, typename T2>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const internal::assign_op<T1,T2> &/*func*/)
{
  Index dstRows = src.rows();
  Index dstCols = src.cols();
  if(((dst.rows()!=dstRows) || (dst.cols()!=dstCols)))
    dst.resize(dstRows, dstCols);
  eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols);
}

// Builds the evaluators and the kernel, then dispatches to the selected
// dense_assignment_loop specialization.
template<typename DstXprType, typename SrcXprType, typename Functor>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
{
  typedef evaluator<DstXprType> DstEvaluatorType;
  typedef evaluator<SrcXprType> SrcEvaluatorType;

  SrcEvaluatorType srcEvaluator(src);

  // NOTE To properly handle A = (A*A.transpose())/s with A rectangular,
  // we need to resize the destination after the source evaluator has been created.
  resize_if_allowed(dst, src, func);

  DstEvaluatorType dstEvaluator(dst);

  typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
  Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());

  dense_assignment_loop<Kernel>::run(kernel);
}

// Specialization for filling the destination with a constant value.
#ifndef EIGEN_GPU_COMPILE_PHASE
template<typename DstXprType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst,
                                                                      const Eigen::CwiseNullaryOp<Eigen::internal::scalar_constant_op<typename DstXprType::Scalar>, DstXprType>& src,
                                                                      const internal::assign_op<typename DstXprType::Scalar,typename DstXprType::Scalar>& func)
{
  resize_if_allowed(dst, src, func);
  // A constant nullary source reduces to a plain fill.
  std::fill_n(dst.data(), dst.size(), src.functor()());
}
#endif

// Convenience overload defaulting the functor to plain assignment.
template<typename DstXprType, typename SrcXprType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src)
{
  call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
}

/***************************************************************************
* Part 6 : Generic assignment
***************************************************************************/

// Based on the respective shapes of the destination and source,
// the class AssignmentKind determines the kind of assignment mechanism.
810 // AssignmentKind must define a Kind typedef. 811 template<typename DstShape, typename SrcShape> struct AssignmentKind; 812 813 // Assignment kind defined in this file: 814 struct Dense2Dense {}; 815 struct EigenBase2EigenBase {}; 816 817 template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; }; 818 template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; }; 819 820 // This is the main assignment class 821 template< typename DstXprType, typename SrcXprType, typename Functor, 822 typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind, 823 typename EnableIf = void> 824 struct Assignment; 825 826 827 // The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition. 828 // Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes thing quite complicated. 829 // So this intermediate function removes everything related to "assume-aliasing" such that Assignment 830 // does not has to bother about these annoying details. 
831 832 template<typename Dst, typename Src> 833 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 834 void call_assignment(Dst& dst, const Src& src) 835 { 836 call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>()); 837 } 838 template<typename Dst, typename Src> 839 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 840 void call_assignment(const Dst& dst, const Src& src) 841 { 842 call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>()); 843 } 844 845 // Deal with "assume-aliasing" 846 template<typename Dst, typename Src, typename Func> 847 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 848 void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0) 849 { 850 typename plain_matrix_type<Src>::type tmp(src); 851 call_assignment_no_alias(dst, tmp, func); 852 } 853 854 template<typename Dst, typename Src, typename Func> 855 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 856 void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0) 857 { 858 call_assignment_no_alias(dst, src, func); 859 } 860 861 // by-pass "assume-aliasing" 862 // When there is no aliasing, we require that 'dst' has been properly resized 863 template<typename Dst, template <typename> class StorageBase, typename Src, typename Func> 864 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 865 void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func) 866 { 867 call_assignment_no_alias(dst.expression(), src, func); 868 } 869 870 871 template<typename Dst, typename Src, typename Func> 872 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 873 void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func) 874 { 875 enum { 876 NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1) 877 || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1) 878 ) && 
int(Dst::SizeAtCompileTime) != 1 879 }; 880 881 typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned; 882 typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType; 883 ActualDstType actualDst(dst); 884 885 // TODO check whether this is the right place to perform these checks: 886 EIGEN_STATIC_ASSERT_LVALUE(Dst) 887 EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src) 888 EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar); 889 890 Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func); 891 } 892 893 template<typename Dst, typename Src, typename Func> 894 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 895 void call_restricted_packet_assignment_no_alias(Dst& dst, const Src& src, const Func& func) 896 { 897 typedef evaluator<Dst> DstEvaluatorType; 898 typedef evaluator<Src> SrcEvaluatorType; 899 typedef restricted_packet_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Func> Kernel; 900 901 EIGEN_STATIC_ASSERT_LVALUE(Dst) 902 EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar); 903 904 SrcEvaluatorType srcEvaluator(src); 905 resize_if_allowed(dst, src, func); 906 907 DstEvaluatorType dstEvaluator(dst); 908 Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived()); 909 910 dense_assignment_loop<Kernel>::run(kernel); 911 } 912 913 template<typename Dst, typename Src> 914 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 915 void call_assignment_no_alias(Dst& dst, const Src& src) 916 { 917 call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>()); 918 } 919 920 template<typename Dst, typename Src, typename Func> 921 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 922 void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func) 923 { 924 // TODO check whether this is the right place to perform these checks: 925 
EIGEN_STATIC_ASSERT_LVALUE(Dst) 926 EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src) 927 EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar); 928 929 Assignment<Dst,Src,Func>::run(dst, src, func); 930 } 931 template<typename Dst, typename Src> 932 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 933 void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src) 934 { 935 call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>()); 936 } 937 938 // forward declaration 939 template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src); 940 941 // Generic Dense to Dense assignment 942 // Note that the last template argument "Weak" is needed to make it possible to perform 943 // both partial specialization+SFINAE without ambiguous specialization 944 template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak> 945 struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak> 946 { 947 EIGEN_DEVICE_FUNC 948 static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func) 949 { 950 #ifndef EIGEN_NO_DEBUG 951 internal::check_for_aliasing(dst, src); 952 #endif 953 954 call_dense_assignment_loop(dst, src, func); 955 } 956 }; 957 958 // Generic assignment through evalTo. 959 // TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism. 
960 // Note that the last template argument "Weak" is needed to make it possible to perform 961 // both partial specialization+SFINAE without ambiguous specialization 962 template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak> 963 struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak> 964 { 965 EIGEN_DEVICE_FUNC 966 static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/) 967 { 968 Index dstRows = src.rows(); 969 Index dstCols = src.cols(); 970 if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) 971 dst.resize(dstRows, dstCols); 972 973 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); 974 src.evalTo(dst); 975 } 976 977 // NOTE The following two functions are templated to avoid their instantiation if not needed 978 // This is needed because some expressions supports evalTo only and/or have 'void' as scalar type. 
979 template<typename SrcScalarType> 980 EIGEN_DEVICE_FUNC 981 static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/) 982 { 983 Index dstRows = src.rows(); 984 Index dstCols = src.cols(); 985 if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) 986 dst.resize(dstRows, dstCols); 987 988 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); 989 src.addTo(dst); 990 } 991 992 template<typename SrcScalarType> 993 EIGEN_DEVICE_FUNC 994 static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/) 995 { 996 Index dstRows = src.rows(); 997 Index dstCols = src.cols(); 998 if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) 999 dst.resize(dstRows, dstCols); 1000 1001 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); 1002 src.subTo(dst); 1003 } 1004 }; 1005 1006 } // namespace internal 1007 1008 } // end namespace Eigen 1009 1010 #endif // EIGEN_ASSIGN_EVALUATOR_H 1011