xref: /aosp_15_r20/external/eigen/Eigen/src/Core/AssignEvaluator.h (revision bf2c37156dfe67e5dfebd6d394bad8b2ab5804d4)
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2011 Benoit Jacob <[email protected]>
5 // Copyright (C) 2011-2014 Gael Guennebaud <[email protected]>
6 // Copyright (C) 2011-2012 Jitse Niesen <[email protected]>
7 //
8 // This Source Code Form is subject to the terms of the Mozilla
9 // Public License v. 2.0. If a copy of the MPL was not distributed
10 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
11 
12 #ifndef EIGEN_ASSIGN_EVALUATOR_H
13 #define EIGEN_ASSIGN_EVALUATOR_H
14 
15 namespace Eigen {
16 
17 // This implementation is based on Assign.h
18 
19 namespace internal {
20 
21 /***************************************************************************
22 * Part 1 : the logic deciding a strategy for traversal and unrolling       *
23 ***************************************************************************/
24 
25 // copy_using_evaluator_traits is based on assign_traits
26 
// Compile-time decision logic for a dense assignment of SrcEvaluator into DstEvaluator
// through AssignFunc. From the two evaluators' Flags, Alignment and CoeffReadCost, and from
// the compile-time sizes of the destination expression, it derives:
//   - Traversal : the traversal strategy (see the Traversal enumerator below);
//   - Unrolling : complete unrolling, inner unrolling, or no unrolling;
//   - PacketType: the packet type associated with the selected traversal.
// MaxPacketSize (default -1) is combined with the destination sizes through
// EIGEN_SIZE_MIN_PREFER_FIXED before the result is handed to find_best_packet.
27 template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc, int MaxPacketSize = -1>
28 struct copy_using_evaluator_traits
29 {
30   typedef typename DstEvaluator::XprType Dst;
31   typedef typename Dst::Scalar DstScalar;
32 
33   enum {
34     DstFlags = DstEvaluator::Flags,
35     SrcFlags = SrcEvaluator::Flags
36   };
37 
38 public:
     // Alignment reported by each evaluator, whether DirectAccessBit is set on the destination,
     // and the smaller of the two alignments.
39   enum {
40     DstAlignment = DstEvaluator::Alignment,
41     SrcAlignment = SrcEvaluator::Alignment,
42     DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit,
43     JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
44   };
45 
46 private:
     // InnerSize / InnerMaxSize: for a compile-time vector the whole (max) size, otherwise the
     // (max) number of columns when the destination is row-major and of rows when it is not.
     // NOTE(review): MaxSizeAtCompileTime is initialized from Dst::SizeAtCompileTime, not from
     // Dst::MaxSizeAtCompileTime -- confirm this is intended.
47   enum {
48     InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
49               : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
50               : int(Dst::RowsAtCompileTime),
51     InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
52               : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
53               : int(Dst::MaxRowsAtCompileTime),
54     RestrictedInnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(InnerSize,MaxPacketSize),
55     RestrictedLinearSize = EIGEN_SIZE_MIN_PREFER_FIXED(Dst::SizeAtCompileTime,MaxPacketSize),
56     OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
57     MaxSizeAtCompileTime = Dst::SizeAtCompileTime
58   };
59 
60   // TODO distinguish between linear traversal and inner-traversals
     // Packet types selected from the restricted linear size and the restricted inner size.
61   typedef typename find_best_packet<DstScalar,RestrictedLinearSize>::type LinearPacketType;
62   typedef typename find_best_packet<DstScalar,RestrictedInnerSize>::type InnerPacketType;
63 
64   enum {
65     LinearPacketSize = unpacket_traits<LinearPacketType>::size,
66     InnerPacketSize = unpacket_traits<InnerPacketType>::size
67   };
68 
69 public:
     // Alignment required by each of the two candidate packet types.
70   enum {
71     LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment,
72     InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment
73   };
74 
75 private:
76   enum {
77     DstIsRowMajor = DstFlags&RowMajorBit,
78     SrcIsRowMajor = SrcFlags&RowMajorBit,
79     StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
       // Any vectorization needs matching storage orders, ActualPacketAccessBit on both sides,
       // and an assignment functor whose functor_traits report PacketAccess.
80     MightVectorize = bool(StorageOrdersAgree)
81                   && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
82                   && bool(functor_traits<AssignFunc>::PacketAccess),
       // Inner vectorization additionally needs a fixed inner size and a fixed outer stride, both
       // multiples of the inner packet size, and enough joint alignment unless
       // EIGEN_UNALIGNED_VECTORIZE is enabled.
83     MayInnerVectorize  = MightVectorize
84                        && int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
85                        && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
86                        && (EIGEN_UNALIGNED_VECTORIZE  || int(JointAlignment)>=int(InnerRequiredAlignment)),
       // Single-index traversal needs matching storage orders and LinearAccessBit on both sides.
87     MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
       // Linear vectorization also needs direct access to the destination, and either unaligned
       // vectorization, a sufficiently aligned destination, or a Dynamic compile-time size.
88     MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess)
89                        && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
90       /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
91          so it's only good for large enough sizes. */
92     MaySliceVectorize  = bool(MightVectorize) && bool(DstHasDirectAccess)
93                        && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize)))
94       /* slice vectorization can be slow, so we only want it if the slices are big, which is
95          indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
96          in a fixed-size matrix
97          However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
98   };
99 
100 public:
      // Strategy selection, in priority order: a zero compile-time size gives AllAtOnceTraversal;
      // linear vectorization is preferred over inner vectorization only when its packet is strictly
      // larger; then inner, linear and slice vectorization; then scalar linear traversal; and
      // DefaultTraversal when nothing else applies. Vectorized is set for the three packet-based
      // traversals.
101   enum {
102     Traversal =  int(Dst::SizeAtCompileTime) == 0 ? int(AllAtOnceTraversal) // If compile-size is zero, traversing will fail at compile-time.
103               : (int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize)) ? int(LinearVectorizedTraversal)
104               : int(MayInnerVectorize)   ? int(InnerVectorizedTraversal)
105               : int(MayLinearVectorize)  ? int(LinearVectorizedTraversal)
106               : int(MaySliceVectorize)   ? int(SliceVectorizedTraversal)
107               : int(MayLinearize)        ? int(LinearTraversal)
108                                          : int(DefaultTraversal),
109     Vectorized = int(Traversal) == InnerVectorizedTraversal
110               || int(Traversal) == LinearVectorizedTraversal
111               || int(Traversal) == SliceVectorizedTraversal
112   };
113 
      // LinearPacketType when the linear vectorized traversal was selected, InnerPacketType otherwise.
114   typedef typename conditional<int(Traversal)==LinearVectorizedTraversal, LinearPacketType, InnerPacketType>::type PacketType;
115 
116 private:
      // Unrolling budget: EIGEN_UNROLLING_LIMIT scaled by the packet size of the selected traversal
      // (1 when not vectorized), compared against size * (dst CoeffReadCost + src CoeffReadCost).
117   enum {
118     ActualPacketSize    = int(Traversal)==LinearVectorizedTraversal ? LinearPacketSize
119                         : Vectorized ? InnerPacketSize
120                         : 1,
121     UnrollingLimit      = EIGEN_UNROLLING_LIMIT * ActualPacketSize,
122     MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
123                        && int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit),
124     MayUnrollInner      = int(InnerSize) != Dynamic
125                        && int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit)
126   };
127 
128 public:
      // Unrolling mode permitted for the selected traversal. Inner-vectorized and default traversals
      // may unroll completely or on the inner dimension; linear (vectorized or scalar) traversals only
      // completely; slice vectorization is considered for inner unrolling only when
      // EIGEN_UNALIGNED_VECTORIZE is enabled; everything else is NoUnrolling.
129   enum {
130     Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
131                 ? (
132                     int(MayUnrollCompletely) ? int(CompleteUnrolling)
133                   : int(MayUnrollInner)      ? int(InnerUnrolling)
134                                              : int(NoUnrolling)
135                   )
136               : int(Traversal) == int(LinearVectorizedTraversal)
137                 ? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)))
138                           ? int(CompleteUnrolling)
139                           : int(NoUnrolling) )
140               : int(Traversal) == int(LinearTraversal)
141                 ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
142                                               : int(NoUnrolling) )
143 #if EIGEN_UNALIGNED_VECTORIZE
144               : int(Traversal) == int(SliceVectorizedTraversal)
145                 ? ( bool(MayUnrollInner) ? int(InnerUnrolling)
146                                          : int(NoUnrolling) )
147 #endif
148               : int(NoUnrolling)
149   };
150 
151 #ifdef EIGEN_DEBUG_ASSIGN
      // Prints the expression types, the flags (in hexadecimal) and every intermediate trait to
      // std::cerr. Only compiled when EIGEN_DEBUG_ASSIGN is defined.
debugcopy_using_evaluator_traits152   static void debug()
153   {
154     std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
155     std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
156     std::cerr.setf(std::ios::hex, std::ios::basefield);
157     std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl;
158     std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl;
159     std::cerr.unsetf(std::ios::hex);
160     EIGEN_DEBUG_VAR(DstAlignment)
161     EIGEN_DEBUG_VAR(SrcAlignment)
162     EIGEN_DEBUG_VAR(LinearRequiredAlignment)
163     EIGEN_DEBUG_VAR(InnerRequiredAlignment)
164     EIGEN_DEBUG_VAR(JointAlignment)
165     EIGEN_DEBUG_VAR(InnerSize)
166     EIGEN_DEBUG_VAR(InnerMaxSize)
167     EIGEN_DEBUG_VAR(LinearPacketSize)
168     EIGEN_DEBUG_VAR(InnerPacketSize)
169     EIGEN_DEBUG_VAR(ActualPacketSize)
170     EIGEN_DEBUG_VAR(StorageOrdersAgree)
171     EIGEN_DEBUG_VAR(MightVectorize)
172     EIGEN_DEBUG_VAR(MayLinearize)
173     EIGEN_DEBUG_VAR(MayInnerVectorize)
174     EIGEN_DEBUG_VAR(MayLinearVectorize)
175     EIGEN_DEBUG_VAR(MaySliceVectorize)
176     std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
177     EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost)
178     EIGEN_DEBUG_VAR(DstEvaluator::CoeffReadCost)
179     EIGEN_DEBUG_VAR(Dst::SizeAtCompileTime)
180     EIGEN_DEBUG_VAR(UnrollingLimit)
181     EIGEN_DEBUG_VAR(MayUnrollCompletely)
182     EIGEN_DEBUG_VAR(MayUnrollInner)
183     std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
184     std::cerr << std::endl;
185   }
186 #endif
187 };
188 
189 /***************************************************************************
190 * Part 2 : meta-unrollers
191 ***************************************************************************/
192 
193 /************************
194 *** Default traversal ***
195 ************************/
196 
197 template<typename Kernel, int Index, int Stop>
198 struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
199 {
200   // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
201   typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
202   typedef typename DstEvaluatorType::XprType DstXprType;
203 
204   enum {
205     outer = Index / DstXprType::InnerSizeAtCompileTime,
206     inner = Index % DstXprType::InnerSizeAtCompileTime
207   };
208 
runcopy_using_evaluator_DefaultTraversal_CompleteUnrolling209   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
210   {
211     kernel.assignCoeffByOuterInner(outer, inner);
212     copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
213   }
214 };
215 
216 template<typename Kernel, int Stop>
217 struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
218 {
219   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
220 };
221 
222 template<typename Kernel, int Index_, int Stop>
223 struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
224 {
225   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
226   {
227     kernel.assignCoeffByOuterInner(outer, Index_);
228     copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_+1, Stop>::run(kernel, outer);
229   }
230 };
231 
232 template<typename Kernel, int Stop>
233 struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
234 {
235   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { }
236 };
237 
238 /***********************
239 *** Linear traversal ***
240 ***********************/
241 
242 template<typename Kernel, int Index, int Stop>
243 struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
244 {
245   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel)
246   {
247     kernel.assignCoeff(Index);
248     copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
249   }
250 };
251 
252 template<typename Kernel, int Stop>
253 struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>
254 {
255   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
256 };
257 
258 /**************************
259 *** Inner vectorization ***
260 **************************/
261 
262 template<typename Kernel, int Index, int Stop>
263 struct copy_using_evaluator_innervec_CompleteUnrolling
264 {
265   // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
266   typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
267   typedef typename DstEvaluatorType::XprType DstXprType;
268   typedef typename Kernel::PacketType PacketType;
269 
270   enum {
271     outer = Index / DstXprType::InnerSizeAtCompileTime,
272     inner = Index % DstXprType::InnerSizeAtCompileTime,
273     SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
274     DstAlignment = Kernel::AssignmentTraits::DstAlignment
275   };
276 
277   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
278   {
279     kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
280     enum { NextIndex = Index + unpacket_traits<PacketType>::size };
281     copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
282   }
283 };
284 
285 template<typename Kernel, int Stop>
286 struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
287 {
288   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
289 };
290 
291 template<typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
292 struct copy_using_evaluator_innervec_InnerUnrolling
293 {
294   typedef typename Kernel::PacketType PacketType;
295   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
296   {
297     kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
298     enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
299     copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel, outer);
300   }
301 };
302 
303 template<typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
304 struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment>
305 {
306   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
307 };
308 
309 /***************************************************************************
310 * Part 3 : implementation of all cases
311 ***************************************************************************/
312 
313 // dense_assignment_loop is based on assign_impl
314 
// Primary template, declared but not defined here: the loop for each supported
// (Traversal, Unrolling) pair is provided by a partial specialization.
// Both parameters default to the values published by the kernel's AssignmentTraits.
315 template<typename Kernel,
316          int Traversal = Kernel::AssignmentTraits::Traversal,
317          int Unrolling = Kernel::AssignmentTraits::Unrolling>
318 struct dense_assignment_loop;
319 
320 /************************
321 ***** Special Cases *****
322 ************************/
323 
324 // Zero-sized assignment is a no-op.
325 template<typename Kernel, int Unrolling>
326 struct dense_assignment_loop<Kernel, AllAtOnceTraversal, Unrolling>
327 {
328   EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel& /*kernel*/)
329   {
330     typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
331     EIGEN_STATIC_ASSERT(int(DstXprType::SizeAtCompileTime) == 0,
332       EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT)
333   }
334 };
335 
336 /************************
337 *** Default traversal ***
338 ************************/
339 
340 template<typename Kernel>
341 struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
342 {
343   EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel)
344   {
345     for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
346       for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
347         kernel.assignCoeffByOuterInner(outer, inner);
348       }
349     }
350   }
351 };
352 
353 template<typename Kernel>
354 struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
355 {
356   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
357   {
358     typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
359     copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
360   }
361 };
362 
363 template<typename Kernel>
364 struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
365 {
366   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
367   {
368     typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
369 
370     const Index outerSize = kernel.outerSize();
371     for(Index outer = 0; outer < outerSize; ++outer)
372       copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
373   }
374 };
375 
376 /***************************
377 *** Linear vectorization ***
378 ***************************/
379 
380 
381 // The goal of unaligned_dense_assignment_loop is simply to factorize the handling
382 // of the non vectorizable beginning and ending parts
383 
384 template <bool IsAligned = false>
385 struct unaligned_dense_assignment_loop
386 {
387   // if IsAligned = true, then do nothing
388   template <typename Kernel>
389   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {}
390 };
391 
392 template <>
393 struct unaligned_dense_assignment_loop<false>
394 {
395   // MSVC must not inline this functions. If it does, it fails to optimize the
396   // packet access path.
397   // FIXME check which version exhibits this issue
398 #if EIGEN_COMP_MSVC
399   template <typename Kernel>
400   static EIGEN_DONT_INLINE void run(Kernel &kernel,
401                                     Index start,
402                                     Index end)
403 #else
404   template <typename Kernel>
405   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel,
406                                       Index start,
407                                       Index end)
408 #endif
409   {
410     for (Index index = start; index < end; ++index)
411       kernel.assignCoeff(index);
412   }
413 };
414 
415 template<typename Kernel>
416 struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
417 {
418   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
419   {
420     const Index size = kernel.size();
421     typedef typename Kernel::Scalar Scalar;
422     typedef typename Kernel::PacketType PacketType;
423     enum {
424       requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment,
425       packetSize = unpacket_traits<PacketType>::size,
426       dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
427       dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
428                                                             : int(Kernel::AssignmentTraits::DstAlignment),
429       srcAlignment = Kernel::AssignmentTraits::JointAlignment
430     };
431     const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size);
432     const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
433 
434     unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);
435 
436     for(Index index = alignedStart; index < alignedEnd; index += packetSize)
437       kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);
438 
439     unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
440   }
441 };
442 
443 template<typename Kernel>
444 struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
445 {
446   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
447   {
448     typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
449     typedef typename Kernel::PacketType PacketType;
450 
451     enum { size = DstXprType::SizeAtCompileTime,
452            packetSize =unpacket_traits<PacketType>::size,
453            alignedSize = (int(size)/packetSize)*packetSize };
454 
455     copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
456     copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
457   }
458 };
459 
460 /**************************
461 *** Inner vectorization ***
462 **************************/
463 
464 template<typename Kernel>
465 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
466 {
467   typedef typename Kernel::PacketType PacketType;
468   enum {
469     SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
470     DstAlignment = Kernel::AssignmentTraits::DstAlignment
471   };
472   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
473   {
474     const Index innerSize = kernel.innerSize();
475     const Index outerSize = kernel.outerSize();
476     const Index packetSize = unpacket_traits<PacketType>::size;
477     for(Index outer = 0; outer < outerSize; ++outer)
478       for(Index inner = 0; inner < innerSize; inner+=packetSize)
479         kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
480   }
481 };
482 
483 template<typename Kernel>
484 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
485 {
486   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
487   {
488     typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
489     copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
490   }
491 };
492 
493 template<typename Kernel>
494 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
495 {
496   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
497   {
498     typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
499     typedef typename Kernel::AssignmentTraits Traits;
500     const Index outerSize = kernel.outerSize();
501     for(Index outer = 0; outer < outerSize; ++outer)
502       copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime,
503                                                    Traits::SrcAlignment, Traits::DstAlignment>::run(kernel, outer);
504   }
505 };
506 
507 /***********************
508 *** Linear traversal ***
509 ***********************/
510 
511 template<typename Kernel>
512 struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
513 {
514   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
515   {
516     const Index size = kernel.size();
517     for(Index i = 0; i < size; ++i)
518       kernel.assignCoeff(i);
519   }
520 };
521 
522 template<typename Kernel>
523 struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
524 {
525   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
526   {
527     typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
528     copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
529   }
530 };
531 
532 /**************************
533 *** Slice vectorization ***
534 ***************************/
535 
536 template<typename Kernel>
537 struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
538 {
539   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
540   {
541     typedef typename Kernel::Scalar Scalar;
542     typedef typename Kernel::PacketType PacketType;
543     enum {
544       packetSize = unpacket_traits<PacketType>::size,
545       requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment),
546       alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
547       dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
548       dstAlignment = alignable ? int(requestedAlignment)
549                                : int(Kernel::AssignmentTraits::DstAlignment)
550     };
551     const Scalar *dst_ptr = kernel.dstDataPtr();
552     if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
553     {
554       // the pointer is not aligned-on scalar, so alignment is not possible
555       return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
556     }
557     const Index packetAlignedMask = packetSize - 1;
558     const Index innerSize = kernel.innerSize();
559     const Index outerSize = kernel.outerSize();
560     const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
561     Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);
562 
563     for(Index outer = 0; outer < outerSize; ++outer)
564     {
565       const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
566       // do the non-vectorizable part of the assignment
567       for(Index inner = 0; inner<alignedStart ; ++inner)
568         kernel.assignCoeffByOuterInner(outer, inner);
569 
570       // do the vectorizable part of the assignment
571       for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
572         kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);
573 
574       // do the non-vectorizable part of the assignment
575       for(Index inner = alignedEnd; inner<innerSize ; ++inner)
576         kernel.assignCoeffByOuterInner(outer, inner);
577 
578       alignedStart = numext::mini((alignedStart+alignedStep)%packetSize, innerSize);
579     }
580   }
581 };
582 
583 #if EIGEN_UNALIGNED_VECTORIZE
584 template<typename Kernel>
585 struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
586 {
587   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
588   {
589     typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
590     typedef typename Kernel::PacketType PacketType;
591 
592     enum { innerSize = DstXprType::InnerSizeAtCompileTime,
593            packetSize =unpacket_traits<PacketType>::size,
594            vectorizableSize = (int(innerSize) / int(packetSize)) * int(packetSize),
595            size = DstXprType::SizeAtCompileTime };
596 
597     for(Index outer = 0; outer < kernel.outerSize(); ++outer)
598     {
599       copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);
600       copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, innerSize>::run(kernel, outer);
601     }
602   }
603 };
604 #endif
605 
606 
607 /***************************************************************************
608 * Part 4 : Generic dense assignment kernel
609 ***************************************************************************/
610 
611 // This class generalize the assignment of a coefficient (or packet) from one dense evaluator
612 // to another dense writable evaluator.
613 // It is parametrized by the two evaluators, and the actual assignment functor.
614 // This abstraction level permits to keep the evaluation loops as simple and as generic as possible.
615 // One can customize the assignment using this generic dense_assignment_kernel with different
616 // functors, or by completely overloading it, by-passing a functor.
617 template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
618 class generic_dense_assignment_kernel
619 {
620 protected:
621   typedef typename DstEvaluatorTypeT::XprType DstXprType;
622   typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
623 public:
624 
625   typedef DstEvaluatorTypeT DstEvaluatorType;
626   typedef SrcEvaluatorTypeT SrcEvaluatorType;
627   typedef typename DstEvaluatorType::Scalar Scalar;
628   typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
629   typedef typename AssignmentTraits::PacketType PacketType;
630 
631 
632   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
633   generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
634     : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
635   {
636     #ifdef EIGEN_DEBUG_ASSIGN
637     AssignmentTraits::debug();
638     #endif
639   }
640 
641   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index size() const EIGEN_NOEXCEPT { return m_dstExpr.size(); }
642   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index innerSize() const EIGEN_NOEXCEPT { return m_dstExpr.innerSize(); }
643   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index outerSize() const EIGEN_NOEXCEPT { return m_dstExpr.outerSize(); }
644   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_dstExpr.rows(); }
645   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_dstExpr.cols(); }
646   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index outerStride() const EIGEN_NOEXCEPT { return m_dstExpr.outerStride(); }
647 
648   EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() EIGEN_NOEXCEPT { return m_dst; }
649   EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const EIGEN_NOEXCEPT { return m_src; }
650 
651   /// Assign src(row,col) to dst(row,col) through the assignment functor.
652   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
653   {
654     m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
655   }
656 
657   /// \sa assignCoeff(Index,Index)
658   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)
659   {
660     m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
661   }
662 
663   /// \sa assignCoeff(Index,Index)
664   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)
665   {
666     Index row = rowIndexByOuterInner(outer, inner);
667     Index col = colIndexByOuterInner(outer, inner);
668     assignCoeff(row, col);
669   }
670 
671 
672   template<int StoreMode, int LoadMode, typename PacketType>
673   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col)
674   {
675     m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
676   }
677 
678   template<int StoreMode, int LoadMode, typename PacketType>
679   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index)
680   {
681     m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
682   }
683 
684   template<int StoreMode, int LoadMode, typename PacketType>
685   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)
686   {
687     Index row = rowIndexByOuterInner(outer, inner);
688     Index col = colIndexByOuterInner(outer, inner);
689     assignPacket<StoreMode,LoadMode,PacketType>(row, col);
690   }
691 
692   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
693   {
694     typedef typename DstEvaluatorType::ExpressionTraits Traits;
695     return int(Traits::RowsAtCompileTime) == 1 ? 0
696       : int(Traits::ColsAtCompileTime) == 1 ? inner
697       : int(DstEvaluatorType::Flags)&RowMajorBit ? outer
698       : inner;
699   }
700 
701   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner)
702   {
703     typedef typename DstEvaluatorType::ExpressionTraits Traits;
704     return int(Traits::ColsAtCompileTime) == 1 ? 0
705       : int(Traits::RowsAtCompileTime) == 1 ? inner
706       : int(DstEvaluatorType::Flags)&RowMajorBit ? inner
707       : outer;
708   }
709 
  /// Returns the pointer obtained from data() on the original destination expression
  /// (m_dstExpr, not the evaluator), exposed as a pointer to const Scalar.
  EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const
  {
    return m_dstExpr.data();
  }
714 
protected:
  // Destination evaluator: coefficient addresses are taken from it via coeffRef().
  DstEvaluatorType& m_dst;
  // Source evaluator: packets are read from it via packet<>().
  const SrcEvaluatorType& m_src;
  // Assignment functor: performs the actual store (e.g. through assignPacket<>()).
  const Functor &m_functor;
  // TODO find a way to avoid the needs of the original expression
  // Original destination expression; dstDataPtr() reads its data() pointer.
  DstXprType& m_dstExpr;
721 };
722 
723 // Special kernel used when computing small products whose operands have dynamic dimensions.  It ensures that the
724 // PacketSize used is no larger than 4, thereby increasing the chance that vectorized instructions will be used
725 // when computing the product.
726 
// Thin wrapper around generic_dense_assignment_kernel<..., BuiltIn>: it adds no data or behavior
// of its own and only exposes AssignmentTraits/PacketType computed with a maximum packet size of 4.
template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor>
class restricted_packet_dense_assignment_kernel : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn>
{
protected:
  typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn> Base;
 public:
    typedef typename Base::Scalar Scalar;
    typedef typename Base::DstXprType DstXprType;
    // The trailing 4 is the MaxPacketSize argument of copy_using_evaluator_traits.
    // NOTE(review): presumably these two typedefs hide same-named typedefs of Base, which is
    // how the traversal code picks up the restricted packet type - confirm against Base.
    typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, 4> AssignmentTraits;
    typedef typename AssignmentTraits::PacketType PacketType;

    // Forwards all four arguments unchanged to the base kernel.
    EIGEN_DEVICE_FUNC restricted_packet_dense_assignment_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, const Functor &func, DstXprType& dstExpr)
    : Base(dst, src, func, dstExpr)
  {
  }
 };
743 
744 /***************************************************************************
745 * Part 5 : Entry point for dense rectangular assignment
746 ***************************************************************************/
747 
// Generic overload, used for any functor that has no more specific overload:
// the destination is never resized here, its dimensions are only checked (through
// eigen_assert) to match those of the source.
template<typename DstXprType,typename SrcXprType, typename Functor>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const Functor &/*func*/)
{
  // dst and src are only referenced by the assertion below.
  // NOTE(review): presumably these macros silence unused-parameter warnings when
  // eigen_assert is compiled out - confirm against the macro definition.
  EIGEN_ONLY_USED_FOR_DEBUG(dst);
  EIGEN_ONLY_USED_FOR_DEBUG(src);
  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
}
756 
757 template<typename DstXprType,typename SrcXprType, typename T1, typename T2>
758 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
759 void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const internal::assign_op<T1,T2> &/*func*/)
760 {
761   Index dstRows = src.rows();
762   Index dstCols = src.cols();
763   if(((dst.rows()!=dstRows) || (dst.cols()!=dstCols)))
764     dst.resize(dstRows, dstCols);
765   eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols);
766 }
767 
// Core dense assignment routine: builds the source and destination evaluators, wraps them
// together with `func` in a generic_dense_assignment_kernel and runs dense_assignment_loop on it.
// The order of the statements below matters (see the NOTE in the body).
template<typename DstXprType, typename SrcXprType, typename Functor>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
{
  typedef evaluator<DstXprType> DstEvaluatorType;
  typedef evaluator<SrcXprType> SrcEvaluatorType;

  // Constructed before dst is touched.
  SrcEvaluatorType srcEvaluator(src);

  // NOTE To properly handle A = (A*A.transpose())/s with A rectangular,
  // we need to resize the destination after the source evaluator has been created.
  resize_if_allowed(dst, src, func);

  // Constructed only after the potential resize, so it sees the final dimensions of dst.
  DstEvaluatorType dstEvaluator(dst);

  typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
  // The kernel receives the destination expression itself in addition to its evaluator.
  Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());

  dense_assignment_loop<Kernel>::run(kernel);
}
787 
// Specialization for filling the destination with a constant value.
// Selected when the source is a CwiseNullaryOp of scalar_constant_op whose second template
// argument is DstXprType itself and the functor is assign_op on the destination scalar type.
// It bypasses the evaluator/kernel machinery and writes through the raw data() pointer.
// Not compiled when EIGEN_GPU_COMPILE_PHASE is defined.
#ifndef EIGEN_GPU_COMPILE_PHASE
template<typename DstXprType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const Eigen::CwiseNullaryOp<Eigen::internal::scalar_constant_op<typename DstXprType::Scalar>, DstXprType>& src, const internal::assign_op<typename DstXprType::Scalar,typename DstXprType::Scalar>& func)
{
  resize_if_allowed(dst, src, func);
  // Invoke the nullary functor once to obtain the value, then store dst.size() copies of it
  // starting at dst.data().
  std::fill_n(dst.data(), dst.size(), src.functor()());
}
#endif
797 
// Convenience overload without functor: forwards to the three-argument version with a
// default-constructed assign_op on the destination and source scalar types.
template<typename DstXprType, typename SrcXprType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src)
{
  call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
}
803 
804 /***************************************************************************
805 * Part 6 : Generic assignment
806 ***************************************************************************/
807 
// Based on the respective shapes of the destination and source,
// the class AssignmentKind determines the kind of assignment mechanism.
// AssignmentKind must define a Kind typedef.
template<typename DstShape, typename SrcShape> struct AssignmentKind;

// Assignment kinds defined in this file (empty tag types):
struct Dense2Dense {};
struct EigenBase2EigenBase {};

// Default: any pair of shapes maps to EigenBase2EigenBase ...
template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; };
// ... except dense-to-dense, which maps to Dense2Dense.
template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; };
819 
// This is the main assignment class.
// It is only declared here; the work is done by partial specializations selected through Kind,
// which defaults to the AssignmentKind of the destination and source shapes
// (taken from evaluator_traits<...>::Shape).
// NOTE(review): EnableIf is presumably a SFINAE hook for further specializations - confirm.
template< typename DstXprType, typename SrcXprType, typename Functor,
          typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,
          typename EnableIf = void>
struct Assignment;
825 
826 
// The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition.
// Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes things quite complicated.
// So this intermediate function removes everything related to "assume-aliasing" so that Assignment
// does not have to bother with these annoying details.
831 
// Two-argument entry point: forwards to the functor-taking call_assignment overloads with a
// default-constructed assign_op on the destination and source scalar types.
template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment(Dst& dst, const Src& src)
{
  call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
}
// Two-argument entry point for a destination bound as a const reference: dst is passed on
// unchanged (still const) to the functor-taking call_assignment overloads, with a
// default-constructed assign_op.
// NOTE(review): presumably meant for destinations that are temporary proxy objects - confirm against callers.
template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment(const Dst& dst, const Src& src)
{
  call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
}
844 
// Deal with "assume-aliasing"
// Overload enabled (through the defaulted enable_if parameter) when
// evaluator_assume_aliasing<Src>::value is true: src is first evaluated into a temporary of
// its plain matrix type, and only that temporary is then assigned to dst.
template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
{
  // Complete evaluation of src happens here, before dst is handed to any assignment routine.
  typename plain_matrix_type<Src>::type tmp(src);
  call_assignment_no_alias(dst, tmp, func);
}
853 
// Overload enabled (through the defaulted enable_if parameter) when
// evaluator_assume_aliasing<Src>::value is false: no temporary is created and the
// assignment is forwarded directly to call_assignment_no_alias.
template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
{
  call_assignment_no_alias(dst, src, func);
}
860 
// by-pass "assume-aliasing"
// When there is no aliasing, we require that 'dst' has been properly resized
// Overload for a NoAlias wrapper: the wrapped expression (dst.expression()) is assigned
// directly through call_assignment_no_alias, without consulting evaluator_assume_aliasing.
template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
{
  call_assignment_no_alias(dst.expression(), src, func);
}
869 
870 
// Assignment without any aliasing handling. Takes care of the automatic transposition of
// vectors, performs the compile-time checks, and dispatches to Assignment<...>::run.
template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
{
  // True when one side is a row vector and the other a column vector at compile time,
  // except when Dst has exactly one coefficient at compile time.
  enum {
    NeedToTranspose = (    (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
                        || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)
                      ) && int(Dst::SizeAtCompileTime) != 1
  };

  // ActualDstTypeCleaned is the expression type used for the checks and the dispatch;
  // ActualDstType is what is actually instantiated: a Transpose<Dst> object wrapping dst when
  // transposing, otherwise a plain reference to dst.
  typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
  typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
  ActualDstType actualDst(dst);

  // TODO check whether this is the right place to perform these checks:
  EIGEN_STATIC_ASSERT_LVALUE(Dst)
  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
  EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);

  Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
}
892 
// Same sequence of steps as the dense assignment loop entry point (source evaluator, resize,
// destination evaluator, kernel, loop), but the kernel is a
// restricted_packet_dense_assignment_kernel. No transposition or aliasing handling is done here.
template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_restricted_packet_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
{
    typedef evaluator<Dst> DstEvaluatorType;
    typedef evaluator<Src> SrcEvaluatorType;
    typedef restricted_packet_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Func> Kernel;

    EIGEN_STATIC_ASSERT_LVALUE(Dst)
    EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);

    // The source evaluator is created before dst is (possibly) resized ...
    SrcEvaluatorType srcEvaluator(src);
    resize_if_allowed(dst, src, func);

    // ... and the destination evaluator only afterwards.
    DstEvaluatorType dstEvaluator(dst);
    Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());

    dense_assignment_loop<Kernel>::run(kernel);
}
912 
// Convenience overload without functor: forwards to the three-argument
// call_assignment_no_alias with a default-constructed assign_op.
template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias(Dst& dst, const Src& src)
{
  call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
}
919 
// Assignment without aliasing handling and without automatic transposition: after the
// compile-time checks, Dst and Src are passed as they are to Assignment<...>::run.
template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
{
  // TODO check whether this is the right place to perform these checks:
  EIGEN_STATIC_ASSERT_LVALUE(Dst)
  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
  EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);

  Assignment<Dst,Src,Func>::run(dst, src, func);
}
// Convenience overload without functor: forwards to the three-argument
// call_assignment_no_alias_no_transpose with a default-constructed assign_op.
template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
{
  call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
}
937 
// forward declaration (no definition appears in the remainder of this file);
// called by the Dense2Dense Assignment below when EIGEN_NO_DEBUG is not defined
template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);
940 
// Generic Dense to Dense assignment
// Note that the last template argument "Weak" is needed to make it possible to perform
// both partial specialization+SFINAE without ambiguous specialization
template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
{
  // Runs the dense assignment loop on (dst, src, func); unless EIGEN_NO_DEBUG is defined,
  // check_for_aliasing(dst, src) is called first.
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
  {
#ifndef EIGEN_NO_DEBUG
    internal::check_for_aliasing(dst, src);
#endif

    call_dense_assignment_loop(dst, src, func);
  }
};
957 
958 // Generic assignment through evalTo.
959 // TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism.
960 // Note that the last template argument "Weak" is needed to make it possible to perform
961 // both partial specialization+SFINAE without ambiguous specialization
962 template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
963 struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
964 {
965   EIGEN_DEVICE_FUNC
966   static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
967   {
968     Index dstRows = src.rows();
969     Index dstCols = src.cols();
970     if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
971       dst.resize(dstRows, dstCols);
972 
973     eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
974     src.evalTo(dst);
975   }
976 
977   // NOTE The following two functions are templated to avoid their instantiation if not needed
978   //      This is needed because some expressions supports evalTo only and/or have 'void' as scalar type.
979   template<typename SrcScalarType>
980   EIGEN_DEVICE_FUNC
981   static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
982   {
983     Index dstRows = src.rows();
984     Index dstCols = src.cols();
985     if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
986       dst.resize(dstRows, dstCols);
987 
988     eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
989     src.addTo(dst);
990   }
991 
992   template<typename SrcScalarType>
993   EIGEN_DEVICE_FUNC
994   static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
995   {
996     Index dstRows = src.rows();
997     Index dstCols = src.cols();
998     if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
999       dst.resize(dstRows, dstCols);
1000 
1001     eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
1002     src.subTo(dst);
1003   }
1004 };
1005 
1006 } // namespace internal
1007 
1008 } // end namespace Eigen
1009 
1010 #endif // EIGEN_ASSIGN_EVALUATOR_H
1011