// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2015 Gael Guennebaud <[email protected]>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H
#define EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H

namespace Eigen {

namespace internal {

template<typename Lhs, typename Rhs, typename ResultType>
static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res, bool sortedInsertion = false)
{
  typedef typename remove_all<Lhs>::type::Scalar LhsScalar;
  typedef typename remove_all<Rhs>::type::Scalar RhsScalar;
  typedef typename remove_all<ResultType>::type::Scalar ResScalar;

  // make sure to call innerSize/outerSize since we fake the storage order.
  Index rows = lhs.innerSize();
  Index cols = rhs.outerSize();
  eigen_assert(lhs.outerSize() == rhs.innerSize());

  ei_declare_aligned_stack_constructed_variable(bool, mask, rows, 0);
  ei_declare_aligned_stack_constructed_variable(ResScalar, values, rows, 0);
  ei_declare_aligned_stack_constructed_variable(Index, indices, rows, 0);

  std::memset(mask,0,sizeof(bool)*rows);

  evaluator<Lhs> lhsEval(lhs);
  evaluator<Rhs> rhsEval(rhs);

  // Estimate the number of non-zero entries:
  // given a rhs column containing Y non zeros, we assume that the respective
  // Y columns of the lhs differ from one another by about one non zero on
  // average; thus the product of a rhs column with the lhs contains about
  // X+Y non zeros, where X is the average number of non zeros per column of
  // the lhs. Summing over all columns, nnz(lhs*rhs) = nnz(lhs) + nnz(rhs).
  Index estimated_nnz_prod = lhsEval.nonZerosEstimate() + rhsEval.nonZerosEstimate();

  res.setZero();
  res.reserve(Index(estimated_nnz_prod));
  // we compute each column of the result, one after the other
  for (Index j=0; j<cols; ++j)
  {
    res.startVec(j);
    Index nnz = 0;
    for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
    {
      RhsScalar y = rhsIt.value();
      Index k = rhsIt.index();
      for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt)
      {
        Index i = lhsIt.index();
        LhsScalar x = lhsIt.value();
        if(!mask[i])
        {
          mask[i] = true;
          values[i] = x * y;
          indices[nnz] = i;
          ++nnz;
        }
        else
          values[i] += x * y;
      }
    }
    if(!sortedInsertion)
    {
      // unordered insertion
      for(Index k=0; k<nnz; ++k)
      {
        Index i = indices[k];
        res.insertBackByOuterInnerUnordered(j,i) = values[i];
        mask[i] = false;
      }
    }
    else
    {
      // alternative ordered insertion code:
      const Index t200 = rows/11; // 11 ~= log2(200)*1.39
      const Index t = (rows*100)/139;

      // FIXME reserve nnz non zeros
      // FIXME implement faster sorting algorithms for very small nnz
      // If the result is sparse enough, use a quick sort;
      // otherwise, loop through the entire vector.
      // To avoid computing an expensive log2 when the result is clearly very
      // sparse, we use a linear bound up to 200.
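      // Note on the constants (an explanatory sketch, not from the original
      // sources): quick sort performs about 2*n*ln(n) = 1.39*n*log2(n)
      // comparisons on average, while the dense path below touches all `rows`
      // entries. Sorting thus wins when nnz*log2(nnz)*1.39 < rows, i.e. when
      // nnz*log2(nnz) < rows*100/139 == t. For nnz < 200, log2(nnz)*1.39 is
      // below ~11, so the linear test nnz < rows/11 == t200 is a sufficient
      // condition that avoids evaluating log2 at all.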
      if((nnz<200 && nnz<t200) || nnz * numext::log2(int(nnz)) < t)
      {
        if(nnz>1) std::sort(indices,indices+nnz);
        for(Index k=0; k<nnz; ++k)
        {
          Index i = indices[k];
          res.insertBackByOuterInner(j,i) = values[i];
          mask[i] = false;
        }
      }
      else
      {
        // dense path
        for(Index i=0; i<rows; ++i)
        {
          if(mask[i])
          {
            mask[i] = false;
            res.insertBackByOuterInner(j,i) = values[i];
          }
        }
      }
    }
  }
  res.finalize();
}


} // end namespace internal

namespace internal {

template<typename Lhs, typename Rhs, typename ResultType,
  int LhsStorageOrder = (traits<Lhs>::Flags&RowMajorBit) ? RowMajor : ColMajor,
  int RhsStorageOrder = (traits<Rhs>::Flags&RowMajorBit) ? RowMajor : ColMajor,
  int ResStorageOrder = (traits<ResultType>::Flags&RowMajorBit) ? RowMajor : ColMajor>
struct conservative_sparse_sparse_product_selector;

template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor,ColMajor>
{
  typedef typename remove_all<Lhs>::type LhsCleaned;
  typedef typename LhsCleaned::Scalar Scalar;

  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;
    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrixAux;
    typedef typename sparse_eval<ColMajorMatrixAux,ResultType::RowsAtCompileTime,ResultType::ColsAtCompileTime,ColMajorMatrixAux::Flags>::type ColMajorMatrix;

    // If the result is tall and thin (in the extreme case a column vector),
    // then it is faster to sort the coefficients in place instead of transposing twice.
    // FIXME: the following heuristic is probably not very good.
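    // For instance (hypothetical sizes), a 1000000x2 result holds only a few
    // entries per column, so sorting those indices in place is far cheaper
    // than materializing a row-major temporary and converting it back.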
    if(lhs.rows()>rhs.cols())
    {
      ColMajorMatrix resCol(lhs.rows(),rhs.cols());
      // perform sorted insertion
      internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol, true);
      res = resCol.markAsRValue();
    }
    else
    {
      ColMajorMatrixAux resCol(lhs.rows(),rhs.cols());
      // resort to transpose to sort the entries
      internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrixAux>(lhs, rhs, resCol, false);
      RowMajorMatrix resRow(resCol);
      res = resRow.markAsRValue();
    }
  }
};

template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor,ColMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename Rhs::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorRhs;
    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorRes;
    RowMajorRhs rhsRow = rhs;
    RowMajorRes resRow(lhs.rows(), rhs.cols());
    internal::conservative_sparse_sparse_product_impl<RowMajorRhs,Lhs,RowMajorRes>(rhsRow, lhs, resRow);
    res = resRow;
  }
};

template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,RowMajor,ColMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename Lhs::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorLhs;
    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorRes;
    RowMajorLhs lhsRow = lhs;
    RowMajorRes resRow(lhs.rows(), rhs.cols());
    internal::conservative_sparse_sparse_product_impl<Rhs,RowMajorLhs,RowMajorRes>(rhs, lhsRow, resRow);
    res = resRow;
  }
};

template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,RowMajor,ColMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;
    RowMajorMatrix resRow(lhs.rows(), rhs.cols());
    internal::conservative_sparse_sparse_product_impl<Rhs,Lhs,RowMajorMatrix>(rhs, lhs, resRow);
    res = resRow;
  }
};


template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor,RowMajor>
{
  typedef typename traits<typename remove_all<Lhs>::type>::Scalar Scalar;

  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
    ColMajorMatrix resCol(lhs.rows(), rhs.cols());
    internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol);
    res = resCol;
  }
};

template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor,RowMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorLhs;
    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRes;
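    // Copy the row-major lhs into a column-major temporary so that the kernel
    // above can traverse both operands column-wise; the final assignment to
    // the row-major result performs the storage-order conversion, which also
    // sorts the inner indices of each row.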
    ColMajorLhs lhsCol = lhs;
    ColMajorRes resCol(lhs.rows(), rhs.cols());
    internal::conservative_sparse_sparse_product_impl<ColMajorLhs,Rhs,ColMajorRes>(lhsCol, rhs, resCol);
    res = resCol;
  }
};

template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,RowMajor,RowMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRhs;
    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRes;
    ColMajorRhs rhsCol = rhs;
    ColMajorRes resCol(lhs.rows(), rhs.cols());
    internal::conservative_sparse_sparse_product_impl<Lhs,ColMajorRhs,ColMajorRes>(lhs, rhsCol, resCol);
    res = resCol;
  }
};

template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,RowMajor,RowMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;
    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
    RowMajorMatrix resRow(lhs.rows(),rhs.cols());
    internal::conservative_sparse_sparse_product_impl<Rhs,Lhs,RowMajorMatrix>(rhs, lhs, resRow);
    // sort the non zeros:
    ColMajorMatrix resCol(resRow);
    res = resCol;
  }
};

} // end namespace internal


namespace internal {

template<typename Lhs, typename Rhs, typename ResultType>
static void sparse_sparse_to_dense_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
  typedef typename remove_all<Lhs>::type::Scalar LhsScalar;
  typedef typename remove_all<Rhs>::type::Scalar RhsScalar;
  Index cols = rhs.outerSize();
  eigen_assert(lhs.outerSize() == rhs.innerSize());

  evaluator<Lhs> lhsEval(lhs);
  evaluator<Rhs> rhsEval(rhs);

  for (Index j=0; j<cols; ++j)
  {
    for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
    {
      RhsScalar y = rhsIt.value();
      Index k = rhsIt.index();
      for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt)
      {
        Index i = lhsIt.index();
        LhsScalar x = lhsIt.value();
        res.coeffRef(i,j) += x * y;
      }
    }
  }
}


} // end namespace internal
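// A minimal usage sketch (illustrative only; the size `n` and the fill-in are
// hypothetical): assigning a sparse*sparse product to a dense destination is
// the typical way to reach the kernel above.
//
//   Eigen::SparseMatrix<double> A(n,n), B(n,n);
//   // ... insert coefficients into A and B ...
//   Eigen::MatrixXd D = A * B; // accumulates x*y terms via res.coeffRef(i,j)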
namespace internal {

template<typename Lhs, typename Rhs, typename ResultType,
  int LhsStorageOrder = (traits<Lhs>::Flags&RowMajorBit) ? RowMajor : ColMajor,
  int RhsStorageOrder = (traits<Rhs>::Flags&RowMajorBit) ? RowMajor : ColMajor>
struct sparse_sparse_to_dense_product_selector;

template<typename Lhs, typename Rhs, typename ResultType>
struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    internal::sparse_sparse_to_dense_product_impl<Lhs,Rhs,ResultType>(lhs, rhs, res);
  }
};

template<typename Lhs, typename Rhs, typename ResultType>
struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorLhs;
    ColMajorLhs lhsCol(lhs);
    internal::sparse_sparse_to_dense_product_impl<ColMajorLhs,Rhs,ResultType>(lhsCol, rhs, res);
  }
};

template<typename Lhs, typename Rhs, typename ResultType>
struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,ColMajor,RowMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRhs;
    ColMajorRhs rhsCol(rhs);
    internal::sparse_sparse_to_dense_product_impl<Lhs,ColMajorRhs,ResultType>(lhs, rhsCol, res);
  }
};

template<typename Lhs, typename Rhs, typename ResultType>
struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,RowMajor,RowMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    Transpose<ResultType> trRes(res);
    internal::sparse_sparse_to_dense_product_impl<Rhs,Lhs,Transpose<ResultType> >(rhs, lhs, trRes);
  }
};


} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H
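// Usage sketch (illustrative only; sizes are hypothetical): a plain
// sparse*sparse product with a sparse destination is evaluated through the
// conservative product selectors defined above.
//
//   Eigen::SparseMatrix<double> A(rows,depth), B(depth,cols);
//   // ... fill A and B ...
//   Eigen::SparseMatrix<double> C = A * B; // conservative sparse product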