1 // This file is part of Eigen, a lightweight C++ template library 2 // for linear algebra. 3 // 4 // Copyright (C) 2010 Gael Guennebaud <[email protected]> 5 // Copyright (C) 2010-2016 Konstantinos Margaritis <[email protected]> 6 // 7 // This Source Code Form is subject to the terms of the Mozilla 8 // Public License v. 2.0. If a copy of the MPL was not distributed 9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 10 11 #ifndef EIGEN_COMPLEX32_ALTIVEC_H 12 #define EIGEN_COMPLEX32_ALTIVEC_H 13 14 namespace Eigen { 15 16 namespace internal { 17 18 static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_MZERO);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; 19 #ifdef __VSX__ 20 #if defined(_BIG_ENDIAN) 21 static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 }; 22 static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_MZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 }; 23 #else 24 static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_MZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 }; 25 static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 }; 26 #endif 27 #endif 28 29 //---------- float ---------- 30 struct Packet2cf 31 { Packet2cfPacket2cf32 EIGEN_STRONG_INLINE explicit Packet2cf() {} Packet2cfPacket2cf33 EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {} 34 pmulPacket2cf35 EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) 36 { 37 Packet4f v1, v2; 38 39 // Permute and multiply the real parts of a and b 40 v1 = vec_perm(a.v, a.v, p16uc_PSET32_WODD); 41 // Get the imaginary parts of a 42 v2 = vec_perm(a.v, a.v, p16uc_PSET32_WEVEN); 43 // multiply a_re * b 44 v1 = vec_madd(v1, b.v, p4f_ZERO); 45 // multiply a_im * b and get the conjugate result 46 v2 = vec_madd(v2, b.v, p4f_ZERO); 47 v2 = reinterpret_cast<Packet4f>(pxor(v2, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR))); 48 // permute back to a proper order 49 v2 = vec_perm(v2, v2, p16uc_COMPLEX32_REV); 50 51 return Packet2cf(padd<Packet4f>(v1, v2)); 52 } 53 54 EIGEN_STRONG_INLINE Packet2cf& operator*=(const Packet2cf& b) { 55 v = pmul(Packet2cf(*this), b).v; 56 return *this; 57 } 58 EIGEN_STRONG_INLINE Packet2cf operator*(const Packet2cf& b) const { 59 return Packet2cf(*this) *= b; 60 } 61 62 EIGEN_STRONG_INLINE Packet2cf& operator+=(const Packet2cf& b) { 63 v = padd(v, b.v); 64 return *this; 65 } 66 EIGEN_STRONG_INLINE Packet2cf operator+(const Packet2cf& b) const { 67 return Packet2cf(*this) += b; 68 } 69 EIGEN_STRONG_INLINE Packet2cf& operator-=(const Packet2cf& b) { 70 v = psub(v, b.v); 71 return *this; 72 } 73 EIGEN_STRONG_INLINE Packet2cf operator-(const Packet2cf& b) const { 74 return Packet2cf(*this) -= b; 75 } 76 EIGEN_STRONG_INLINE Packet2cf operator-(void) const { 77 return Packet2cf(-v); 78 } 79 80 Packet4f v; 81 }; 82 83 template<> struct packet_traits<std::complex<float> > : default_packet_traits 84 { 85 typedef Packet2cf type; 86 typedef Packet2cf half; 87 typedef Packet4f as_real; 88 enum { 89 Vectorizable = 1, 90 AlignedOnScalar = 1, 91 size = 2, 92 HasHalfPacket = 0, 93 94 HasAdd = 1, 95 HasSub = 1, 96 HasMul = 1, 97 HasDiv = 1, 98 HasNegate = 1, 99 HasAbs = 0, 100 HasAbs2 = 0, 101 HasMin = 0, 102 HasMax = 0, 103 #ifdef __VSX__ 104 HasBlend = 1, 105 #endif 106 HasSetLinear = 0 107 }; 108 }; 109 110 template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2cf half; typedef Packet4f as_real; }; 111 112 template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from) 113 { 114 Packet2cf res; 115 if((std::ptrdiff_t(&from) % 16) == 0) 116 res.v = pload<Packet4f>((const float *)&from); 117 else 118 res.v = ploadu<Packet4f>((const float *)&from); 119 res.v = vec_perm(res.v, res.v, p16uc_PSET64_HI); 120 return res; 121 } 122 123 template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) { return Packet2cf(pload<Packet4f>((const float *) from)); } 124 template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { return Packet2cf(ploadu<Packet4f>((const float*) from)); } 125 template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); } 126 127 template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { pstore((float*)to, from.v); } 128 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { pstoreu((float*)to, from.v); } 129 130 EIGEN_STRONG_INLINE Packet2cf pload2(const std::complex<float>* from0, const std::complex<float>* from1) 131 { 132 Packet4f res0, res1; 133 #ifdef __VSX__ 134 __asm__ ("lxsdx %x0,%y1" : "=wa" (res0) : "Z" (*from0)); 135 __asm__ ("lxsdx %x0,%y1" : "=wa" (res1) : "Z" (*from1)); 136 #ifdef _BIG_ENDIAN 137 __asm__ ("xxpermdi %x0, %x1, %x2, 0" : "=wa" (res0) : "wa" (res0), "wa" (res1)); 138 #else 139 __asm__ ("xxpermdi %x0, %x2, %x1, 0" : "=wa" (res0) : "wa" (res0), "wa" (res1)); 140 #endif 141 #else 142 *reinterpret_cast<std::complex<float> *>(&res0) = *from0; 143 *reinterpret_cast<std::complex<float> *>(&res1) = *from1; 144 res0 = vec_perm(res0, res1, p16uc_TRANSPOSE64_HI); 145 #endif 146 return Packet2cf(res0); 147 } 148 149 template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride) 150 { 151 EIGEN_ALIGN16 std::complex<float> af[2]; 152 af[0] = from[0*stride]; 153 af[1] = from[1*stride]; 154 return pload<Packet2cf>(af); 155 } 156 template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride) 157 { 158 EIGEN_ALIGN16 std::complex<float> af[2]; 159 pstore<std::complex<float> >((std::complex<float> *) af, from); 160 to[0*stride] = af[0]; 161 to[1*stride] = af[1]; 162 } 163 164 template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(a.v + b.v); } 165 template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(a.v - b.v); } 166 template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); } 167 template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf(pxor<Packet4f>(a.v, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR))); } 168 169 template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand<Packet4f>(a.v, b.v)); } 170 template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por<Packet4f>(a.v, b.v)); } 171 template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pxor<Packet4f>(a.v, b.v)); } 172 template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pandnot<Packet4f>(a.v, b.v)); } 173 174 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_PPC_PREFETCH(addr); } 175 176 template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a) 177 { 178 EIGEN_ALIGN16 std::complex<float> res[2]; 179 pstore((float *)&res, a.v); 180 181 return res[0]; 182 } 183 184 template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) 185 { 186 Packet4f rev_a; 187 rev_a = vec_perm(a.v, a.v, p16uc_COMPLEX32_REV2); 188 return Packet2cf(rev_a); 189 } 190 191 template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a) 192 { 193 Packet4f b; 194 b = vec_sld(a.v, a.v, 8); 195 b = padd<Packet4f>(a.v, b); 196 return pfirst<Packet2cf>(Packet2cf(b)); 197 } 198 199 template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a) 200 { 201 Packet4f b; 202 Packet2cf prod; 203 b = vec_sld(a.v, a.v, 8); 204 prod = pmul<Packet2cf>(a, Packet2cf(b)); 205 206 return pfirst<Packet2cf>(prod); 207 } 208 209 EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f) 210 211 template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b) 212 { 213 // TODO optimize it for AltiVec 214 Packet2cf res = pmul(a, pconj(b)); 215 Packet4f s = pmul<Packet4f>(b.v, b.v); 216 return Packet2cf(pdiv(res.v, padd<Packet4f>(s, vec_perm(s, s, p16uc_COMPLEX32_REV)))); 217 } 218 219 template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x) 220 { 221 return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX32_REV)); 222 } 223 224 EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel) 225 { 226 Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI); 227 kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO); 228 kernel.packet[0].v = tmp; 229 } 230 231 template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b) { 232 Packet4f eq = reinterpret_cast<Packet4f>(vec_cmpeq(a.v,b.v)); 233 return Packet2cf(vec_and(eq, vec_perm(eq, eq, p16uc_COMPLEX32_REV))); 234 } 235 236 #ifdef __VSX__ 237 template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) { 238 Packet2cf result; 239 result.v = reinterpret_cast<Packet4f>(pblend<Packet2d>(ifPacket, reinterpret_cast<Packet2d>(thenPacket.v), reinterpret_cast<Packet2d>(elsePacket.v))); 240 return result; 241 } 242 #endif 243 244 template<> EIGEN_STRONG_INLINE Packet2cf psqrt<Packet2cf>(const Packet2cf& a) 245 { 246 return psqrt_complex<Packet2cf>(a); 247 } 248 249 //---------- double ---------- 250 #ifdef __VSX__ 251 struct Packet1cd 252 { 253 EIGEN_STRONG_INLINE Packet1cd() {} 254 EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {} 255 256 EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) 257 { 258 Packet2d a_re, a_im, v1, v2; 259 260 // Permute and multiply the real parts of a and b 261 a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI); 262 // Get the imaginary parts of a 263 a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO); 264 // multiply a_re * b 265 v1 = vec_madd(a_re, b.v, p2d_ZERO); 266 // multiply a_im * b and get the conjugate result 267 v2 = vec_madd(a_im, b.v, p2d_ZERO); 268 v2 = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(v2), reinterpret_cast<Packet4ui>(v2), 8)); 269 v2 = pxor(v2, reinterpret_cast<Packet2d>(p2ul_CONJ_XOR1)); 270 271 return Packet1cd(padd<Packet2d>(v1, v2)); 272 } 273 274 EIGEN_STRONG_INLINE Packet1cd& operator*=(const Packet1cd& b) { 275 v = pmul(Packet1cd(*this), b).v; 276 return *this; 277 } 278 EIGEN_STRONG_INLINE Packet1cd operator*(const Packet1cd& b) const { 279 return Packet1cd(*this) *= b; 280 } 281 282 EIGEN_STRONG_INLINE Packet1cd& operator+=(const Packet1cd& b) { 283 v = padd(v, b.v); 284 return *this; 285 } 286 EIGEN_STRONG_INLINE Packet1cd operator+(const Packet1cd& b) const { 287 return Packet1cd(*this) += b; 288 } 289 EIGEN_STRONG_INLINE Packet1cd& operator-=(const Packet1cd& b) { 290 v = psub(v, b.v); 291 return *this; 292 } 293 EIGEN_STRONG_INLINE Packet1cd operator-(const Packet1cd& b) const { 294 return Packet1cd(*this) -= b; 295 } 296 EIGEN_STRONG_INLINE Packet1cd operator-(void) const { 297 return Packet1cd(-v); 298 } 299 300 Packet2d v; 301 }; 302 303 template<> struct packet_traits<std::complex<double> > : default_packet_traits 304 { 305 typedef Packet1cd type; 306 typedef Packet1cd half; 307 typedef Packet2d as_real; 308 enum { 309 Vectorizable = 1, 310 AlignedOnScalar = 0, 311 size = 1, 312 HasHalfPacket = 0, 313 314 HasAdd = 1, 315 HasSub = 1, 316 HasMul = 1, 317 HasDiv = 1, 318 HasNegate = 1, 319 HasAbs = 0, 320 HasAbs2 = 0, 321 HasMin = 0, 322 HasMax = 0, 323 HasSetLinear = 0 324 }; 325 }; 326 327 template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet1cd half; typedef Packet2d as_real; }; 328 329 template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { return Packet1cd(pload<Packet2d>((const double*)from)); } 330 template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { return Packet1cd(ploadu<Packet2d>((const double*)from)); } 331 template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { pstore((double*)to, from.v); } 332 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { pstoreu((double*)to, from.v); } 333 334 template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from) 335 { /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); } 336 337 template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index) 338 { 339 return pload<Packet1cd>(from); 340 } 341 template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index) 342 { 343 pstore<std::complex<double> >(to, from); 344 } 345 346 template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); } 347 template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); } 348 template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); } 349 template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd(pxor(a.v, reinterpret_cast<Packet2d>(p2ul_CONJ_XOR2))); } 350 351 template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pand(a.v,b.v)); } 352 template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(por(a.v,b.v)); } 353 template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pxor(a.v,b.v)); } 354 template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pandnot(a.v, b.v)); } 355 356 template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); } 357 358 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_PPC_PREFETCH(addr); } 359 360 template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a) 361 { 362 EIGEN_ALIGN16 std::complex<double> res[2]; 363 pstore<std::complex<double> >(res, a); 364 365 return res[0]; 366 } 367 368 template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; } 369 370 template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); } 371 372 template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); } 373 374 EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d) 375 376 template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b) 377 { 378 // TODO optimize it for AltiVec 379 Packet1cd res = pmul(a,pconj(b)); 380 Packet2d s = pmul<Packet2d>(b.v, b.v); 381 return Packet1cd(pdiv(res.v, padd<Packet2d>(s, vec_perm(s, s, p16uc_REVERSE64)))); 382 } 383 384 EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x) 385 { 386 return Packet1cd(preverse(Packet2d(x.v))); 387 } 388 389 EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel) 390 { 391 Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI); 392 kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO); 393 kernel.packet[0].v = tmp; 394 } 395 396 template<> EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b) { 397 // Compare real and imaginary parts of a and b to get the mask vector: 398 // [re(a)==re(b), im(a)==im(b)] 399 Packet2d eq = reinterpret_cast<Packet2d>(vec_cmpeq(a.v,b.v)); 400 // Swap real/imag elements in the mask in to get: 401 // [im(a)==im(b), re(a)==re(b)] 402 Packet2d eq_swapped = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(eq), reinterpret_cast<Packet4ui>(eq), 8)); 403 // Return re(a)==re(b) & im(a)==im(b) by computing bitwise AND of eq and eq_swapped 404 return Packet1cd(vec_and(eq, eq_swapped)); 405 } 406 407 template<> EIGEN_STRONG_INLINE Packet1cd psqrt<Packet1cd>(const Packet1cd& a) 408 { 409 return psqrt_complex<Packet1cd>(a); 410 } 411 412 #endif // __VSX__ 413 } // end namespace internal 414 415 } // end namespace Eigen 416 417 #endif // EIGEN_COMPLEX32_ALTIVEC_H 418