1 /* Copyright (c) 2013 Julien Pommier ( [email protected] ) 2 Copyright (c) 2020 Hayati Ayguen ( [email protected] ) 3 4 Based on original fortran 77 code from FFTPACKv4 from NETLIB 5 (http://www.netlib.org/fftpack), authored by Dr Paul Swarztrauber 6 of NCAR, in 1985. 7 8 As confirmed by the NCAR fftpack software curators, the following 9 FFTPACKv5 license applies to FFTPACKv4 sources. My changes are 10 released under the same terms. 11 12 FFTPACK license: 13 14 http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html 15 16 Copyright (c) 2004 the University Corporation for Atmospheric 17 Research ("UCAR"). All rights reserved. Developed by NCAR's 18 Computational and Information Systems Laboratory, UCAR, 19 www.cisl.ucar.edu. 20 21 Redistribution and use of the Software in source and binary forms, 22 with or without modification, is permitted provided that the 23 following conditions are met: 24 25 - Neither the names of NCAR's Computational and Information Systems 26 Laboratory, the University Corporation for Atmospheric Research, 27 nor the names of its sponsors or contributors may be used to 28 endorse or promote products derived from this Software without 29 specific prior written permission. 30 31 - Redistributions of source code must retain the above copyright 32 notices, this list of conditions, and the disclaimer below. 33 34 - Redistributions in binary form must reproduce the above copyright 35 notice, this list of conditions, and the disclaimer below in the 36 documentation and/or other materials provided with the 37 distribution. 38 39 THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 40 EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF 41 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 42 NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT 43 HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL, 44 EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN 45 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 46 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE 47 SOFTWARE. 48 49 50 PFFFT : a Pretty Fast FFT. 51 52 This file is largerly based on the original FFTPACK implementation, modified in 53 order to take advantage of SIMD instructions of modern CPUs. 54 */ 55 56 /* 57 ChangeLog: 58 - 2011/10/02, version 1: This is the very first release of this file. 59 */ 60 61 #include "pffft.h" 62 63 /* detect compiler flavour */ 64 #if defined(_MSC_VER) 65 # define COMPILER_MSVC 66 #elif defined(__GNUC__) 67 # define COMPILER_GCC 68 #endif 69 70 #include <stdlib.h> 71 #include <stdint.h> 72 #include <stdio.h> 73 #include <math.h> 74 #include <assert.h> 75 76 #if defined(COMPILER_GCC) 77 # define ALWAYS_INLINE(return_type) inline return_type __attribute__ ((always_inline)) 78 # define NEVER_INLINE(return_type) return_type __attribute__ ((noinline)) 79 # define RESTRICT __restrict 80 # define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ varname__[size__]; 81 #elif defined(COMPILER_MSVC) 82 # define ALWAYS_INLINE(return_type) __forceinline return_type 83 # define NEVER_INLINE(return_type) __declspec(noinline) return_type 84 # define RESTRICT __restrict 85 # define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ *varname__ = (type__*)_alloca(size__ * sizeof(type__)) 86 #endif 87 88 89 #ifdef COMPILER_MSVC 90 #pragma warning( disable : 4244 4305 4204 4456 ) 91 #endif 92 93 /* 94 vector support macros: the rest of the code is independant of 95 SSE/Altivec/NEON -- adding support for other platforms with 4-element 96 vectors should be limited to these macros 97 */ 98 #include "simd/pf_float.h" 99 100 /* have code comparable with this definition */ 101 #define SETUP_STRUCT PFFFT_Setup 102 #define FUNC_NEW_SETUP pffft_new_setup 103 #define FUNC_DESTROY pffft_destroy_setup 104 #define FUNC_TRANSFORM_UNORDRD pffft_transform 105 #define FUNC_TRANSFORM_ORDERED pffft_transform_ordered 106 #define FUNC_ZREORDER pffft_zreorder 107 #define FUNC_ZCONVOLVE_ACCUMULATE pffft_zconvolve_accumulate 108 #define FUNC_ZCONVOLVE_NO_ACCU pffft_zconvolve_no_accu 109 110 #define FUNC_ALIGNED_MALLOC pffft_aligned_malloc 111 #define FUNC_ALIGNED_FREE pffft_aligned_free 112 #define FUNC_SIMD_SIZE pffft_simd_size 113 #define FUNC_SIMD_ARCH pffft_simd_arch 114 #define FUNC_VALIDATE_SIMD_A validate_pffft_simd 115 #define FUNC_VALIDATE_SIMD_EX validate_pffft_simd_ex 116 117 #define FUNC_CPLX_FINALIZE pffft_cplx_finalize 118 #define FUNC_CPLX_PREPROCESS pffft_cplx_preprocess 119 #define FUNC_REAL_PREPROCESS_4X4 pffft_real_preprocess_4x4 120 #define FUNC_REAL_PREPROCESS pffft_real_preprocess 121 #define FUNC_REAL_FINALIZE_4X4 pffft_real_finalize_4x4 122 #define FUNC_REAL_FINALIZE pffft_real_finalize 123 #define FUNC_TRANSFORM_INTERNAL pffft_transform_internal 124 125 #define FUNC_COS cosf 126 #define FUNC_SIN sinf 127 128 129 #include "pffft_priv_impl.h" 130 131 132