5b93090ce017 — Chris Cannam a month ago
Add SLEEF FFT support
4 files changed, 360 insertions(+), 5 deletions(-)

M meson.build
M meson_options.txt
M src/common/Allocators.h
M src/common/FFT.cpp
M meson.build +35 -0
@@ 112,6 112,8 @@ foreach d: get_option('extra_include_dir
 endforeach
 
 fftw3_dep = dependency('fftw3', version: '>= 3.0.0', required: false)
+sleef_dep = dependency('sleef', version: '>= 3.3.0', required: false)
+sleefdft_dep = dependency('sleefdft', version: '>= 3.3.0', required: false)
 samplerate_dep = dependency('samplerate', version: '>= 0.1.8', required: false)
 sndfile_dep = dependency('sndfile', version: '>= 1.0.16', required: false)
 vamp_dep = dependency('vamp-sdk', version: '>= 2.9', required: false)

          
@@ 164,6 166,9 @@ if fft == 'builtin'
   if fftw3_dep.found()
     message('(to use FFTW instead, reconfigure with -Dfft=fftw)')
   endif
+  if sleef_dep.found()
+    message('(to use SLEEF instead, reconfigure with -Dfft=sleef)')
+  endif
   feature_defines += ['-DUSE_BUILTIN_FFT']
 
 elif fft == 'kissfft'

          
@@ 172,6 177,9 @@ elif fft == 'kissfft'
   if fftw3_dep.found()
     message('(to use FFTW instead, reconfigure with -Dfft=fftw)')
   endif
+  if sleef_dep.found()
+    message('(to use SLEEF instead, reconfigure with -Dfft=sleef)')
+  endif
   feature_sources += ['src/ext/kissfft/kiss_fft.c', 'src/ext/kissfft/kiss_fftr.c']
   feature_defines += ['-DHAVE_KISSFFT']
   general_include_dirs += 'src/ext/kissfft'

          
@@ 180,6 188,9 @@ elif fft == 'fftw'
   if fftw3_dep.found()
     config_summary += { 'FFT': 'FFTW' }
     message('For FFT: using FFTW')
+    if sleef_dep.found()
+      message('(to use SLEEF instead, reconfigure with -Dfft=sleef)')
+    endif
     pkgconfig_requirements += fftw3_dep
   else 
     fftw_dep = cpp.find_library('fftw3',

          
@@ 187,10 198,34 @@ elif fft == 'fftw'
                                 has_headers: ['fftw3.h'],
                                 header_args: extra_include_args,
                                 required: true)
+    config_summary += { 'FFT': 'FFTW' }
   endif
   feature_dependencies += fftw3_dep
   feature_defines += ['-DHAVE_FFTW3', '-DFFTW_DOUBLE_ONLY']
 
+elif fft == 'sleef'
+  if sleefdft_dep.found() and sleef_dep.found()
+    config_summary += { 'FFT': 'SLEEF' }
+    message('For FFT: using SLEEF')
+    pkgconfig_requirements += sleefdft_dep
+    pkgconfig_requirements += sleef_dep
+  else 
+    sleefdft_dep = cpp.find_library('sleefdft',
+                                    dirs: get_option('extra_lib_dirs'),
+                                    has_headers: ['sleefdft.h'],
+                                    header_args: extra_include_args,
+                                    required: true)
+    sleef_dep = cpp.find_library('sleef',
+                                 dirs: get_option('extra_lib_dirs'),
+                                 has_headers: ['sleef.h'],
+                                 header_args: extra_include_args,
+                                 required: true)
+    config_summary += { 'FFT': 'SLEEF' }
+  endif
+  feature_dependencies += sleefdft_dep
+  feature_dependencies += sleef_dep
+  feature_defines += ['-DHAVE_SLEEF']
+
 elif fft == 'vdsp'
   config_summary += { 'FFT': 'vDSP' }
   message('For FFT: using vDSP')

          
M meson_options.txt +1 -1
@@ 1,7 1,7 @@ 
 
 option('fft',
        type: 'combo',
-       choices: ['auto', 'builtin', 'kissfft', 'fftw', 'vdsp', 'ipp'],
+       choices: ['auto', 'builtin', 'kissfft', 'fftw', 'sleef', 'vdsp', 'ipp'],
        value: 'auto',
        description: 'FFT library to use. The default (auto) will use vDSP if available, the builtin implementation otherwise.')
 

          
M src/common/Allocators.h +3 -3
@@ 85,9 85,9 @@ T *allocate(size_t count)
 #else /* !MALLOC_IS_ALIGNED */
 
     // That's the "sufficiently aligned" functions dealt with, the
-    // rest need a specific alignment provided to the call. 32-byte
-    // alignment is required for at least OpenMAX
-    static const int alignment = 32;
+    // rest need a specific alignment provided to the call. 64-byte
+    // alignment is enough for 8x8 double operations
+    static const int alignment = 64;
 
 #ifdef HAVE__ALIGNED_MALLOC
     ptr = _aligned_malloc(count * sizeof(T), alignment);

          
M src/common/FFT.cpp +321 -1
@@ 53,6 53,13 @@ 
 #include <fftw3.h>
 #endif
 
+#ifdef HAVE_SLEEF
+extern "C" {
+#include <sleef.h>
+#include <sleefdft.h>
+}
+#endif
+
 #ifdef HAVE_VDSP
 #include <Accelerate/Accelerate.h>
 #endif

          
@@ 63,6 70,7 @@ 
 
 #ifndef HAVE_IPP
 #ifndef HAVE_FFTW3
+#ifndef HAVE_SLEEF
 #ifndef HAVE_KISSFFT
 #ifndef USE_BUILTIN_FFT
 #ifndef HAVE_VDSP

          
@@ 72,6 80,7 @@ 
 #endif
 #endif
 #endif
+#endif
 
 #include <cmath>
 #include <iostream>

          
@@ 1425,6 1434,302 @@ pthread_mutex_t D_FFTW::m_commonMutex = 
 
 #endif /* HAVE_FFTW3 */
 
+#ifdef HAVE_SLEEF
+
+class D_SLEEF : public FFTImpl
+{
+    bool isAligned(const void *ptr) {
+        return ! ((uintptr_t)ptr & 63);
+    }
+    
+public:
+    D_SLEEF(int size) :
+        m_fplanf(0), m_fplani(0), m_fbuf(0), m_fpacked(0),
+        m_dplanf(0), m_dplani(0), m_dbuf(0), m_dpacked(0),
+        m_size(size)
+    {
+    }
+
+    ~D_SLEEF() {
+        if (m_fplanf) {
+            SleefDFT_dispose(m_fplanf);
+            SleefDFT_dispose(m_fplani);
+            Sleef_free(m_fbuf);
+            Sleef_free(m_fpacked);
+        }
+        if (m_dplanf) {
+            SleefDFT_dispose(m_dplanf);
+            SleefDFT_dispose(m_dplani);
+            Sleef_free(m_dbuf);
+            Sleef_free(m_dpacked);
+        }
+    }
+
+    int getSize() const {
+        return m_size;
+    }
+
+    FFT::Precisions
+    getSupportedPrecisions() const {
+        return FFT::SinglePrecision | FFT::DoublePrecision;
+    }
+
+    void initFloat() {
+        if (m_fplanf) return;
+
+        m_fbuf = static_cast<float *>
+            (Sleef_malloc(m_size * sizeof(float)));
+        m_fpacked = static_cast<float *>
+            (Sleef_malloc((m_size + 2) * sizeof(float)));
+
+        m_fplanf = SleefDFT_float_init1d
+            (m_size, m_fbuf, m_fpacked,
+             SLEEF_MODE_FORWARD | SLEEF_MODE_REAL | SLEEF_MODE_ESTIMATE);
+
+        m_fplani = SleefDFT_float_init1d
+            (m_size, m_fpacked, m_fbuf,
+             SLEEF_MODE_BACKWARD | SLEEF_MODE_REAL | SLEEF_MODE_ESTIMATE);
+    }
+
+    void initDouble() {
+        if (m_dplanf) return;
+
+        m_dbuf = static_cast<double *>
+            (Sleef_malloc(m_size * sizeof(double)));
+        m_dpacked = static_cast<double *>
+            (Sleef_malloc((m_size + 2) * sizeof(double)));
+
+        m_dplanf = SleefDFT_double_init1d
+            (m_size, m_dbuf, m_dpacked,
+             SLEEF_MODE_FORWARD | SLEEF_MODE_REAL | SLEEF_MODE_ESTIMATE);
+
+        m_dplani = SleefDFT_double_init1d
+            (m_size, m_dpacked, m_dbuf,
+             SLEEF_MODE_BACKWARD | SLEEF_MODE_REAL | SLEEF_MODE_ESTIMATE);
+    }
+
+    void packFloat(const float *BQ_R__ re, const float *BQ_R__ im) {
+        const float *src[2] = { re, im };
+        v_interleave(m_fpacked, src, 2, m_size/2 + 1);
+    }
+
+    void packDouble(const double *BQ_R__ re, const double *BQ_R__ im) {
+        const double *src[2] = { re, im };
+        v_interleave(m_dpacked, src, 2, m_size/2 + 1);
+    }
+
+    void unpackFloat(float *BQ_R__ re, float *BQ_R__ im) {
+        float *dst[2] = { re, im };
+        v_deinterleave(dst, m_fpacked, 2, m_size/2 + 1);
+    }        
+
+    void unpackDouble(double *BQ_R__ re, double *BQ_R__ im) {
+        double *dst[2] = { re, im };
+        v_deinterleave(dst, m_dpacked, 2, m_size/2 + 1);
+    }        
+
+    void forward(const double *BQ_R__ realIn, double *BQ_R__ realOut, double *BQ_R__ imagOut) {
+        if (!m_dplanf) initDouble();
+        if (isAligned(realIn)) {
+            SleefDFT_double_execute(m_dplanf, realIn, 0);
+        } else {
+            v_copy(m_dbuf, realIn, m_size);
+            SleefDFT_double_execute(m_dplanf, 0, 0);
+        }
+        unpackDouble(realOut, imagOut);
+    }
+
+    void forwardInterleaved(const double *BQ_R__ realIn, double *BQ_R__ complexOut) {
+        if (!m_dplanf) initDouble();
+        if (isAligned(realIn) && isAligned(complexOut)) {
+            SleefDFT_double_execute(m_dplanf, realIn, complexOut);
+        } else {
+            v_copy(m_dbuf, realIn, m_size);
+            SleefDFT_double_execute(m_dplanf, 0, 0);
+            v_copy(complexOut, m_dpacked, m_size + 2);
+        }
+    }
+
+    void forwardPolar(const double *BQ_R__ realIn, double *BQ_R__ magOut, double *BQ_R__ phaseOut) {
+        if (!m_dplanf) initDouble();
+        if (isAligned(realIn)) {
+            SleefDFT_double_execute(m_dplanf, realIn, 0);
+        } else {
+            v_copy(m_dbuf, realIn, m_size);
+            SleefDFT_double_execute(m_dplanf, 0, 0);
+        }
+        v_cartesian_interleaved_to_polar(magOut, phaseOut, m_dpacked, m_size/2+1);
+    }
+
+    void forwardMagnitude(const double *BQ_R__ realIn, double *BQ_R__ magOut) {
+        if (!m_dplanf) initDouble();
+        if (isAligned(realIn)) {
+            SleefDFT_double_execute(m_dplanf, realIn, 0);
+        } else {
+            v_copy(m_dbuf, realIn, m_size);
+            SleefDFT_double_execute(m_dplanf, 0, 0);
+        }
+        v_cartesian_interleaved_to_magnitudes(magOut, m_dpacked, m_size/2+1);
+    }
+
+    void forward(const float *BQ_R__ realIn, float *BQ_R__ realOut, float *BQ_R__ imagOut) {
+        if (!m_fplanf) initFloat();
+        if (isAligned(realIn)) {
+            SleefDFT_float_execute(m_fplanf, realIn, 0);
+        } else {
+            v_copy(m_fbuf, realIn, m_size);
+            SleefDFT_float_execute(m_fplanf, 0, 0);
+        }
+        unpackFloat(realOut, imagOut);
+    }
+
+    void forwardInterleaved(const float *BQ_R__ realIn, float *BQ_R__ complexOut) {
+        if (!m_fplanf) initFloat();
+        if (isAligned(realIn) && isAligned(complexOut)) {
+            SleefDFT_float_execute(m_fplanf, realIn, complexOut);
+        } else {
+            v_copy(m_fbuf, realIn, m_size);
+            SleefDFT_float_execute(m_fplanf, 0, 0);
+            v_copy(complexOut, m_fpacked, m_size + 2);
+        }
+    }
+
+    void forwardPolar(const float *BQ_R__ realIn, float *BQ_R__ magOut, float *BQ_R__ phaseOut) {
+        if (!m_fplanf) initFloat();
+        if (isAligned(realIn)) {
+            SleefDFT_float_execute(m_fplanf, realIn, 0);
+        } else {
+            v_copy(m_fbuf, realIn, m_size);
+            SleefDFT_float_execute(m_fplanf, 0, 0);
+        }
+        v_cartesian_interleaved_to_polar(magOut, phaseOut, m_fpacked, m_size/2+1);
+    }
+
+    void forwardMagnitude(const float *BQ_R__ realIn, float *BQ_R__ magOut) {
+        if (!m_fplanf) initFloat();
+        if (isAligned(realIn)) {
+            SleefDFT_float_execute(m_fplanf, realIn, 0);
+        } else {
+            v_copy(m_fbuf, realIn, m_size);
+            SleefDFT_float_execute(m_fplanf, 0, 0);
+        }
+        v_cartesian_interleaved_to_magnitudes(magOut, m_fpacked, m_size/2+1);
+    }
+
+    void inverse(const double *BQ_R__ realIn, const double *BQ_R__ imagIn, double *BQ_R__ realOut) {
+        if (!m_dplanf) initDouble();
+        packDouble(realIn, imagIn);
+        if (isAligned(realOut)) {
+            SleefDFT_double_execute(m_dplani, 0, realOut);
+        } else {
+            SleefDFT_double_execute(m_dplani, 0, 0);
+            v_copy(realOut, m_dbuf, m_size);
+        }
+    }
+
+    void inverseInterleaved(const double *BQ_R__ complexIn, double *BQ_R__ realOut) {
+        if (!m_dplanf) initDouble();
+        if (isAligned(complexIn) && isAligned(realOut)) {
+            SleefDFT_double_execute(m_dplani, complexIn, realOut);
+        } else {            
+            v_copy(m_dpacked, complexIn, m_size + 2);
+            SleefDFT_double_execute(m_dplani, 0, 0);
+            v_copy(realOut, m_dbuf, m_size);
+        }
+    }
+
+    void inversePolar(const double *BQ_R__ magIn, const double *BQ_R__ phaseIn, double *BQ_R__ realOut) {
+        if (!m_dplanf) initDouble();
+        v_polar_to_cartesian_interleaved(m_dpacked, magIn, phaseIn, m_size/2+1);
+        if (isAligned(realOut)) {
+            SleefDFT_double_execute(m_dplani, 0, realOut);
+        } else {
+            SleefDFT_double_execute(m_dplani, 0, 0);
+            v_copy(realOut, m_dbuf, m_size);
+        }
+    }
+
+    void inverseCepstral(const double *BQ_R__ magIn, double *BQ_R__ cepOut) {
+        if (!m_dplanf) initDouble();
+        const int hs = m_size/2;
+        for (int i = 0; i <= hs; ++i) {
+            m_dpacked[i*2] = log(magIn[i] + 0.000001);
+            m_dpacked[i*2+1] = 0.0;
+        }
+        if (isAligned(cepOut)) {
+            SleefDFT_double_execute(m_dplani, 0, cepOut);
+        } else {
+            SleefDFT_double_execute(m_dplani, 0, 0);
+            v_copy(cepOut, m_dbuf, m_size);
+        }
+    }
+
+    void inverse(const float *BQ_R__ realIn, const float *BQ_R__ imagIn, float *BQ_R__ realOut) {
+        if (!m_fplanf) initFloat();
+        packFloat(realIn, imagIn);
+        if (isAligned(realOut)) {
+            SleefDFT_float_execute(m_dplani, 0, realOut);
+        } else {
+            SleefDFT_float_execute(m_fplani, 0, 0);
+            v_copy(realOut, m_fbuf, m_size);
+        }
+    }
+
+    void inverseInterleaved(const float *BQ_R__ complexIn, float *BQ_R__ realOut) {
+        if (!m_fplanf) initFloat();
+        if (isAligned(complexIn) && isAligned(realOut)) {
+            SleefDFT_float_execute(m_fplani, complexIn, realOut);
+        } else {
+            v_copy(m_fpacked, complexIn, m_size + 2);
+            SleefDFT_float_execute(m_fplani, 0, 0);
+            v_copy(realOut, m_fbuf, m_size);
+        }
+    }
+
+    void inversePolar(const float *BQ_R__ magIn, const float *BQ_R__ phaseIn, float *BQ_R__ realOut) {
+        if (!m_fplanf) initFloat();
+        v_polar_to_cartesian_interleaved(m_fpacked, magIn, phaseIn, m_size/2+1);
+        if (isAligned(realOut)) {
+            SleefDFT_float_execute(m_fplani, 0, realOut);
+        } else {
+            SleefDFT_float_execute(m_fplani, 0, 0);
+            v_copy(realOut, m_fbuf, m_size);
+        }
+    }
+
+    void inverseCepstral(const float *BQ_R__ magIn, float *BQ_R__ cepOut) {
+        if (!m_fplanf) initFloat();
+        const int hs = m_size/2;
+        for (int i = 0; i <= hs; ++i) {
+            m_fpacked[i*2] = logf(magIn[i] + 0.000001f);
+            m_fpacked[i*2+1] = 0.0;
+        }
+        if (isAligned(cepOut)) {
+            SleefDFT_float_execute(m_fplani, 0, cepOut);
+        } else {
+            SleefDFT_float_execute(m_fplani, 0, 0);
+            v_copy(cepOut, m_fbuf, m_size);
+        }
+    }
+
+private:
+    SleefDFT *m_fplanf;
+    SleefDFT *m_fplani;
+
+    float *m_fbuf;
+    float *m_fpacked;
+    
+    SleefDFT *m_dplanf;
+    SleefDFT *m_dplani;
+
+    double *m_dbuf;
+    double *m_dpacked;
+    
+    const int m_size;
+};
+
+#endif /* HAVE_SLEEF */
+
 #ifdef HAVE_KISSFFT
 
 class D_KISSFFT : public FFTImpl

          
@@ 2266,6 2571,9 @@ getImplementationDetails()
 #ifdef HAVE_FFTW3
     impls["fftw"] = SizeConstraintNone;
 #endif
+#ifdef HAVE_SLEEF
+    impls["sleef"] = SizeConstraintEvenPowerOfTwo;
+#endif
 #ifdef HAVE_KISSFFT
     impls["kissfft"] = SizeConstraintEven;
 #endif

          
@@ 2310,7 2618,7 @@ pickImplementation(int size)
     } 
     
     std::string preference[] = {
-        "ipp", "vdsp", "fftw", "builtin", "kissfft"
+        "ipp", "vdsp", "sleef", "fftw", "builtin", "kissfft"
     };
 
     for (int i = 0; i < int(sizeof(preference)/sizeof(preference[0])); ++i) {

          
@@ 2391,6 2699,10 @@ FFT::FFT(int size, int debugLevel) :
 #ifdef HAVE_FFTW3
         d = new FFTs::D_FFTW(size);
 #endif
+    } else if (impl == "sleef") {
+#ifdef HAVE_SLEEF
+        d = new FFTs::D_SLEEF(size);
+#endif
     } else if (impl == "kissfft") {        
 #ifdef HAVE_KISSFFT
         d = new FFTs::D_KISSFFT(size);

          
@@ 2650,6 2962,14 @@ FFT::tune()
         candidates["fftw"] = d;
 #endif
 
+#ifdef HAVE_SLEEF
+        os << "Constructing new SLEEF FFT object for size " << size << "..." << std::endl;
+        d = new FFTs::D_SLEEF(size);
+        d->initFloat();
+        d->initDouble();
+        candidates["sleef"] = d;
+#endif
+
 #ifdef HAVE_KISSFFT
         os << "Constructing new KISSFFT object for size " << size << "..." << std::endl;
         d = new FFTs::D_KISSFFT(size);