@@ 4,8 4,8 @@ bqfft
A small library wrapping various FFT implementations for some common
audio processing use cases. Contains a built-in implementation and
-wrappers for FFTW3, KissFFT, Intel IPP, and Apple vDSP. Suitable for
-Windows, Mac, Linux, and mobile platforms.
+wrappers for FFTW3, SLEEF, KissFFT, Intel IPP, and Apple vDSP.
+Suitable for Windows, Mac, Linux, and mobile platforms.
Note this is not a general FFT interface, as it handles only real
signals on the time-domain side.
@@ 15,25 15,28 @@ that bqfft does not know how to calculat
that have been compiled in, a simple slow DFT will be used instead. A
warning will be printed to stderr if this happens.
-Of the available libraries, vDSP, IPP, and the built-in implementation
-support power-of-two FFT lengths only, KissFFT supports any multiple
-of two, and FFTW supports any length. You can compile in more than one
-library, so for example if you compile in Accelerate and KissFFT, the
-former will be used for powers of two and the latter for other even
-lengths.
+Of the available libraries, vDSP, IPP, SLEEF, and the built-in
+implementation support power-of-two FFT lengths only, KissFFT supports
+any multiple of two, and FFTW supports any length. You can compile in
+more than one library, so for example if you compile in Accelerate and
+KissFFT, the former will be used for powers of two and the latter for
+other even lengths.
Here are some other pros and cons of the supported libraries:
- * Intel IPP - By far the fastest on actual Intel hardware. Of
- uncertain benefit with other manufacturers. Not available beyond
- x86/amd64, not open source.
+ * Intel IPP - The fastest on actual Intel hardware. Of uncertain
+ benefit with other manufacturers. Not available beyond x86/amd64,
+ not open source.
- * Apple vDSP - Faster than the open source libraries on all Apple
- hardware, and provided with the OS. There is seldom any good reason
- not to use this on Apple platforms.
+ * Apple vDSP - Generally the fastest on all Apple hardware, and
+ provided with the OS. There is seldom any good reason not to use
+ this on Apple platforms.
- * FFTW3 - Fastest open source library and portable, but its bulk and
- GPL licence may be an issue.
+ * SLEEF - Typically very fast, unencumbered, portable, open source
+ vector library; complex and (at the time of writing) rather new.
+
+ * FFTW3 - Fast, open source, and portable, but its bulk and GPL
+ licence may be an issue.
* KissFFT - As used here it is single-precision throughout, so it may
be a good choice for platforms on which double-precision arithmetic
@@ 44,7 47,7 @@ Here are some other pros and cons of the
* Built-in implementation - Double precision, so more precise than
KissFFT, and faster on typical 64-bit desktop and modern mobile
- hardware. Slower than IPP, vDSP, and FFTW3.
+ hardware. Slower than IPP, vDSP, SLEEF, and FFTW3.
Requires the bqvec library.
@@ 69,5 72,5 @@ C++ standard required: C++98 (does not u
[![Build status](https://builds.sr.ht/~breakfastquay/bqfft.svg)](https://builds.sr.ht/~breakfastquay/bqfft?)
-Copyright 2007-2021 Particular Programs Ltd. See the file COPYING for
+Copyright 2007-2022 Particular Programs Ltd. See the file COPYING for
(BSD/MIT-style) licence terms.
@@ 63,6 63,13 @@
#include <fftw3.h>
#endif
+#ifdef HAVE_SLEEF
+extern "C" {
+#include <sleef.h>
+#include <sleefdft.h>
+}
+#endif
+
#ifdef HAVE_VDSP
#include <Accelerate/Accelerate.h>
#endif
@@ 73,6 80,7 @@
#ifndef HAVE_IPP
#ifndef HAVE_FFTW3
+#ifndef HAVE_SLEEF
#ifndef HAVE_KISSFFT
#ifndef USE_BUILTIN_FFT
#ifndef HAVE_VDSP
@@ 82,6 90,7 @@
#endif
#endif
#endif
+#endif
#include <cmath>
#include <iostream>
@@ 1437,6 1446,302 @@ pthread_mutex_t D_FFTW::m_commonMutex =
#endif /* HAVE_FFTW3 */
+#ifdef HAVE_SLEEF
+
+class D_SLEEF : public FFTImpl
+{
+ bool isAligned(const void *ptr) {
+ return ! ((uintptr_t)ptr & 63);
+ }
+
+public:
+ D_SLEEF(int size) :
+ m_fplanf(0), m_fplani(0), m_fbuf(0), m_fpacked(0),
+ m_dplanf(0), m_dplani(0), m_dbuf(0), m_dpacked(0),
+ m_size(size)
+ {
+ }
+
+ ~D_SLEEF() {
+ if (m_fplanf) {
+ SleefDFT_dispose(m_fplanf);
+ SleefDFT_dispose(m_fplani);
+ Sleef_free(m_fbuf);
+ Sleef_free(m_fpacked);
+ }
+ if (m_dplanf) {
+ SleefDFT_dispose(m_dplanf);
+ SleefDFT_dispose(m_dplani);
+ Sleef_free(m_dbuf);
+ Sleef_free(m_dpacked);
+ }
+ }
+
+ int getSize() const {
+ return m_size;
+ }
+
+ FFT::Precisions
+ getSupportedPrecisions() const {
+ return FFT::SinglePrecision | FFT::DoublePrecision;
+ }
+
+ void initFloat() {
+ if (m_fplanf) return;
+
+ m_fbuf = static_cast<float *>
+ (Sleef_malloc(m_size * sizeof(float)));
+ m_fpacked = static_cast<float *>
+ (Sleef_malloc((m_size + 2) * sizeof(float)));
+
+ m_fplanf = SleefDFT_float_init1d
+ (m_size, m_fbuf, m_fpacked,
+ SLEEF_MODE_FORWARD | SLEEF_MODE_REAL | SLEEF_MODE_ESTIMATE);
+
+ m_fplani = SleefDFT_float_init1d
+ (m_size, m_fpacked, m_fbuf,
+ SLEEF_MODE_BACKWARD | SLEEF_MODE_REAL | SLEEF_MODE_ESTIMATE);
+ }
+
+ void initDouble() {
+ if (m_dplanf) return;
+
+ m_dbuf = static_cast<double *>
+ (Sleef_malloc(m_size * sizeof(double)));
+ m_dpacked = static_cast<double *>
+ (Sleef_malloc((m_size + 2) * sizeof(double)));
+
+ m_dplanf = SleefDFT_double_init1d
+ (m_size, m_dbuf, m_dpacked,
+ SLEEF_MODE_FORWARD | SLEEF_MODE_REAL | SLEEF_MODE_ESTIMATE);
+
+ m_dplani = SleefDFT_double_init1d
+ (m_size, m_dpacked, m_dbuf,
+ SLEEF_MODE_BACKWARD | SLEEF_MODE_REAL | SLEEF_MODE_ESTIMATE);
+ }
+
+ void packFloat(const float *BQ_R__ re, const float *BQ_R__ im) {
+ const float *src[2] = { re, im };
+ v_interleave(m_fpacked, src, 2, m_size/2 + 1);
+ }
+
+ void packDouble(const double *BQ_R__ re, const double *BQ_R__ im) {
+ const double *src[2] = { re, im };
+ v_interleave(m_dpacked, src, 2, m_size/2 + 1);
+ }
+
+ void unpackFloat(float *BQ_R__ re, float *BQ_R__ im) {
+ float *dst[2] = { re, im };
+ v_deinterleave(dst, m_fpacked, 2, m_size/2 + 1);
+ }
+
+ void unpackDouble(double *BQ_R__ re, double *BQ_R__ im) {
+ double *dst[2] = { re, im };
+ v_deinterleave(dst, m_dpacked, 2, m_size/2 + 1);
+ }
+
+ void forward(const double *BQ_R__ realIn, double *BQ_R__ realOut, double *BQ_R__ imagOut) {
+ if (!m_dplanf) initDouble();
+ if (isAligned(realIn)) {
+ SleefDFT_double_execute(m_dplanf, realIn, 0);
+ } else {
+ v_copy(m_dbuf, realIn, m_size);
+ SleefDFT_double_execute(m_dplanf, 0, 0);
+ }
+ unpackDouble(realOut, imagOut);
+ }
+
+ void forwardInterleaved(const double *BQ_R__ realIn, double *BQ_R__ complexOut) {
+ if (!m_dplanf) initDouble();
+ if (isAligned(realIn) && isAligned(complexOut)) {
+ SleefDFT_double_execute(m_dplanf, realIn, complexOut);
+ } else {
+ v_copy(m_dbuf, realIn, m_size);
+ SleefDFT_double_execute(m_dplanf, 0, 0);
+ v_copy(complexOut, m_dpacked, m_size + 2);
+ }
+ }
+
+ void forwardPolar(const double *BQ_R__ realIn, double *BQ_R__ magOut, double *BQ_R__ phaseOut) {
+ if (!m_dplanf) initDouble();
+ if (isAligned(realIn)) {
+ SleefDFT_double_execute(m_dplanf, realIn, 0);
+ } else {
+ v_copy(m_dbuf, realIn, m_size);
+ SleefDFT_double_execute(m_dplanf, 0, 0);
+ }
+ v_cartesian_interleaved_to_polar(magOut, phaseOut, m_dpacked, m_size/2+1);
+ }
+
+ void forwardMagnitude(const double *BQ_R__ realIn, double *BQ_R__ magOut) {
+ if (!m_dplanf) initDouble();
+ if (isAligned(realIn)) {
+ SleefDFT_double_execute(m_dplanf, realIn, 0);
+ } else {
+ v_copy(m_dbuf, realIn, m_size);
+ SleefDFT_double_execute(m_dplanf, 0, 0);
+ }
+ v_cartesian_interleaved_to_magnitudes(magOut, m_dpacked, m_size/2+1);
+ }
+
+ void forward(const float *BQ_R__ realIn, float *BQ_R__ realOut, float *BQ_R__ imagOut) {
+ if (!m_fplanf) initFloat();
+ if (isAligned(realIn)) {
+ SleefDFT_float_execute(m_fplanf, realIn, 0);
+ } else {
+ v_copy(m_fbuf, realIn, m_size);
+ SleefDFT_float_execute(m_fplanf, 0, 0);
+ }
+ unpackFloat(realOut, imagOut);
+ }
+
+ void forwardInterleaved(const float *BQ_R__ realIn, float *BQ_R__ complexOut) {
+ if (!m_fplanf) initFloat();
+ if (isAligned(realIn) && isAligned(complexOut)) {
+ SleefDFT_float_execute(m_fplanf, realIn, complexOut);
+ } else {
+ v_copy(m_fbuf, realIn, m_size);
+ SleefDFT_float_execute(m_fplanf, 0, 0);
+ v_copy(complexOut, m_fpacked, m_size + 2);
+ }
+ }
+
+ void forwardPolar(const float *BQ_R__ realIn, float *BQ_R__ magOut, float *BQ_R__ phaseOut) {
+ if (!m_fplanf) initFloat();
+ if (isAligned(realIn)) {
+ SleefDFT_float_execute(m_fplanf, realIn, 0);
+ } else {
+ v_copy(m_fbuf, realIn, m_size);
+ SleefDFT_float_execute(m_fplanf, 0, 0);
+ }
+ v_cartesian_interleaved_to_polar(magOut, phaseOut, m_fpacked, m_size/2+1);
+ }
+
+ void forwardMagnitude(const float *BQ_R__ realIn, float *BQ_R__ magOut) {
+ if (!m_fplanf) initFloat();
+ if (isAligned(realIn)) {
+ SleefDFT_float_execute(m_fplanf, realIn, 0);
+ } else {
+ v_copy(m_fbuf, realIn, m_size);
+ SleefDFT_float_execute(m_fplanf, 0, 0);
+ }
+ v_cartesian_interleaved_to_magnitudes(magOut, m_fpacked, m_size/2+1);
+ }
+
+ void inverse(const double *BQ_R__ realIn, const double *BQ_R__ imagIn, double *BQ_R__ realOut) {
+ if (!m_dplanf) initDouble();
+ packDouble(realIn, imagIn);
+ if (isAligned(realOut)) {
+ SleefDFT_double_execute(m_dplani, 0, realOut);
+ } else {
+ SleefDFT_double_execute(m_dplani, 0, 0);
+ v_copy(realOut, m_dbuf, m_size);
+ }
+ }
+
+ void inverseInterleaved(const double *BQ_R__ complexIn, double *BQ_R__ realOut) {
+ if (!m_dplanf) initDouble();
+ if (isAligned(complexIn) && isAligned(realOut)) {
+ SleefDFT_double_execute(m_dplani, complexIn, realOut);
+ } else {
+ v_copy(m_dpacked, complexIn, m_size + 2);
+ SleefDFT_double_execute(m_dplani, 0, 0);
+ v_copy(realOut, m_dbuf, m_size);
+ }
+ }
+
+ void inversePolar(const double *BQ_R__ magIn, const double *BQ_R__ phaseIn, double *BQ_R__ realOut) {
+ if (!m_dplanf) initDouble();
+ v_polar_to_cartesian_interleaved(m_dpacked, magIn, phaseIn, m_size/2+1);
+ if (isAligned(realOut)) {
+ SleefDFT_double_execute(m_dplani, 0, realOut);
+ } else {
+ SleefDFT_double_execute(m_dplani, 0, 0);
+ v_copy(realOut, m_dbuf, m_size);
+ }
+ }
+
+ void inverseCepstral(const double *BQ_R__ magIn, double *BQ_R__ cepOut) {
+ if (!m_dplanf) initDouble();
+ const int hs = m_size/2;
+ for (int i = 0; i <= hs; ++i) {
+ m_dpacked[i*2] = log(magIn[i] + 0.000001);
+ m_dpacked[i*2+1] = 0.0;
+ }
+ if (isAligned(cepOut)) {
+ SleefDFT_double_execute(m_dplani, 0, cepOut);
+ } else {
+ SleefDFT_double_execute(m_dplani, 0, 0);
+ v_copy(cepOut, m_dbuf, m_size);
+ }
+ }
+
+ void inverse(const float *BQ_R__ realIn, const float *BQ_R__ imagIn, float *BQ_R__ realOut) {
+ if (!m_fplanf) initFloat();
+ packFloat(realIn, imagIn);
+ if (isAligned(realOut)) {
+ SleefDFT_float_execute(m_dplani, 0, realOut);
+ } else {
+ SleefDFT_float_execute(m_fplani, 0, 0);
+ v_copy(realOut, m_fbuf, m_size);
+ }
+ }
+
+ void inverseInterleaved(const float *BQ_R__ complexIn, float *BQ_R__ realOut) {
+ if (!m_fplanf) initFloat();
+ if (isAligned(complexIn) && isAligned(realOut)) {
+ SleefDFT_float_execute(m_fplani, complexIn, realOut);
+ } else {
+ v_copy(m_fpacked, complexIn, m_size + 2);
+ SleefDFT_float_execute(m_fplani, 0, 0);
+ v_copy(realOut, m_fbuf, m_size);
+ }
+ }
+
+ void inversePolar(const float *BQ_R__ magIn, const float *BQ_R__ phaseIn, float *BQ_R__ realOut) {
+ if (!m_fplanf) initFloat();
+ v_polar_to_cartesian_interleaved(m_fpacked, magIn, phaseIn, m_size/2+1);
+ if (isAligned(realOut)) {
+ SleefDFT_float_execute(m_fplani, 0, realOut);
+ } else {
+ SleefDFT_float_execute(m_fplani, 0, 0);
+ v_copy(realOut, m_fbuf, m_size);
+ }
+ }
+
+ void inverseCepstral(const float *BQ_R__ magIn, float *BQ_R__ cepOut) {
+ if (!m_fplanf) initFloat();
+ const int hs = m_size/2;
+ for (int i = 0; i <= hs; ++i) {
+ m_fpacked[i*2] = logf(magIn[i] + 0.000001f);
+ m_fpacked[i*2+1] = 0.0;
+ }
+ if (isAligned(cepOut)) {
+ SleefDFT_float_execute(m_fplani, 0, cepOut);
+ } else {
+ SleefDFT_float_execute(m_fplani, 0, 0);
+ v_copy(cepOut, m_fbuf, m_size);
+ }
+ }
+
+private:
+ SleefDFT *m_fplanf;
+ SleefDFT *m_fplani;
+
+ float *m_fbuf;
+ float *m_fpacked;
+
+ SleefDFT *m_dplanf;
+ SleefDFT *m_dplani;
+
+ double *m_dbuf;
+ double *m_dpacked;
+
+ const int m_size;
+};
+
+#endif /* HAVE_SLEEF */
+
#ifdef HAVE_KISSFFT
class D_KISSFFT : public FFTImpl
@@ 2278,6 2583,9 @@ getImplementationDetails()
#ifdef HAVE_FFTW3
impls["fftw"] = SizeConstraintNone;
#endif
+#ifdef HAVE_SLEEF
+ impls["sleef"] = SizeConstraintEvenPowerOfTwo;
+#endif
#ifdef HAVE_KISSFFT
impls["kissfft"] = SizeConstraintEven;
#endif
@@ 2322,7 2630,7 @@ pickImplementation(int size)
}
std::string preference[] = {
- "ipp", "vdsp", "fftw", "builtin", "kissfft"
+ "ipp", "vdsp", "sleef", "fftw", "builtin", "kissfft"
};
for (int i = 0; i < int(sizeof(preference)/sizeof(preference[0])); ++i) {
@@ 2403,6 2711,10 @@ FFT::FFT(int size, int debugLevel) :
#ifdef HAVE_FFTW3
d = new FFTs::D_FFTW(size);
#endif
+ } else if (impl == "sleef") {
+#ifdef HAVE_SLEEF
+ d = new FFTs::D_SLEEF(size);
+#endif
} else if (impl == "kissfft") {
#ifdef HAVE_KISSFFT
d = new FFTs::D_KISSFFT(size);
@@ 2661,6 2973,14 @@ FFT::tune()
d->initDouble();
candidates["fftw"] = d;
#endif
+
+#ifdef HAVE_SLEEF
+ os << "Constructing new SLEEF FFT object for size " << size << "..." << std::endl;
+ d = new FFTs::D_SLEEF(size);
+ d->initFloat();
+ d->initDouble();
+ candidates["sleef"] = d;
+#endif
#ifdef HAVE_KISSFFT
os << "Constructing new KISSFFT object for size " << size << "..." << std::endl;
@@ 2705,19 3025,21 @@ FFT::tune()
int iterations = 500;
os << "Iterations: " << iterations << std::endl;
- double *da = new double[size];
- double *db = new double[size];
- double *dc = new double[size];
- double *dd = new double[size];
- double *di = new double[size + 2];
- double *dj = new double[size + 2];
-
- float *fa = new float[size];
- float *fb = new float[size];
- float *fc = new float[size];
- float *fd = new float[size];
- float *fi = new float[size + 2];
- float *fj = new float[size + 2];
+ double *da = allocate_and_zero<double>(size);
+ double *db = allocate_and_zero<double>(size);
+ double *dc = allocate_and_zero<double>(size);
+ double *dd = allocate_and_zero<double>(size);
+
+ double *di = allocate_and_zero<double>(size + 2);
+ double *dj = allocate_and_zero<double>(size + 2);
+
+ float *fa = allocate_and_zero<float>(size);
+ float *fb = allocate_and_zero<float>(size);
+ float *fc = allocate_and_zero<float>(size);
+ float *fd = allocate_and_zero<float>(size);
+
+ float *fi = allocate_and_zero<float>(size + 2);
+ float *fj = allocate_and_zero<float>(size + 2);
for (int type = 0; type < 16; ++type) {
@@ 2846,15 3168,22 @@ FFT::tune()
wins[low]++;
}
+
+ deallocate(da);
+ deallocate(db);
+ deallocate(dc);
+ deallocate(dd);
+
+ deallocate(di);
+ deallocate(dj);
- delete[] fa;
- delete[] fb;
- delete[] fc;
- delete[] fd;
- delete[] da;
- delete[] db;
- delete[] dc;
- delete[] dd;
+ deallocate(fa);
+ deallocate(fb);
+ deallocate(fc);
+ deallocate(fd);
+
+ deallocate(fi);
+ deallocate(fj);
}
while (!candidates.empty()) {