5b4821916a23 — Chris Cannam 9 months ago
Add some basic SLEEF support. I don't think any of this is useful as it stands, though, as it takes no advantage of the actual vector functions yet.
4 files changed, 106 insertions(+), 2 deletions(-)

M bqvec/Allocators.h
M bqvec/VectorOps.h
M bqvec/VectorOpsComplex.h
A => build/Makefile.linux.sleef
M bqvec/Allocators.h +19 -2
@@ 89,6 89,12 @@ 
 #include <errno.h>
 #endif
 
+#ifdef HAVE_SLEEF
+extern "C" {
+#include <sleef.h>
+}
+#endif
+
 #ifndef NO_EXCEPTIONS
 #ifdef LACK_BAD_ALLOC
 namespace std {

          
@@ 111,6 117,11 @@ T *allocate(size_t count)
     // allocate<float> and allocate<double> below, where we're more
     // likely to get away with it.
 
+    // The SLEEF allocator does accept a size_t however
+#ifdef HAVE_SLEEF
+    ptr = Sleef_malloc(count * sizeof(T));
+#else /* !HAVE_SLEEF */
+    
 #ifdef MALLOC_IS_ALIGNED
     ptr = malloc(count * sizeof(T));
 #else /* !MALLOC_IS_ALIGNED */

          
@@ 158,12 169,13 @@ T *allocate(size_t count)
 
 #else /* !USE_OWN_ALIGNED_MALLOC */
 
-#error "No aligned malloc available: define MALLOC_IS_ALIGNED to use system malloc, HAVE_POSIX_MEMALIGN if posix_memalign is available, HAVE__ALIGNED_MALLOC if _aligned_malloc is available, or USE_OWN_ALIGNED_MALLOC to roll our own"
+#error "No aligned malloc available: define MALLOC_IS_ALIGNED to use system malloc, HAVE_POSIX_MEMALIGN if posix_memalign is available, HAVE__ALIGNED_MALLOC if _aligned_malloc is available, HAVE_SLEEF if using the SLEEF library, or USE_OWN_ALIGNED_MALLOC to roll our own"
 
 #endif /* !USE_OWN_ALIGNED_MALLOC */
 #endif /* !HAVE_POSIX_MEMALIGN */
 #endif /* !HAVE__ALIGNED_MALLOC */
 #endif /* !MALLOC_IS_ALIGNED */
+#endif /* !HAVE_SLEEF */
 
     if (!ptr) {
 #ifndef NO_EXCEPTIONS

          
@@ 204,6 216,10 @@ template <typename T>
 void deallocate(T *ptr)
 {
     if (!ptr) return;
+
+#ifdef HAVE_SLEEF
+    Sleef_free((void *)ptr);
+#else /* !HAVE_SLEEF */
     
 #ifdef MALLOC_IS_ALIGNED
     free((void *)ptr);

          
@@ 221,12 237,13 @@ void deallocate(T *ptr)
     free(((void **)ptr)[-1]);
 #else /* !USE_OWN_ALIGNED_MALLOC */
 
-#error "No aligned malloc available: define MALLOC_IS_ALIGNED to use system malloc, HAVE_POSIX_MEMALIGN if posix_memalign is available, or USE_OWN_ALIGNED_MALLOC to roll our own"
+#error "No aligned malloc available: define MALLOC_IS_ALIGNED to use system malloc, HAVE_POSIX_MEMALIGN if posix_memalign is available, HAVE__ALIGNED_MALLOC if _aligned_malloc is available, HAVE_SLEEF if using the SLEEF library, or USE_OWN_ALIGNED_MALLOC to roll our own"
 
 #endif /* !USE_OWN_ALIGNED_MALLOC */
 #endif /* !HAVE_POSIX_MEMALIGN */
 #endif /* !HAVE__ALIGNED_MALLOC */
 #endif /* !MALLOC_IS_ALIGNED */
+#endif /* !HAVE_SLEEF */
 }
 
 #ifdef HAVE_IPP

          
M bqvec/VectorOps.h +57 -0
@@ 53,6 53,12 @@ 
 #include <alloca.h>
 #endif
 
+#ifdef HAVE_SLEEF
+extern "C" {
+#include <sleef.h>
+}
+#endif
+
 #include <cstring>
 #include <cmath>
 

          
@@ 886,6 892,23 @@ inline void v_log(double *const BQ_R__ s
 {
     ippsLn_64f_I(srcdst, count);
 }
+#elif defined HAVE_SLEEF
+// In-place log of `count` floats, one scalar Sleef_logf_u10 call per element.
+template<>
+inline void v_log(float *const BQ_R__ srcdst, const int count)
+{
+    for (int n = 0; n < count; ++n) {
+        srcdst[n] = Sleef_logf_u10(srcdst[n]);
+    }
+}
+// In-place log of `count` doubles, one scalar Sleef_log_u10 call per element.
+template<>
+inline void v_log(double *const BQ_R__ srcdst, const int count)
+{
+    for (int n = 0; n < count; ++n) {
+        srcdst[n] = Sleef_log_u10(srcdst[n]);
+    }
+}
 #elif defined HAVE_VDSP
 // no in-place vForce functions for these -- can we use the
 // out-of-place functions with equal input and output vectors? can we

          
@@ 937,6 960,23 @@ inline void v_exp(double *const BQ_R__ s
 {
     ippsExp_64f_I(srcdst, count);
 }
+#elif defined HAVE_SLEEF
+// In-place exp of `count` floats, one scalar Sleef_expf_u10 call per element.
+template<>
+inline void v_exp(float *const BQ_R__ srcdst, const int count)
+{
+    for (int n = 0; n < count; ++n) {
+        srcdst[n] = Sleef_expf_u10(srcdst[n]);
+    }
+}
+// In-place exp of `count` doubles, one scalar Sleef_exp_u10 call per element.
+template<>
+inline void v_exp(double *const BQ_R__ srcdst, const int count)
+{
+    for (int n = 0; n < count; ++n) {
+        srcdst[n] = Sleef_exp_u10(srcdst[n]);
+    }
+}
 #elif defined HAVE_VDSP
 // no in-place vForce functions for these -- can we use the
 // out-of-place functions with equal input and output vectors? can we

          
@@ 988,6 1028,23 @@ inline void v_sqrt(double *const BQ_R__ 
 {
     ippsSqrt_64f_I(srcdst, count);
 }
+#elif defined HAVE_SLEEF
+// In-place sqrt of `count` floats, one scalar Sleef_sqrtf_u05 call per element.
+template<>
+inline void v_sqrt(float *const BQ_R__ srcdst, const int count)
+{
+    for (int n = 0; n < count; ++n) {
+        srcdst[n] = Sleef_sqrtf_u05(srcdst[n]);
+    }
+}
+// In-place sqrt of `count` doubles, one scalar Sleef_sqrt_u05 call per element.
+template<>
+inline void v_sqrt(double *const BQ_R__ srcdst, const int count)
+{
+    for (int n = 0; n < count; ++n) {
+        srcdst[n] = Sleef_sqrt_u05(srcdst[n]);
+    }
+}
 #elif defined HAVE_VDSP
 // no in-place vForce functions for these -- can we use the
 // out-of-place functions with equal input and output vectors? can we

          
M bqvec/VectorOpsComplex.h +20 -0
@@ 422,6 422,16 @@ inline void c_phasor(T *real, T *imag, T
     } else {
         vvsincos((double *)imag, (double *)real, (const double *)&phase, &one);
     }
+#elif defined HAVE_SLEEF
+    if (sizeof(T) == sizeof(float)) {
+        Sleef_float2 out = Sleef_sincosf_u10(float(phase));
+        *imag = out.x;
+        *real = out.y;
+    } else {
+        Sleef_double2 out = Sleef_sincos_u10(double(phase));
+        *imag = out.x;
+        *real = out.y;
+    }
 #elif defined LACK_SINCOS
     if (sizeof(T) == sizeof(float)) {
         *real = cosf(phase);

          
@@ 454,8 464,18 @@ inline void c_phasor(T *real, T *imag, T
 template<typename T>
 inline void c_magphase(T *mag, T *phase, T real, T imag)
 {
+#if defined HAVE_SLEEF /* scalar SLEEF path: writes sqrt(re^2 + im^2) to *mag and atan2(im, re) to *phase */
+    if (sizeof(T) == sizeof(float)) { // T is float-sized: use the single-precision entry points
+        *mag = Sleef_sqrtf_u05(real * real + imag * imag);
+        *phase = Sleef_atan2f_u10(imag, real);
+    } else { // otherwise double: same accuracy grades as the float branch, not the looser u35 variants
+        *mag = Sleef_sqrt_u05(real * real + imag * imag);
+        *phase = Sleef_atan2_u10(imag, real);
+    }
+#else /* !HAVE_SLEEF */
     *mag = sqrt(real * real + imag * imag);
     *phase = atan2(imag, real);
+#endif /* !HAVE_SLEEF */
 }
 
 #if defined USE_APPROXIMATE_ATAN2

          
A => build/Makefile.linux.sleef +10 -0
@@ 0,0 1,10 @@ 
+# Linux build settings: define HAVE_SLEEF and link against libsleef found under /usr/local.
+VECTOR_DEFINES		:= -DHAVE_SLEEF
+
+ALLOCATOR_DEFINES 	:= -DHAVE_POSIX_MEMALIGN
+
+THIRD_PARTY_INCLUDES	:= -I/usr/local/include
+THIRD_PARTY_LIBS	:= -L/usr/local/lib -lsleef
+
+include build/Makefile.inc
+