M bqvec/Allocators.h +19 -2
@@ 89,6 89,12 @@
#include <errno.h>
#endif
+#ifdef HAVE_SLEEF
+extern "C" {
+#include <sleef.h>
+}
+#endif
+
#ifndef NO_EXCEPTIONS
#ifdef LACK_BAD_ALLOC
namespace std {
@@ 111,6 117,11 @@ T *allocate(size_t count)
// allocate<float> and allocate<double> below, where we're more
// likely to get away with it.
+ // The SLEEF allocator does accept a size_t however
+#ifdef HAVE_SLEEF
+ ptr = Sleef_malloc(count * sizeof(T));
+#else /* !HAVE_SLEEF */
+
#ifdef MALLOC_IS_ALIGNED
ptr = malloc(count * sizeof(T));
#else /* !MALLOC_IS_ALIGNED */
@@ 158,12 169,13 @@ T *allocate(size_t count)
#else /* !USE_OWN_ALIGNED_MALLOC */
-#error "No aligned malloc available: define MALLOC_IS_ALIGNED to use system malloc, HAVE_POSIX_MEMALIGN if posix_memalign is available, HAVE__ALIGNED_MALLOC if _aligned_malloc is available, or USE_OWN_ALIGNED_MALLOC to roll our own"
+#error "No aligned malloc available: define MALLOC_IS_ALIGNED to use system malloc, HAVE_POSIX_MEMALIGN if posix_memalign is available, HAVE__ALIGNED_MALLOC if _aligned_malloc is available, HAVE_SLEEF if using the SLEEF library, or USE_OWN_ALIGNED_MALLOC to roll our own"
#endif /* !USE_OWN_ALIGNED_MALLOC */
#endif /* !HAVE_POSIX_MEMALIGN */
#endif /* !HAVE__ALIGNED_MALLOC */
#endif /* !MALLOC_IS_ALIGNED */
+#endif /* !HAVE_SLEEF */
if (!ptr) {
#ifndef NO_EXCEPTIONS
@@ 204,6 216,10 @@ template <typename T>
void deallocate(T *ptr)
{
if (!ptr) return;
+
+#ifdef HAVE_SLEEF
+ Sleef_free((void *)ptr);
+#else /* !HAVE_SLEEF */
#ifdef MALLOC_IS_ALIGNED
free((void *)ptr);
@@ 221,12 237,13 @@ void deallocate(T *ptr)
free(((void **)ptr)[-1]);
#else /* !USE_OWN_ALIGNED_MALLOC */
-#error "No aligned malloc available: define MALLOC_IS_ALIGNED to use system malloc, HAVE_POSIX_MEMALIGN if posix_memalign is available, or USE_OWN_ALIGNED_MALLOC to roll our own"
+#error "No aligned malloc available: define MALLOC_IS_ALIGNED to use system malloc, HAVE_POSIX_MEMALIGN if posix_memalign is available, HAVE__ALIGNED_MALLOC if _aligned_malloc is available, HAVE_SLEEF if using the SLEEF library, or USE_OWN_ALIGNED_MALLOC to roll our own"
#endif /* !USE_OWN_ALIGNED_MALLOC */
#endif /* !HAVE_POSIX_MEMALIGN */
#endif /* !HAVE__ALIGNED_MALLOC */
#endif /* !MALLOC_IS_ALIGNED */
+#endif /* !HAVE_SLEEF */
}
#ifdef HAVE_IPP
M bqvec/VectorOps.h +57 -0
@@ 53,6 53,12 @@
#include <alloca.h>
#endif
+#ifdef HAVE_SLEEF
+extern "C" {
+#include <sleef.h>
+}
+#endif
+
#include <cstring>
#include <cmath>
@@ 886,6 892,23 @@ inline void v_log(double *const BQ_R__ s
{
ippsLn_64f_I(srcdst, count);
}
+#elif defined HAVE_SLEEF
+// In-place logarithm of `count` floats, one scalar Sleef_logf_u10 call per element.
+template<>
+inline void v_log(float *const BQ_R__ srcdst, const int count)
+{
+    float *cursor = srcdst;
+    for (int left = count; left > 0; --left, ++cursor)
+        *cursor = Sleef_logf_u10(*cursor);
+}
+// In-place logarithm of `count` doubles, one scalar Sleef_log_u10 call per element.
+template<>
+inline void v_log(double *const BQ_R__ srcdst, const int count)
+{
+    double *cursor = srcdst;
+    for (int left = count; left > 0; --left, ++cursor)
+        *cursor = Sleef_log_u10(*cursor);
+}
#elif defined HAVE_VDSP
// no in-place vForce functions for these -- can we use the
// out-of-place functions with equal input and output vectors? can we
@@ 937,6 960,23 @@ inline void v_exp(double *const BQ_R__ s
{
ippsExp_64f_I(srcdst, count);
}
+#elif defined HAVE_SLEEF
+// In-place exponential of `count` floats, one scalar Sleef_expf_u10 call per element.
+template<>
+inline void v_exp(float *const BQ_R__ srcdst, const int count)
+{
+    float *cursor = srcdst;
+    for (int left = count; left > 0; --left, ++cursor)
+        *cursor = Sleef_expf_u10(*cursor);
+}
+// In-place exponential of `count` doubles, one scalar Sleef_exp_u10 call per element.
+template<>
+inline void v_exp(double *const BQ_R__ srcdst, const int count)
+{
+    double *cursor = srcdst;
+    for (int left = count; left > 0; --left, ++cursor)
+        *cursor = Sleef_exp_u10(*cursor);
+}
#elif defined HAVE_VDSP
// no in-place vForce functions for these -- can we use the
// out-of-place functions with equal input and output vectors? can we
@@ 988,6 1028,23 @@ inline void v_sqrt(double *const BQ_R__
{
ippsSqrt_64f_I(srcdst, count);
}
+#elif defined HAVE_SLEEF
+// In-place square root of `count` floats, one scalar Sleef_sqrtf_u05 call per element.
+template<>
+inline void v_sqrt(float *const BQ_R__ srcdst, const int count)
+{
+    float *cursor = srcdst;
+    for (int left = count; left > 0; --left, ++cursor)
+        *cursor = Sleef_sqrtf_u05(*cursor);
+}
+// In-place square root of `count` doubles, one scalar Sleef_sqrt_u05 call per element.
+template<>
+inline void v_sqrt(double *const BQ_R__ srcdst, const int count)
+{
+    double *cursor = srcdst;
+    for (int left = count; left > 0; --left, ++cursor)
+        *cursor = Sleef_sqrt_u05(*cursor);
+}
#elif defined HAVE_VDSP
// no in-place vForce functions for these -- can we use the
// out-of-place functions with equal input and output vectors? can we
M bqvec/VectorOpsComplex.h +20 -0
@@ 422,6 422,16 @@ inline void c_phasor(T *real, T *imag, T
} else {
vvsincos((double *)imag, (double *)real, (const double *)&phase, &one);
}
+#elif defined HAVE_SLEEF
+ if (sizeof(T) == sizeof(float)) {
+ Sleef_float2 out = Sleef_sincosf_u10(float(phase));
+ *imag = out.x;
+ *real = out.y;
+ } else {
+ Sleef_double2 out = Sleef_sincos_u10(double(phase));
+ *imag = out.x;
+ *real = out.y;
+ }
#elif defined LACK_SINCOS
if (sizeof(T) == sizeof(float)) {
*real = cosf(phase);
@@ 454,8 464,18 @@ inline void c_phasor(T *real, T *imag, T
template<typename T>
inline void c_magphase(T *mag, T *phase, T real, T imag)
{
+#if defined HAVE_SLEEF // *mag = sqrt(real^2 + imag^2), *phase = atan2(imag, real), via SLEEF
+    if (sizeof(T) == sizeof(float)) { // float-sized T: single-precision SLEEF routines
+        *mag = Sleef_sqrtf_u05(real * real + imag * imag);
+        *phase = Sleef_atan2f_u10(imag, real);
+    } else { // otherwise: double-precision SLEEF routines
+        *mag = Sleef_sqrt_u05(real * real + imag * imag); // u05, as in the float branch and v_sqrt
+        *phase = Sleef_atan2_u10(imag, real); // u10, so double is no less accurate than float
+    }
+#else // no SLEEF: plain sqrt/atan2
*mag = sqrt(real * real + imag * imag);
*phase = atan2(imag, real);
+#endif
}
#if defined USE_APPROXIMATE_ATAN2
A => build/Makefile.linux.sleef +10 -0
@@ 0,0 1,10 @@
+# Linux build settings that define HAVE_SLEEF to select the SLEEF-backed code paths.
+VECTOR_DEFINES := -DHAVE_SLEEF
+# NOTE(review): allocate() tests HAVE_SLEEF before HAVE_POSIX_MEMALIGN -- confirm this flag is still wanted.
+ALLOCATOR_DEFINES := -DHAVE_POSIX_MEMALIGN
+# Header/library search paths under /usr/local; links against libsleef.
+THIRD_PARTY_INCLUDES := -I/usr/local/include
+THIRD_PARTY_LIBS := -L/usr/local/lib -lsleef
+# The variables above are set before this include; presumably build/Makefile.inc consumes them.
+include build/Makefile.inc
+