M Makefile +2 -3
@@ 6,17 6,16 @@
#
# -DHAVE_IPP Intel's Integrated Performance Primitives are available
# -DHAVE_VDSP Apple's Accelerate framework is available
+# -DHAVE_SLEEF The SLEEF Vectorized Math Library is available
#
# The above are optional (they affect performance, not function) and
# you may define more than one of them.
#
-# The following two options trade off speed against precision for single-
+# The following option trades off speed against precision for single-
# precision paths in cases where IPP and VDSP are not available:
#
# -DUSE_POMMIER_MATHFUN Use Julien Pommier's SSE/NEON implementation
# of sincos in 32-bit polar-to-cartesian conversion
-# -DUSE_APPROXIMATE_ATAN2 Use a quick but *very* approximate atan2
-# function in 32-bit cartesian-to-polar conversion
#
# And a handful of miscellaneous flags:
#
M README.md +1 -1
@@ 33,6 33,6 @@ C++ standard required: C++98 (does not u
[](https://builds.sr.ht/~breakfastquay/bqvec?)
-Copyright 2007-2021 Particular Programs Ltd. See the file COPYING for
+Copyright 2007-2022 Particular Programs Ltd. See the file COPYING for
(BSD/MIT-style) licence terms.
M bqvec/VectorOpsComplex.h +1 -13
@@ 6,7 6,7 @@
A small library for vector arithmetic and allocation in C++ using
raw C pointer arrays.
- Copyright 2007-2021 Particular Programs Ltd.
+ Copyright 2007-2022 Particular Programs Ltd.
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
@@ 460,24 460,12 @@ inline void c_magphase(T *mag, T *phase,
*phase = atan2(imag, real);
}
-#if defined USE_APPROXIMATE_ATAN2
-// NB arguments in opposite order from usual for atan2f
-extern float approximate_atan2f(float real, float imag);
-template<>
-inline void c_magphase(float *mag, float *phase, float real, float imag)
-{
- float atan = approximate_atan2f(real, imag);
- *phase = atan;
- *mag = sqrtf(real * real + imag * imag);
-}
-#else // !USE_APPROXIMATE_ATAN2
// Single-precision specialisation of c_magphase: stores
// sqrtf(real * real + imag * imag) in *mag and atan2f(imag, real)
// in *phase.
template<>
inline void c_magphase(float *mag, float *phase, float real, float imag)
{
*mag = sqrtf(real * real + imag * imag);
*phase = atan2f(imag, real);
}
-#endif
template<typename S, typename T> // S source, T target
void v_polar_to_cartesian(T *const BQ_R__ real,
M build/Makefile.linux.min +1 -1
@@ 3,7 3,7 @@
# small device without library support. It's mainly here as another CI
# test target that is quite different from the default configuration
-VECTOR_DEFINES := -DNO_EXCEPTIONS -DUSE_SINGLE_PRECISION_COMPLEX -DUSE_POMMIER_MATHFUN -DUSE_APPROXIMATE_ATAN2
+VECTOR_DEFINES := -DNO_EXCEPTIONS -DUSE_SINGLE_PRECISION_COMPLEX -DUSE_POMMIER_MATHFUN
ALLOCATOR_DEFINES := -DUSE_OWN_ALIGNED_MALLOC -DLACK_POSIX_MEMALIGN -DMALLOC_IS_NOT_ALIGNED
M src/VectorOpsComplex.cpp +27 -53
@@ 6,7 6,7 @@
A small library for vector arithmetic and allocation in C++ using
raw C pointer arrays.
- Copyright 2007-2021 Particular Programs Ltd.
+ Copyright 2007-2022 Particular Programs Ltd.
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
@@ 58,40 58,6 @@ using namespace std;
namespace breakfastquay {
-#ifdef USE_APPROXIMATE_ATAN2
-float approximate_atan2f(float real, float imag)
-{
- static const float pi = M_PI;
- static const float pi2 = M_PI / 2;
-
- float atan;
-
- if (real == 0.f) {
-
- if (imag > 0.0f) atan = pi2;
- else if (imag == 0.0f) atan = 0.0f;
- else atan = -pi2;
-
- } else {
-
- float z = imag/real;
-
- if (fabsf(z) < 1.f) {
- atan = z / (1.f + 0.28f * z * z);
- if (real < 0.f) {
- if (imag < 0.f) atan -= pi;
- else atan += pi;
- }
- } else {
- atan = pi2 - z / (z * z + 0.28f);
- if (imag < 0.f) atan -= pi;
- }
- }
-
- return atan;
-}
-#endif
-
#if defined USE_POMMIER_MATHFUN
#ifdef __ARMEL__
@@ 221,34 187,42 @@ v_polar_to_cartesian_interleaved_pommier
#if defined USE_POMMIER_MATHFUN
-//!!! further tests reqd. This is only single precision but it seems
-//!!! to be much faster than normal math library sincos. The comments
-//!!! note that precision suffers for high arguments to sincos though,
-//!!! and that is probably a common case for us
-
void
v_polar_interleaved_to_cartesian(bq_complex_t *const BQ_R__ dst,
const bq_complex_element_t *const BQ_R__ src,
const int count)
{
- int idx = 0, tidx = 0;
+ if (sizeof(bq_complex_element_t) == sizeof(float)) {
+
+ int idx = 0, tidx = 0;
+
+ for (int i = 0; i < count; i += 4) {
- for (int i = 0; i < count; i += 4) {
+ V4SF fmag, fphase, fre, fim;
- V4SF fmag, fphase, fre, fim;
+ for (int j = 0; j < 3; ++j) {
+ fmag.f[j] = src[idx++];
+ fphase.f[j] = src[idx++];
+ }
+
+ sincos_ps(fphase.v, &fim.v, &fre.v);
- for (int j = 0; j < 3; ++j) {
- fmag.f[j] = src[idx++];
- fphase.f[j] = src[idx++];
+ for (int j = 0; j < 3; ++j) {
+ dst[tidx].re = fre.f[j] * fmag.f[j];
+ dst[tidx++].im = fim.f[j] * fmag.f[j];
+ }
}
-
- sincos_ps(fphase.v, &fim.v, &fre.v);
-
- for (int j = 0; j < 3; ++j) {
- dst[tidx].re = fre.f[j] * fmag.f[j];
- dst[tidx++].im = fim.f[j] * fmag.f[j];
+ } else {
+ bq_complex_element_t mag, phase;
+ int idx = 0;
+ for (int i = 0; i < count; ++i) {
+ mag = src[idx++];
+ phase = src[idx++];
+ dst[i] = c_phasor(phase);
+ dst[i].re *= mag;
+ dst[i].im *= mag;
}
- }
+ }
}
#elif (defined HAVE_IPP || defined HAVE_VDSP)
M test/TestVectorOpsComplex.cpp +0 -5
@@ 18,10 18,6 @@ using namespace std;
BOOST_AUTO_TEST_SUITE(TestVectorOpsComplex)
-#ifdef USE_APPROXIMATE_ATAN2
-static const double eps = 5.0e-3;
-static const double eps_approx = eps;
-#else
#ifdef USE_SINGLE_PRECISION_COMPLEX
static const double eps = 1.0e-7;
static const double eps_approx = 1.0e-5;
@@ 29,7 25,6 @@ static const double eps_approx = 1.0e-5;
static const double eps = 1.0e-14;
static const double eps_approx = 1.0e-8;
#endif
-#endif
#define COMPARE_N(a, b, n) \
for (int cmp_i = 0; cmp_i < n; ++cmp_i) { \