23ac0b35b085 — Chris Cannam a month ago
Tidy, and remove the very approximate atan2f
M Makefile +2 -3
@@ 6,17 6,16 @@ 
 #
 #  -DHAVE_IPP    Intel's Integrated Performance Primitives are available
 #  -DHAVE_VDSP   Apple's Accelerate framework is available
+#  -DHAVE_SLEEF  The SLEEF Vectorized Math Library is available
 #
 # The above are optional (they affect performance, not function) and
 # you may define more than one of them.
 #
-# The following two options trade off speed against precision for single-
+# The following option trades off speed against precision for single-
 # precision paths in cases where IPP and VDSP are not available:
 #
 #  -DUSE_POMMIER_MATHFUN Use Julien Pommier's SSE/NEON implementation
 #   of sincos in 32-bit polar-to-cartesian conversion
-#  -DUSE_APPROXIMATE_ATAN2 Use a quick but *very* approximate atan2
-#   function in 32-bit cartesian-to-polar conversion
 #
 # And a handful of miscellaneous flags:
 #

          
M README.md +1 -1
@@ 33,6 33,6 @@ C++ standard required: C++98 (does not u
 
 [![Build status](https://builds.sr.ht/~breakfastquay/bqvec.svg)](https://builds.sr.ht/~breakfastquay/bqvec?)
 
-Copyright 2007-2021 Particular Programs Ltd. See the file COPYING for
+Copyright 2007-2022 Particular Programs Ltd. See the file COPYING for
 (BSD/MIT-style) licence terms.
 

          
M bqvec/VectorOpsComplex.h +1 -13
@@ 6,7 6,7 @@ 
     A small library for vector arithmetic and allocation in C++ using
     raw C pointer arrays.
 
-    Copyright 2007-2021 Particular Programs Ltd.
+    Copyright 2007-2022 Particular Programs Ltd.
 
     Permission is hereby granted, free of charge, to any person
     obtaining a copy of this software and associated documentation

          
@@ 460,24 460,12 @@ inline void c_magphase(T *mag, T *phase,
     *phase = atan2(imag, real);
 }
 
-#if defined USE_APPROXIMATE_ATAN2
-// NB arguments in opposite order from usual for atan2f
-extern float approximate_atan2f(float real, float imag);
-template<>
-inline void c_magphase(float *mag, float *phase, float real, float imag)
-{
-    float atan = approximate_atan2f(real, imag);
-    *phase = atan;
-    *mag = sqrtf(real * real + imag * imag);
-}
-#else // !USE_APPROXIMATE_ATAN2
 template<>
 inline void c_magphase(float *mag, float *phase, float real, float imag)
 {
     *mag = sqrtf(real * real + imag * imag);
     *phase = atan2f(imag, real);
 }
-#endif
 
 template<typename S, typename T> // S source, T target
 void v_polar_to_cartesian(T *const BQ_R__ real,

          
M build/Makefile.linux.min +1 -1
@@ 3,7 3,7 @@ 
 # small device without library support. It's mainly here as another CI
 # test target that is quite different from the default configuration
 
-VECTOR_DEFINES		:= -DNO_EXCEPTIONS -DUSE_SINGLE_PRECISION_COMPLEX -DUSE_POMMIER_MATHFUN -DUSE_APPROXIMATE_ATAN2
+VECTOR_DEFINES		:= -DNO_EXCEPTIONS -DUSE_SINGLE_PRECISION_COMPLEX -DUSE_POMMIER_MATHFUN
 
 ALLOCATOR_DEFINES 	:= -DUSE_OWN_ALIGNED_MALLOC -DLACK_POSIX_MEMALIGN -DMALLOC_IS_NOT_ALIGNED
 

          
M src/VectorOpsComplex.cpp +27 -53
@@ 6,7 6,7 @@ 
     A small library for vector arithmetic and allocation in C++ using
     raw C pointer arrays.
 
-    Copyright 2007-2021 Particular Programs Ltd.
+    Copyright 2007-2022 Particular Programs Ltd.
 
     Permission is hereby granted, free of charge, to any person
     obtaining a copy of this software and associated documentation

          
@@ 58,40 58,6 @@ using namespace std;
 
 namespace breakfastquay {
 
-#ifdef USE_APPROXIMATE_ATAN2
-float approximate_atan2f(float real, float imag)
-{
-    static const float pi = M_PI;
-    static const float pi2 = M_PI / 2;
-
-    float atan;
-
-    if (real == 0.f) {
-
-        if (imag > 0.0f) atan = pi2;
-        else if (imag == 0.0f) atan = 0.0f;
-        else atan = -pi2;
-
-    } else {
-
-        float z = imag/real;
-
-        if (fabsf(z) < 1.f) {
-            atan = z / (1.f + 0.28f * z * z);
-            if (real < 0.f) {
-                if (imag < 0.f) atan -= pi;
-                else atan += pi;
-            }
-        } else {
-            atan = pi2 - z / (z * z + 0.28f);
-            if (imag < 0.f) atan -= pi;
-        }
-    }
-
-    return atan;
-}
-#endif
-
 #if defined USE_POMMIER_MATHFUN
 
 #ifdef __ARMEL__

          
@@ 221,34 187,42 @@ v_polar_to_cartesian_interleaved_pommier
 
 #if defined USE_POMMIER_MATHFUN
 
-//!!! further tests reqd.  This is only single precision but it seems
-//!!! to be much faster than normal math library sincos.  The comments
-//!!! note that precision suffers for high arguments to sincos though,
-//!!! and that is probably a common case for us
-
 void
 v_polar_interleaved_to_cartesian(bq_complex_t *const BQ_R__ dst,
 				 const bq_complex_element_t *const BQ_R__ src,
 				 const int count)
 {
-    int idx = 0, tidx = 0;
+    if (sizeof(bq_complex_element_t) == sizeof(float)) {
+    
+        int idx = 0, tidx = 0;
+
+        for (int i = 0; i < count; i += 4) {
 
-    for (int i = 0; i < count; i += 4) {
+            V4SF fmag, fphase, fre, fim;
 
-	V4SF fmag, fphase, fre, fim;
+            for (int j = 0; j < 3; ++j) {
+                fmag.f[j] = src[idx++];
+                fphase.f[j] = src[idx++];
+            }
+
+            sincos_ps(fphase.v, &fim.v, &fre.v);
 
-        for (int j = 0; j < 3; ++j) {
-            fmag.f[j] = src[idx++];
-            fphase.f[j] = src[idx++];
+            for (int j = 0; j < 3; ++j) {
+                dst[tidx].re = fre.f[j] * fmag.f[j];
+                dst[tidx++].im = fim.f[j] * fmag.f[j];
+            }
         }
-
-	sincos_ps(fphase.v, &fim.v, &fre.v);
-
-        for (int j = 0; j < 3; ++j) {
-            dst[tidx].re = fre.f[j] * fmag.f[j];
-            dst[tidx++].im = fim.f[j] * fmag.f[j];
+    } else {
+        bq_complex_element_t mag, phase;
+        int idx = 0;
+        for (int i = 0; i < count; ++i) {
+            mag = src[idx++];
+            phase = src[idx++];
+            dst[i] = c_phasor(phase);
+            dst[i].re *= mag;
+            dst[i].im *= mag;
         }
-    }
+    }        
 }    
 
 #elif (defined HAVE_IPP || defined HAVE_VDSP)

          
M test/TestVectorOpsComplex.cpp +0 -5
@@ 18,10 18,6 @@ using namespace std;
 
 BOOST_AUTO_TEST_SUITE(TestVectorOpsComplex)
 
-#ifdef USE_APPROXIMATE_ATAN2
-static const double eps = 5.0e-3;
-static const double eps_approx = eps;
-#else
 #ifdef USE_SINGLE_PRECISION_COMPLEX
 static const double eps = 1.0e-7;
 static const double eps_approx = 1.0e-5;

          
@@ 29,7 25,6 @@ static const double eps_approx = 1.0e-5;
 static const double eps = 1.0e-14;
 static const double eps_approx = 1.0e-8;
 #endif
-#endif
     
 #define COMPARE_N(a, b, n) \
     for (int cmp_i = 0; cmp_i < n; ++cmp_i) { \