Add complex vector divide and dot
3 files changed, 143 insertions(+), 15 deletions(-)

M bqvec/VectorOps.h
M bqvec/VectorOpsComplex.h
M test/TestVectorOpsComplex.cpp
M bqvec/VectorOps.h +41 -1
@@ 625,7 625,7 @@ inline void v_multiply(double *const BQ_
 #endif // HAVE_IPP
 
 /**
- * v_multiply
+ * v_multiply_to
  *
  * Multiply the corresponding elements of the vectors \arg src1 and
  * \arg src2, both of length arg \count, and write the results into

          
@@ 701,6 701,46 @@ inline void v_divide(double *const BQ_R_
 #endif // HAVE_IPP
 
 /**
+ * v_divide_to
+ *
+ * Divide the corresponding elements of the vectors \arg src1 by those
+ * in \arg src2, both of length arg \count, and write the results into
+ * \arg dst.
+ *
+ * Caller guarantees that \arg src1, \arg src2 and \arg dst are
+ * non-overlapping.
+ */
+template<typename T, typename S>
+inline void v_divide_to(T *const BQ_R__ dst,
+                          const T *const BQ_R__ src1,
+                          const S *const BQ_R__ src2,
+                          const int count)
+{
+    for (int i = 0; i < count; ++i) {
+        dst[i] = src1[i] / src2[i];
+    }
+}
+
+#if defined HAVE_IPP 
+template<>
+inline void v_divide_to(float *const BQ_R__ dst,
+                          const float *const BQ_R__ src1,
+                          const float *const BQ_R__ src2,
+                          const int count)
+{
+    ippsDiv_32f(src1, src2, dst, count);
+}    
+template<>
+inline void v_divide_to(double *const BQ_R__ dst,
+                          const double *const BQ_R__ src1,
+                          const double *const BQ_R__ src2,
+                          const int count)
+{
+    ippsDiv_64f(src1, src2, dst, count);
+}
+#endif // HAVE_IPP
+
+/**
  * v_multiply_and_add
  *
  * Multiply the corresponding elements of the vectors \arg src1 and

          
M bqvec/VectorOpsComplex.h +83 -14
@@ 174,6 174,28 @@ inline void c_multiply_and_add(bq_comple
     c_add(srcdst, tmp);
 }
 
+inline void c_divide(bq_complex_t &dst,
+                     const bq_complex_t &src1,
+                     const bq_complex_t &src2)
+{
+    bq_complex_element_t scale = bq_complex_element_t
+        (1.0 / (src2.re * src2.re + src2.im * src2.im));
+
+    bq_complex_element_t real =
+        scale * (src1.re * src2.re + src1.im * src2.im);
+    bq_complex_element_t imag =
+        scale * (src1.im * src2.re - src1.re * src2.im);
+
+    dst.re = real;
+    dst.im = imag;
+}
+
+inline void c_divide(bq_complex_t &srcdst,
+                     const bq_complex_t &src)
+{
+    c_divide(srcdst, srcdst, src);
+}
+
 template<>
 inline void v_add(bq_complex_t *const BQ_R__ srcdst,
                   const bq_complex_t *const BQ_R__ src,

          
@@ 245,6 267,45 @@ inline void v_multiply_to(bq_complex_t *
 }
 
 template<>
+inline void v_divide(bq_complex_t *const BQ_R__ srcdst,
+                     const bq_complex_t *const BQ_R__ src,
+                     const int count)
+{
+#ifdef HAVE_IPP
+    if (sizeof(bq_complex_element_t) == sizeof(float)) {
+        ippsDiv_32fc_I((const Ipp32fc *)src, (Ipp32fc *)srcdst, count);
+    } else {
+        ippsDiv_64fc_I((const Ipp64fc *)src, (Ipp64fc *)srcdst, count);
+    }
+#else
+    for (int i = 0; i < count; ++i) {
+        c_divide(srcdst[i], src[i]);
+    }
+#endif // HAVE_IPP
+}
+
+template<>
+inline void v_divide_to(bq_complex_t *const BQ_R__ dst,
+                        const bq_complex_t *const BQ_R__ src1,
+                        const bq_complex_t *const BQ_R__ src2,
+                        const int count)
+{
+#ifdef HAVE_IPP
+    if (sizeof(bq_complex_element_t) == sizeof(float)) {
+        ippsDiv_32fc((const Ipp32fc *)src1, (const Ipp32fc *)src2,
+                     (Ipp32fc *)dst, count);
+    } else {
+        ippsDiv_64fc((const Ipp64fc *)src1, (const Ipp64fc *)src2,
+                     (Ipp64fc *)dst, count);
+    }
+#else
+    for (int i = 0; i < count; ++i) {
+        c_divide(dst[i], src1[i], src2[i]);
+    }
+#endif // HAVE_IPP
+}
+
+template<>
 inline void v_multiply_and_add(bq_complex_t *const BQ_R__ srcdst,
                                const bq_complex_t *const BQ_R__ src1,
                                const bq_complex_t *const BQ_R__ src2,

          
@@ 265,27 326,35 @@ inline void v_multiply_and_add(bq_comple
 #endif // HAVE_IPP
 }
 
-#if defined( __GNUC__ ) && defined( _WIN32 )
-// MinGW doesn't appear to have sincos, so define it -- it's
-// a single x87 instruction anyway
-static inline void sincos(double x, double *sin, double *cos) {
-    __asm__ ("fsincos;" : "=t" (*cos), "=u" (*sin) : "0" (x) : "st(7)");
+template<>
+inline bq_complex_t v_multiply_and_sum(const bq_complex_t *const BQ_R__ src1,
+                                       const bq_complex_t *const BQ_R__ src2,
+                                       const int count)
+{
+    bq_complex_t result = bq_complex_t();
+#ifdef HAVE_IPP
+    if (sizeof(bq_complex_element_t) == sizeof(float)) {
+        ippsDotProd_32fc((const Ipp32fc *)src1, (const Ipp32fc *)src2,
+                         count, (Ipp32fc *)&result);
+    } else {
+        ippsDotProd_64fc((const Ipp64fc *)src1, (const Ipp64fc *)src2,
+                         count, (Ipp64fc *)&result);
+    }
+#else
+    bq_complex_t out = bq_complex_t();
+    for (int i = 0; i < count; ++i) {
+        c_multiply(out, src1[i], src2[i]);
+        c_add(result, out);
+    }
+#endif
+    return result;
 }
-static inline void sincosf(float fx, float *fsin, float *fcos) {
-    double sin, cos;
-    sincos(fx, &sin, &cos);
-    *fsin = sin;
-    *fcos = cos;
-}
-#endif
 
 #endif // !NO_COMPLEX_TYPES
 
 template<typename T>
 inline void c_phasor(T *real, T *imag, T phase)
 {
-    //!!! IPP contains ippsSinCos_xxx in ippvm.h -- these are
-    //!!! fixed-accuracy, test and compare
 #if defined HAVE_VDSP
     int one = 1;
     if (sizeof(T) == sizeof(float)) {

          
M test/TestVectorOpsComplex.cpp +19 -0
@@ 81,6 81,25 @@ BOOST_AUTO_TEST_CASE(multiply_to)
     COMPARE_CPLX_N(o, expected, 2);
 }
 
+BOOST_AUTO_TEST_CASE(divide)
+{
+    bq_complex_t a[] = { { 0.0, 0.0 }, { 1.0, 0.0 }, { 1.0, 0.0 }, { -1.0, 0.0 }, { 0.0, 1.0 }, { 2.0, -1000.5 } };
+    bq_complex_t b[] = { { 1.0, 0.0 }, { 1.0, 0.0 }, { 0.0, 1.0 }, { 1.0, 0.0 }, { 0.0, 1.0 }, { 0.4, 2.0 } };
+    bq_complex_t expected[] = { { 0.0, 0.0 }, { 1.0, 0.0 }, { 0.0, -1.0 }, { 0.0, 1.0 }, { 1.0, 0.0 }, { -480.817307692, -97.16346154 } };
+    v_divide(a, b, 2);
+    COMPARE_CPLX_N(a, expected, 2);
+}
+
+BOOST_AUTO_TEST_CASE(divide_to)
+{
+    bq_complex_t a[] = { { 0.0, 0.0 }, { 1.0, 0.0 }, { 1.0, 0.0 }, { -1.0, 0.0 }, { 0.0, 1.0 }, { 2.0, -1000.5 } };
+    bq_complex_t b[] = { { 1.0, 0.0 }, { 1.0, 0.0 }, { 0.0, 1.0 }, { 1.0, 0.0 }, { 0.0, 1.0 }, { 0.4, 2.0 } };
+    bq_complex_t o[6];
+    bq_complex_t expected[] = { { 0.0, 0.0 }, { 1.0, 0.0 }, { 0.0, -1.0 }, { 0.0, 1.0 }, { 1.0, 0.0 }, { -480.817307692, -97.16346154 } };
+    v_divide_to(o, a, b, 2);
+    COMPARE_CPLX_N(o, expected, 2);
+}
+
 BOOST_AUTO_TEST_CASE(cartesian_to_magnitudes_bq)
 {
     bq_complex_t a[] = { { 1.0, 2.0 }, { 3.0, -4.0 } };