Tests and vDSP implementation for complex divide/dot
3 files changed, 97 insertions(+), 22 deletions(-)

M bqvec/VectorOpsComplex.h
M build/Makefile.inc
M test/TestVectorOpsComplex.cpp
M bqvec/VectorOpsComplex.h +72 -12
@@ 232,7 232,7 @@ inline void v_multiply(bq_complex_t *con
                        const bq_complex_t *const BQ_R__ src,
                        const int count)
 {
-#ifdef HAVE_IPP
+#if defined HAVE_IPP
     if (sizeof(bq_complex_element_t) == sizeof(float)) {
         ippsMul_32fc_I((const Ipp32fc *)src, (Ipp32fc *)srcdst, count);
     } else {

          
@@ 251,7 251,7 @@ inline void v_multiply_to(bq_complex_t *
                           const bq_complex_t *const BQ_R__ src2,
                           const int count)
 {
-#ifdef HAVE_IPP
+#if defined HAVE_IPP
     if (sizeof(bq_complex_element_t) == sizeof(float)) {
         ippsMul_32fc((const Ipp32fc *)src1, (const Ipp32fc *)src2,
                      (Ipp32fc *)dst, count);

          
@@ 259,11 259,31 @@ inline void v_multiply_to(bq_complex_t *
         ippsMul_64fc((const Ipp64fc *)src1, (const Ipp64fc *)src2,
                      (Ipp64fc *)dst, count);
     }
+#elif defined HAVE_VDSP
+    if (sizeof(bq_complex_element_t) == sizeof(float)) {
+        DSPSplitComplex sdst, ssrc1, ssrc2;
+        sdst.realp = (float *)dst;
+        sdst.imagp = sdst.realp + 1;
+        ssrc1.realp = (float *)src1;
+        ssrc1.imagp = ssrc1.realp + 1;
+        ssrc2.realp = (float *)src2;
+        ssrc2.imagp = ssrc2.realp + 1;
+        vDSP_zvmul(&ssrc1, 2, &ssrc2, 2, &sdst, 2, count, 1);
+    } else {
+        DSPDoubleSplitComplex sdst, ssrc1, ssrc2;
+        sdst.realp = (double *)dst;
+        sdst.imagp = sdst.realp + 1;
+        ssrc1.realp = (double *)src1;
+        ssrc1.imagp = ssrc1.realp + 1;
+        ssrc2.realp = (double *)src2;
+        ssrc2.imagp = ssrc2.realp + 1;
+        vDSP_zvmulD(&ssrc1, 2, &ssrc2, 2, &sdst, 2, count, 1);
+    }
 #else
     for (int i = 0; i < count; ++i) {
         c_multiply(dst[i], src1[i], src2[i]);
     }
-#endif // HAVE_IPP
+#endif
 }
 
 template<>

          
@@ 271,7 291,7 @@ inline void v_divide(bq_complex_t *const
                      const bq_complex_t *const BQ_R__ src,
                      const int count)
 {
-#ifdef HAVE_IPP
+#if defined HAVE_IPP
     if (sizeof(bq_complex_element_t) == sizeof(float)) {
         ippsDiv_32fc_I((const Ipp32fc *)src, (Ipp32fc *)srcdst, count);
     } else {

          
@@ 290,7 310,7 @@ inline void v_divide_to(bq_complex_t *co
                         const bq_complex_t *const BQ_R__ src2,
                         const int count)
 {
-#ifdef HAVE_IPP
+#if defined HAVE_IPP
     if (sizeof(bq_complex_element_t) == sizeof(float)) {
         ippsDiv_32fc((const Ipp32fc *)src1, (const Ipp32fc *)src2,
                      (Ipp32fc *)dst, count);

          
@@ 298,11 318,31 @@ inline void v_divide_to(bq_complex_t *co
         ippsDiv_64fc((const Ipp64fc *)src1, (const Ipp64fc *)src2,
                      (Ipp64fc *)dst, count);
     }
+#elif defined HAVE_VDSP
+    if (sizeof(bq_complex_element_t) == sizeof(float)) {
+        DSPSplitComplex sdst, ssrc1, ssrc2;
+        sdst.realp = (float *)dst;
+        sdst.imagp = sdst.realp + 1;
+        ssrc1.realp = (float *)src1;
+        ssrc1.imagp = ssrc1.realp + 1;
+        ssrc2.realp = (float *)src2;
+        ssrc2.imagp = ssrc2.realp + 1;
+        vDSP_zvdiv(&ssrc2, 2, &ssrc1, 2, &sdst, 2, count);
+    } else {
+        DSPDoubleSplitComplex sdst, ssrc1, ssrc2;
+        sdst.realp = (double *)dst;
+        sdst.imagp = sdst.realp + 1;
+        ssrc1.realp = (double *)src1;
+        ssrc1.imagp = ssrc1.realp + 1;
+        ssrc2.realp = (double *)src2;
+        ssrc2.imagp = ssrc2.realp + 1;
+        vDSP_zvdivD(&ssrc2, 2, &ssrc1, 2, &sdst, 2, count);
+    }
 #else
     for (int i = 0; i < count; ++i) {
         c_divide(dst[i], src1[i], src2[i]);
     }
-#endif // HAVE_IPP
+#endif
 }
 
 template<>

          
@@ 311,7 351,7 @@ inline void v_multiply_and_add(bq_comple
                                const bq_complex_t *const BQ_R__ src2,
                                const int count)
 {
-#ifdef HAVE_IPP
+#if defined HAVE_IPP
     if (sizeof(bq_complex_element_t) == sizeof(float)) {
         ippsAddProduct_32fc((const Ipp32fc *)src1, (const Ipp32fc *)src2,
                             (Ipp32fc *)srcdst, count);

          
@@ 332,7 372,7 @@ inline bq_complex_t v_multiply_and_sum(c
                                        const int count)
 {
     bq_complex_t result = bq_complex_t();
-#ifdef HAVE_IPP
+#if defined HAVE_IPP
     if (sizeof(bq_complex_element_t) == sizeof(float)) {
         ippsDotProd_32fc((const Ipp32fc *)src1, (const Ipp32fc *)src2,
                          count, (Ipp32fc *)&result);

          
@@ 340,6 380,26 @@ inline bq_complex_t v_multiply_and_sum(c
         ippsDotProd_64fc((const Ipp64fc *)src1, (const Ipp64fc *)src2,
                          count, (Ipp64fc *)&result);
     }
+#elif defined HAVE_VDSP
+    if (sizeof(bq_complex_element_t) == sizeof(float)) {
+        DSPSplitComplex sdst, ssrc1, ssrc2;
+        sdst.realp = (float *)&result;
+        sdst.imagp = sdst.realp + 1;
+        ssrc1.realp = (float *)src1;
+        ssrc1.imagp = ssrc1.realp + 1;
+        ssrc2.realp = (float *)src2;
+        ssrc2.imagp = ssrc2.realp + 1;
+        vDSP_zdotpr(&ssrc1, 2, &ssrc2, 2, &sdst, count);
+    } else {
+        DSPDoubleSplitComplex sdst, ssrc1, ssrc2;
+        sdst.realp = (double *)&result;
+        sdst.imagp = sdst.realp + 1;
+        ssrc1.realp = (double *)src1;
+        ssrc1.imagp = ssrc1.realp + 1;
+        ssrc2.realp = (double *)src2;
+        ssrc2.imagp = ssrc2.realp + 1;
+        vDSP_zdotprD(&ssrc1, 2, &ssrc2, 2, &sdst, count);
+    }
 #else
     bq_complex_t out = bq_complex_t();
     for (int i = 0; i < count; ++i) {

          
@@ 398,7 458,7 @@ inline void c_magphase(T *mag, T *phase,
     *phase = atan2(imag, real);
 }
 
-#ifdef USE_APPROXIMATE_ATAN2
+#if defined USE_APPROXIMATE_ATAN2
 // NB arguments in opposite order from usual for atan2f
 extern float approximate_atan2f(float real, float imag);
 template<>

          
@@ 506,7 566,7 @@ void v_polar_to_cartesian_interleaved(T 
     }
 }    
 
-#ifdef HAVE_IPP
+#if defined HAVE_IPP
 template<>
 inline void v_polar_to_cartesian(float *const BQ_R__ real,
                                  float *const BQ_R__ imag,

          
@@ 609,7 669,7 @@ void v_cartesian_interleaved_to_polar(T 
     }
 }
 
-#ifdef HAVE_IPP
+#if defined HAVE_IPP
 
 template<>
 inline void v_cartesian_to_polar(float *const BQ_R__ mag,

          
@@ 716,7 776,7 @@ void v_cartesian_interleaved_to_magnitud
     }
 }
 
-#ifdef HAVE_IPP
+#if defined HAVE_IPP
 template<>
 inline void v_cartesian_to_magnitudes(float *const BQ_R__ mag,
                                       const float *const BQ_R__ real,

          
M build/Makefile.inc +4 -4
@@ 24,7 24,7 @@ LIBRARY	:= libbqvec.a
 all:	$(LIBRARY) timings
 
 test:	$(LIBRARY) timings test-allocators test-vectorops test-vectorops-complex
-	./test-allocators && ./test-vectorops && ./test-vectorops-complex
+	DYLD_LIBRARY_PATH=/opt/boost/lib ./test-allocators && DYLD_LIBRARY_PATH=/opt/boost/lib ./test-vectorops && DYLD_LIBRARY_PATH=/opt/boost/lib ./test-vectorops-complex
 
 valgrind:	$(LIBRARY) timings test-allocators test-vectorops test-vectorops-complex
 	valgrind ./test-allocators && valgrind ./test-vectorops && valgrind ./test-vectorops-complex

          
@@ 36,13 36,13 @@ timings: $(TIMINGS_OBJECTS) $(LIBRARY)
 	$(CXX) $(CXXFLAGS) -o $@ $^ $(THIRD_PARTY_LIBS)
 
 test-allocators:	test/TestAllocators.o $(LIBRARY)
-	$(CXX) $(CXXFLAGS) -o $@ $^ -lboost_unit_test_framework -L. -lbqvec $(THIRD_PARTY_LIBS)
+	$(CXX) $(CXXFLAGS) -o $@ $^ -L. -lbqvec $(THIRD_PARTY_LIBS) -lboost_unit_test_framework
 
 test-vectorops:	test/TestVectorOps.o $(LIBRARY)
-	$(CXX) $(CXXFLAGS) -o $@ $^ -lboost_unit_test_framework -L. -lbqvec $(THIRD_PARTY_LIBS)
+	$(CXX) $(CXXFLAGS) -o $@ $^ -L. -lbqvec $(THIRD_PARTY_LIBS) -lboost_unit_test_framework
 
 test-vectorops-complex:	test/TestVectorOpsComplex.o $(LIBRARY)
-	$(CXX) $(CXXFLAGS) -o $@ $^ -lboost_unit_test_framework -L. -lbqvec $(THIRD_PARTY_LIBS)
+	$(CXX) $(CXXFLAGS) -o $@ $^ -L. -lbqvec $(THIRD_PARTY_LIBS) -lboost_unit_test_framework
 
 clean:		
 	rm -f $(OBJECTS) $(TEST_OBJECTS) $(TIMINGS_OBJECTS)

          
M test/TestVectorOpsComplex.cpp +21 -6
@@ 44,6 44,12 @@ static const double eps = 1.0e-14;
         BOOST_CHECK_SMALL(a[cmp_i].im - b[cmp_i].im, (bq_complex_element_t) eps); \
     }
 
+#define COMPARE_CPLX_N_APPROX(a, b, n)						\
+    for (int cmp_i = 0; cmp_i < n; ++cmp_i) { \
+        BOOST_CHECK_SMALL(a[cmp_i].re - b[cmp_i].re, (bq_complex_element_t) 1e-7); \
+        BOOST_CHECK_SMALL(a[cmp_i].im - b[cmp_i].im, (bq_complex_element_t) 1e-7); \
+    }
+
 BOOST_AUTO_TEST_CASE(add)
 {
     bq_complex_t a[] = { { 1.0, 2.0 }, { 3.0, -4.0 } };

          
@@ 81,23 87,32 @@ BOOST_AUTO_TEST_CASE(multiply_to)
     COMPARE_CPLX_N(o, expected, 2);
 }
 
+BOOST_AUTO_TEST_CASE(multiply_and_sum)
+{
+    bq_complex_t a[] = { { 1.0, 2.0 }, { 3.0, -4.0 } };
+    bq_complex_t b[] = { { -1.0, 3.0 }, { -4.5, 0.0 } };
+    bq_complex_t o = v_multiply_and_sum(a, b, 2);
+    bq_complex_t expected = { -20.5, 19.0 };
+    COMPARE_CPLX_N((&o), (&expected), 1);
+}
+
 BOOST_AUTO_TEST_CASE(divide)
 {
     bq_complex_t a[] = { { 0.0, 0.0 }, { 1.0, 0.0 }, { 1.0, 0.0 }, { -1.0, 0.0 }, { 0.0, 1.0 }, { 2.0, -1000.5 } };
-    bq_complex_t b[] = { { 1.0, 0.0 }, { 1.0, 0.0 }, { 0.0, 1.0 }, { 1.0, 0.0 }, { 0.0, 1.0 }, { 0.4, 2.0 } };
+    bq_complex_t b[] = { { 1.0, 0.0 }, { 1.0, 0.0 }, { 0.0, 1.0 }, { 0.0, 1.0 }, { 0.0, 1.0 }, { 0.4, 2.0 } };
     bq_complex_t expected[] = { { 0.0, 0.0 }, { 1.0, 0.0 }, { 0.0, -1.0 }, { 0.0, 1.0 }, { 1.0, 0.0 }, { -480.817307692, -97.16346154 } };
-    v_divide(a, b, 2);
-    COMPARE_CPLX_N(a, expected, 2);
+    v_divide(a, b, 6);
+    COMPARE_CPLX_N_APPROX(a, expected, 6);
 }
 
 BOOST_AUTO_TEST_CASE(divide_to)
 {
     bq_complex_t a[] = { { 0.0, 0.0 }, { 1.0, 0.0 }, { 1.0, 0.0 }, { -1.0, 0.0 }, { 0.0, 1.0 }, { 2.0, -1000.5 } };
-    bq_complex_t b[] = { { 1.0, 0.0 }, { 1.0, 0.0 }, { 0.0, 1.0 }, { 1.0, 0.0 }, { 0.0, 1.0 }, { 0.4, 2.0 } };
+    bq_complex_t b[] = { { 1.0, 0.0 }, { 1.0, 0.0 }, { 0.0, 1.0 }, { 0.0, 1.0 }, { 0.0, 1.0 }, { 0.4, 2.0 } };
     bq_complex_t o[6];
     bq_complex_t expected[] = { { 0.0, 0.0 }, { 1.0, 0.0 }, { 0.0, -1.0 }, { 0.0, 1.0 }, { 1.0, 0.0 }, { -480.817307692, -97.16346154 } };
-    v_divide_to(o, a, b, 2);
-    COMPARE_CPLX_N(o, expected, 2);
+    v_divide_to(o, a, b, 6);
+    COMPARE_CPLX_N_APPROX(o, expected, 6);
 }
 
 BOOST_AUTO_TEST_CASE(cartesian_to_magnitudes_bq)