M bqvec/VectorOpsComplex.h +72 -12
@@ 232,7 232,7 @@ inline void v_multiply(bq_complex_t *con
const bq_complex_t *const BQ_R__ src,
const int count)
{
-#ifdef HAVE_IPP
+#if defined HAVE_IPP
if (sizeof(bq_complex_element_t) == sizeof(float)) {
ippsMul_32fc_I((const Ipp32fc *)src, (Ipp32fc *)srcdst, count);
} else {
@@ 251,7 251,7 @@ inline void v_multiply_to(bq_complex_t *
const bq_complex_t *const BQ_R__ src2,
const int count)
{
-#ifdef HAVE_IPP
+#if defined HAVE_IPP
if (sizeof(bq_complex_element_t) == sizeof(float)) {
ippsMul_32fc((const Ipp32fc *)src1, (const Ipp32fc *)src2,
(Ipp32fc *)dst, count);
@@ 259,11 259,31 @@ inline void v_multiply_to(bq_complex_t *
ippsMul_64fc((const Ipp64fc *)src1, (const Ipp64fc *)src2,
(Ipp64fc *)dst, count);
}
+#elif defined HAVE_VDSP
+ if (sizeof(bq_complex_element_t) == sizeof(float)) {
+ DSPSplitComplex sdst, ssrc1, ssrc2;
+ sdst.realp = (float *)dst;
+ sdst.imagp = sdst.realp + 1;
+ ssrc1.realp = (float *)src1;
+ ssrc1.imagp = ssrc1.realp + 1;
+ ssrc2.realp = (float *)src2;
+ ssrc2.imagp = ssrc2.realp + 1;
+ vDSP_zvmul(&ssrc1, 2, &ssrc2, 2, &sdst, 2, count, 1);
+ } else {
+ DSPDoubleSplitComplex sdst, ssrc1, ssrc2;
+ sdst.realp = (double *)dst;
+ sdst.imagp = sdst.realp + 1;
+ ssrc1.realp = (double *)src1;
+ ssrc1.imagp = ssrc1.realp + 1;
+ ssrc2.realp = (double *)src2;
+ ssrc2.imagp = ssrc2.realp + 1;
+ vDSP_zvmulD(&ssrc1, 2, &ssrc2, 2, &sdst, 2, count, 1);
+ }
#else
for (int i = 0; i < count; ++i) {
c_multiply(dst[i], src1[i], src2[i]);
}
-#endif // HAVE_IPP
+#endif
}
template<>
@@ 271,7 291,7 @@ inline void v_divide(bq_complex_t *const
const bq_complex_t *const BQ_R__ src,
const int count)
{
-#ifdef HAVE_IPP
+#if defined HAVE_IPP
if (sizeof(bq_complex_element_t) == sizeof(float)) {
ippsDiv_32fc_I((const Ipp32fc *)src, (Ipp32fc *)srcdst, count);
} else {
@@ 290,7 310,7 @@ inline void v_divide_to(bq_complex_t *co
const bq_complex_t *const BQ_R__ src2,
const int count)
{
-#ifdef HAVE_IPP
+#if defined HAVE_IPP
if (sizeof(bq_complex_element_t) == sizeof(float)) {
ippsDiv_32fc((const Ipp32fc *)src1, (const Ipp32fc *)src2,
(Ipp32fc *)dst, count);
@@ 298,11 318,31 @@ inline void v_divide_to(bq_complex_t *co
ippsDiv_64fc((const Ipp64fc *)src1, (const Ipp64fc *)src2,
(Ipp64fc *)dst, count);
}
+#elif defined HAVE_VDSP
+ if (sizeof(bq_complex_element_t) == sizeof(float)) {
+ DSPSplitComplex sdst, ssrc1, ssrc2;
+ sdst.realp = (float *)dst;
+ sdst.imagp = sdst.realp + 1;
+ ssrc1.realp = (float *)src1;
+ ssrc1.imagp = ssrc1.realp + 1;
+ ssrc2.realp = (float *)src2;
+ ssrc2.imagp = ssrc2.realp + 1;
+ vDSP_zvdiv(&ssrc2, 2, &ssrc1, 2, &sdst, 2, count);
+ } else {
+ DSPDoubleSplitComplex sdst, ssrc1, ssrc2;
+ sdst.realp = (double *)dst;
+ sdst.imagp = sdst.realp + 1;
+ ssrc1.realp = (double *)src1;
+ ssrc1.imagp = ssrc1.realp + 1;
+ ssrc2.realp = (double *)src2;
+ ssrc2.imagp = ssrc2.realp + 1;
+ vDSP_zvdivD(&ssrc2, 2, &ssrc1, 2, &sdst, 2, count);
+ }
#else
for (int i = 0; i < count; ++i) {
c_divide(dst[i], src1[i], src2[i]);
}
-#endif // HAVE_IPP
+#endif
}
template<>
@@ 311,7 351,7 @@ inline void v_multiply_and_add(bq_comple
const bq_complex_t *const BQ_R__ src2,
const int count)
{
-#ifdef HAVE_IPP
+#if defined HAVE_IPP
if (sizeof(bq_complex_element_t) == sizeof(float)) {
ippsAddProduct_32fc((const Ipp32fc *)src1, (const Ipp32fc *)src2,
(Ipp32fc *)srcdst, count);
@@ 332,7 372,7 @@ inline bq_complex_t v_multiply_and_sum(c
const int count)
{
bq_complex_t result = bq_complex_t();
-#ifdef HAVE_IPP
+#if defined HAVE_IPP
if (sizeof(bq_complex_element_t) == sizeof(float)) {
ippsDotProd_32fc((const Ipp32fc *)src1, (const Ipp32fc *)src2,
count, (Ipp32fc *)&result);
@@ 340,6 380,26 @@ inline bq_complex_t v_multiply_and_sum(c
ippsDotProd_64fc((const Ipp64fc *)src1, (const Ipp64fc *)src2,
count, (Ipp64fc *)&result);
}
+#elif defined HAVE_VDSP
+ if (sizeof(bq_complex_element_t) == sizeof(float)) {
+ DSPSplitComplex sdst, ssrc1, ssrc2;
+ sdst.realp = (float *)&result;
+ sdst.imagp = sdst.realp + 1;
+ ssrc1.realp = (float *)src1;
+ ssrc1.imagp = ssrc1.realp + 1;
+ ssrc2.realp = (float *)src2;
+ ssrc2.imagp = ssrc2.realp + 1;
+ vDSP_zdotpr(&ssrc1, 2, &ssrc2, 2, &sdst, count);
+ } else {
+ DSPDoubleSplitComplex sdst, ssrc1, ssrc2;
+ sdst.realp = (double *)&result;
+ sdst.imagp = sdst.realp + 1;
+ ssrc1.realp = (double *)src1;
+ ssrc1.imagp = ssrc1.realp + 1;
+ ssrc2.realp = (double *)src2;
+ ssrc2.imagp = ssrc2.realp + 1;
+ vDSP_zdotprD(&ssrc1, 2, &ssrc2, 2, &sdst, count);
+ }
#else
bq_complex_t out = bq_complex_t();
for (int i = 0; i < count; ++i) {
@@ 398,7 458,7 @@ inline void c_magphase(T *mag, T *phase,
*phase = atan2(imag, real);
}
-#ifdef USE_APPROXIMATE_ATAN2
+#if defined USE_APPROXIMATE_ATAN2
// NB arguments in opposite order from usual for atan2f
extern float approximate_atan2f(float real, float imag);
template<>
@@ 506,7 566,7 @@ void v_polar_to_cartesian_interleaved(T
}
}
-#ifdef HAVE_IPP
+#if defined HAVE_IPP
template<>
inline void v_polar_to_cartesian(float *const BQ_R__ real,
float *const BQ_R__ imag,
@@ 609,7 669,7 @@ void v_cartesian_interleaved_to_polar(T
}
}
-#ifdef HAVE_IPP
+#if defined HAVE_IPP
template<>
inline void v_cartesian_to_polar(float *const BQ_R__ mag,
@@ 716,7 776,7 @@ void v_cartesian_interleaved_to_magnitud
}
}
-#ifdef HAVE_IPP
+#if defined HAVE_IPP
template<>
inline void v_cartesian_to_magnitudes(float *const BQ_R__ mag,
const float *const BQ_R__ real,
M build/Makefile.inc +4 -4
@@ 24,7 24,7 @@ LIBRARY := libbqvec.a
all: $(LIBRARY) timings
test: $(LIBRARY) timings test-allocators test-vectorops test-vectorops-complex
- ./test-allocators && ./test-vectorops && ./test-vectorops-complex
+ DYLD_LIBRARY_PATH=/opt/boost/lib ./test-allocators && DYLD_LIBRARY_PATH=/opt/boost/lib ./test-vectorops && DYLD_LIBRARY_PATH=/opt/boost/lib ./test-vectorops-complex
valgrind: $(LIBRARY) timings test-allocators test-vectorops test-vectorops-complex
valgrind ./test-allocators && valgrind ./test-vectorops && valgrind ./test-vectorops-complex
@@ 36,13 36,13 @@ timings: $(TIMINGS_OBJECTS) $(LIBRARY)
$(CXX) $(CXXFLAGS) -o $@ $^ $(THIRD_PARTY_LIBS)
test-allocators: test/TestAllocators.o $(LIBRARY)
- $(CXX) $(CXXFLAGS) -o $@ $^ -lboost_unit_test_framework -L. -lbqvec $(THIRD_PARTY_LIBS)
+ $(CXX) $(CXXFLAGS) -o $@ $^ -L. -lbqvec $(THIRD_PARTY_LIBS) -lboost_unit_test_framework
test-vectorops: test/TestVectorOps.o $(LIBRARY)
- $(CXX) $(CXXFLAGS) -o $@ $^ -lboost_unit_test_framework -L. -lbqvec $(THIRD_PARTY_LIBS)
+ $(CXX) $(CXXFLAGS) -o $@ $^ -L. -lbqvec $(THIRD_PARTY_LIBS) -lboost_unit_test_framework
test-vectorops-complex: test/TestVectorOpsComplex.o $(LIBRARY)
- $(CXX) $(CXXFLAGS) -o $@ $^ -lboost_unit_test_framework -L. -lbqvec $(THIRD_PARTY_LIBS)
+ $(CXX) $(CXXFLAGS) -o $@ $^ -L. -lbqvec $(THIRD_PARTY_LIBS) -lboost_unit_test_framework
clean:
rm -f $(OBJECTS) $(TEST_OBJECTS) $(TIMINGS_OBJECTS)
M test/TestVectorOpsComplex.cpp +21 -6
@@ 44,6 44,12 @@ static const double eps = 1.0e-14;
BOOST_CHECK_SMALL(a[cmp_i].im - b[cmp_i].im, (bq_complex_element_t) eps); \
}
+#define COMPARE_CPLX_N_APPROX(a, b, n) \
+ for (int cmp_i = 0; cmp_i < n; ++cmp_i) { \
+ BOOST_CHECK_SMALL(a[cmp_i].re - b[cmp_i].re, (bq_complex_element_t) 1e-7); \
+ BOOST_CHECK_SMALL(a[cmp_i].im - b[cmp_i].im, (bq_complex_element_t) 1e-7); \
+ }
+
BOOST_AUTO_TEST_CASE(add)
{
bq_complex_t a[] = { { 1.0, 2.0 }, { 3.0, -4.0 } };
@@ 81,23 87,32 @@ BOOST_AUTO_TEST_CASE(multiply_to)
COMPARE_CPLX_N(o, expected, 2);
}
+BOOST_AUTO_TEST_CASE(multiply_and_sum)
+{
+ bq_complex_t a[] = { { 1.0, 2.0 }, { 3.0, -4.0 } };
+ bq_complex_t b[] = { { -1.0, 3.0 }, { -4.5, 0.0 } };
+ bq_complex_t o = v_multiply_and_sum(a, b, 2);
+ bq_complex_t expected = { -20.5, 19.0 };
+ COMPARE_CPLX_N((&o), (&expected), 1);
+}
+
BOOST_AUTO_TEST_CASE(divide)
{
bq_complex_t a[] = { { 0.0, 0.0 }, { 1.0, 0.0 }, { 1.0, 0.0 }, { -1.0, 0.0 }, { 0.0, 1.0 }, { 2.0, -1000.5 } };
- bq_complex_t b[] = { { 1.0, 0.0 }, { 1.0, 0.0 }, { 0.0, 1.0 }, { 1.0, 0.0 }, { 0.0, 1.0 }, { 0.4, 2.0 } };
+ bq_complex_t b[] = { { 1.0, 0.0 }, { 1.0, 0.0 }, { 0.0, 1.0 }, { 0.0, 1.0 }, { 0.0, 1.0 }, { 0.4, 2.0 } };
bq_complex_t expected[] = { { 0.0, 0.0 }, { 1.0, 0.0 }, { 0.0, -1.0 }, { 0.0, 1.0 }, { 1.0, 0.0 }, { -480.817307692, -97.16346154 } };
- v_divide(a, b, 2);
- COMPARE_CPLX_N(a, expected, 2);
+ v_divide(a, b, 6);
+ COMPARE_CPLX_N_APPROX(a, expected, 6);
}
BOOST_AUTO_TEST_CASE(divide_to)
{
bq_complex_t a[] = { { 0.0, 0.0 }, { 1.0, 0.0 }, { 1.0, 0.0 }, { -1.0, 0.0 }, { 0.0, 1.0 }, { 2.0, -1000.5 } };
- bq_complex_t b[] = { { 1.0, 0.0 }, { 1.0, 0.0 }, { 0.0, 1.0 }, { 1.0, 0.0 }, { 0.0, 1.0 }, { 0.4, 2.0 } };
+ bq_complex_t b[] = { { 1.0, 0.0 }, { 1.0, 0.0 }, { 0.0, 1.0 }, { 0.0, 1.0 }, { 0.0, 1.0 }, { 0.4, 2.0 } };
bq_complex_t o[6];
bq_complex_t expected[] = { { 0.0, 0.0 }, { 1.0, 0.0 }, { 0.0, -1.0 }, { 0.0, 1.0 }, { 1.0, 0.0 }, { -480.817307692, -97.16346154 } };
- v_divide_to(o, a, b, 2);
- COMPARE_CPLX_N(o, expected, 2);
+ v_divide_to(o, a, b, 6);
+ COMPARE_CPLX_N_APPROX(o, expected, 6);
}
BOOST_AUTO_TEST_CASE(cartesian_to_magnitudes_bq)