M bqvec/VectorOps.h +41 -1
@@ 625,7 625,7 @@ inline void v_multiply(double *const BQ_
#endif // HAVE_IPP
/**
- * v_multiply
+ * v_multiply_to
*
* Multiply the corresponding elements of the vectors \arg src1 and
* \arg src2, both of length arg \count, and write the results into
@@ 701,6 701,46 @@ inline void v_divide(double *const BQ_R_
#endif // HAVE_IPP
/**
+ * v_divide_to
+ *
+ * Divide the corresponding elements of the vectors \arg src1 by those
+ * in \arg src2, both of length arg \count, and write the results into
+ * \arg dst.
+ *
+ * Caller guarantees that \arg src1, \arg src2 and \arg dst are
+ * non-overlapping.
+ */
+template<typename T, typename S>
+inline void v_divide_to(T *const BQ_R__ dst,
+ const T *const BQ_R__ src1,
+ const S *const BQ_R__ src2,
+ const int count)
+{
+ for (int i = 0; i < count; ++i) {
+ dst[i] = src1[i] / src2[i];
+ }
+}
+
+#if defined HAVE_IPP
+template<>
+inline void v_divide_to(float *const BQ_R__ dst,
+ const float *const BQ_R__ src1,
+ const float *const BQ_R__ src2,
+ const int count)
+{
+ ippsDiv_32f(src1, src2, dst, count);
+}
+template<>
+inline void v_divide_to(double *const BQ_R__ dst,
+ const double *const BQ_R__ src1,
+ const double *const BQ_R__ src2,
+ const int count)
+{
+ ippsDiv_64f(src1, src2, dst, count);
+}
+#endif // HAVE_IPP
+
+/**
* v_multiply_and_add
*
* Multiply the corresponding elements of the vectors \arg src1 and
M bqvec/VectorOpsComplex.h +83 -14
@@ 174,6 174,28 @@ inline void c_multiply_and_add(bq_comple
c_add(srcdst, tmp);
}
+inline void c_divide(bq_complex_t &dst,
+ const bq_complex_t &src1,
+ const bq_complex_t &src2)
+{
+ bq_complex_element_t scale = bq_complex_element_t
+ (1.0 / (src2.re * src2.re + src2.im * src2.im));
+
+ bq_complex_element_t real =
+ scale * (src1.re * src2.re + src1.im * src2.im);
+ bq_complex_element_t imag =
+ scale * (src1.im * src2.re - src1.re * src2.im);
+
+ dst.re = real;
+ dst.im = imag;
+}
+
+inline void c_divide(bq_complex_t &srcdst,
+ const bq_complex_t &src)
+{
+ c_divide(srcdst, srcdst, src);
+}
+
template<>
inline void v_add(bq_complex_t *const BQ_R__ srcdst,
const bq_complex_t *const BQ_R__ src,
@@ 245,6 267,45 @@ inline void v_multiply_to(bq_complex_t *
}
template<>
+inline void v_divide(bq_complex_t *const BQ_R__ srcdst,
+ const bq_complex_t *const BQ_R__ src,
+ const int count)
+{
+#ifdef HAVE_IPP
+ if (sizeof(bq_complex_element_t) == sizeof(float)) {
+ ippsDiv_32fc_I((const Ipp32fc *)src, (Ipp32fc *)srcdst, count);
+ } else {
+ ippsDiv_64fc_I((const Ipp64fc *)src, (Ipp64fc *)srcdst, count);
+ }
+#else
+ for (int i = 0; i < count; ++i) {
+ c_divide(srcdst[i], src[i]);
+ }
+#endif // HAVE_IPP
+}
+
+template<>
+inline void v_divide_to(bq_complex_t *const BQ_R__ dst,
+ const bq_complex_t *const BQ_R__ src1,
+ const bq_complex_t *const BQ_R__ src2,
+ const int count)
+{
+#ifdef HAVE_IPP
+ if (sizeof(bq_complex_element_t) == sizeof(float)) {
+ ippsDiv_32fc((const Ipp32fc *)src1, (const Ipp32fc *)src2,
+ (Ipp32fc *)dst, count);
+ } else {
+ ippsDiv_64fc((const Ipp64fc *)src1, (const Ipp64fc *)src2,
+ (Ipp64fc *)dst, count);
+ }
+#else
+ for (int i = 0; i < count; ++i) {
+ c_divide(dst[i], src1[i], src2[i]);
+ }
+#endif // HAVE_IPP
+}
+
+template<>
inline void v_multiply_and_add(bq_complex_t *const BQ_R__ srcdst,
const bq_complex_t *const BQ_R__ src1,
const bq_complex_t *const BQ_R__ src2,
@@ 265,27 326,35 @@ inline void v_multiply_and_add(bq_comple
#endif // HAVE_IPP
}
-#if defined( __GNUC__ ) && defined( _WIN32 )
-// MinGW doesn't appear to have sincos, so define it -- it's
-// a single x87 instruction anyway
-static inline void sincos(double x, double *sin, double *cos) {
- __asm__ ("fsincos;" : "=t" (*cos), "=u" (*sin) : "0" (x) : "st(7)");
+template<>
+inline bq_complex_t v_multiply_and_sum(const bq_complex_t *const BQ_R__ src1,
+ const bq_complex_t *const BQ_R__ src2,
+ const int count)
+{
+ bq_complex_t result = bq_complex_t();
+#ifdef HAVE_IPP
+ if (sizeof(bq_complex_element_t) == sizeof(float)) {
+ ippsDotProd_32fc((const Ipp32fc *)src1, (const Ipp32fc *)src2,
+ count, (Ipp32fc *)&result);
+ } else {
+ ippsDotProd_64fc((const Ipp64fc *)src1, (const Ipp64fc *)src2,
+ count, (Ipp64fc *)&result);
+ }
+#else
+ bq_complex_t out = bq_complex_t();
+ for (int i = 0; i < count; ++i) {
+ c_multiply(out, src1[i], src2[i]);
+ c_add(result, out);
+ }
+#endif
+ return result;
}
-static inline void sincosf(float fx, float *fsin, float *fcos) {
- double sin, cos;
- sincos(fx, &sin, &cos);
- *fsin = sin;
- *fcos = cos;
-}
-#endif
#endif // !NO_COMPLEX_TYPES
template<typename T>
inline void c_phasor(T *real, T *imag, T phase)
{
- //!!! IPP contains ippsSinCos_xxx in ippvm.h -- these are
- //!!! fixed-accuracy, test and compare
#if defined HAVE_VDSP
int one = 1;
if (sizeof(T) == sizeof(float)) {
M test/TestVectorOpsComplex.cpp +19 -0
@@ 81,6 81,25 @@ BOOST_AUTO_TEST_CASE(multiply_to)
COMPARE_CPLX_N(o, expected, 2);
}
+BOOST_AUTO_TEST_CASE(divide)
+{
+ bq_complex_t a[] = { { 0.0, 0.0 }, { 1.0, 0.0 }, { 1.0, 0.0 }, { -1.0, 0.0 }, { 0.0, 1.0 }, { 2.0, -1000.5 } };
+ bq_complex_t b[] = { { 1.0, 0.0 }, { 1.0, 0.0 }, { 0.0, 1.0 }, { 1.0, 0.0 }, { 0.0, 1.0 }, { 0.4, 2.0 } };
+ bq_complex_t expected[] = { { 0.0, 0.0 }, { 1.0, 0.0 }, { 0.0, -1.0 }, { 0.0, 1.0 }, { 1.0, 0.0 }, { -480.817307692, -97.16346154 } };
+ v_divide(a, b, 2);
+ COMPARE_CPLX_N(a, expected, 2);
+}
+
+BOOST_AUTO_TEST_CASE(divide_to)
+{
+ bq_complex_t a[] = { { 0.0, 0.0 }, { 1.0, 0.0 }, { 1.0, 0.0 }, { -1.0, 0.0 }, { 0.0, 1.0 }, { 2.0, -1000.5 } };
+ bq_complex_t b[] = { { 1.0, 0.0 }, { 1.0, 0.0 }, { 0.0, 1.0 }, { 1.0, 0.0 }, { 0.0, 1.0 }, { 0.4, 2.0 } };
+ bq_complex_t o[6];
+ bq_complex_t expected[] = { { 0.0, 0.0 }, { 1.0, 0.0 }, { 0.0, -1.0 }, { 0.0, 1.0 }, { 1.0, 0.0 }, { -480.817307692, -97.16346154 } };
+ v_divide_to(o, a, b, 2);
+ COMPARE_CPLX_N(o, expected, 2);
+}
+
BOOST_AUTO_TEST_CASE(cartesian_to_magnitudes_bq)
{
bq_complex_t a[] = { { 1.0, 2.0 }, { 3.0, -4.0 } };