summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Foster <nick@ettus.com>2011-11-09 11:32:47 -0800
committerJosh Blum <josh@joshknows.com>2011-12-01 09:08:00 -0500
commit488f0c614f0e951314c686e7f168bc62cea250d5 (patch)
tree46db451c2cd11c209101346de0823079054a3d06
parentd56564f5e3d9ccefc9ac34c81dc8d061298301e7 (diff)
Volk: added 32fc x scalar multiply, implemented in Orc & generic. Orc/SSE tested 10x faster than generic.
-rw-r--r--volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h46
-rw-r--r--volk/lib/testqa.cc2
-rw-r--r--volk/orc/volk_32fc_s32fc_multiply_32fc_a_orc_impl.orc18
3 files changed, 65 insertions, 1 deletions
diff --git a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h
new file mode 100644
index 0000000000..b27a7259f0
--- /dev/null
+++ b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h
@@ -0,0 +1,46 @@
+#ifndef INCLUDED_volk_32fc_s32fc_multiply_32fc_a_H
+#define INCLUDED_volk_32fc_s32fc_multiply_32fc_a_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <volk/volk_complex.h>
+#include <float.h>
+
+#ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Multiplies the two input complex vectors and stores their results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be multiplied
+ \param bVector One of the vectors to be multiplied
+ \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
+ */
+static inline void volk_32fc_s32fc_multiply_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
+ lv_32fc_t* cPtr = cVector;
+ const lv_32fc_t* aPtr = aVector;
+ unsigned int number = 0;
+
+ for(number = 0; number < num_points; number++){
+ *cPtr++ = (*aPtr++) * scalar;
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#ifdef LV_HAVE_ORC
+ /*!
+ \brief Multiplies the two input complex vectors and stores their results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be multiplied
+ \param bVector One of the vectors to be multiplied
+ \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
+ */
+extern void volk_32fc_s32fc_multiply_32fc_a_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points);
+static inline void volk_32fc_s32fc_multiply_32fc_a_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
+ volk_32fc_s32fc_multiply_32fc_a_orc_impl(cVector, aVector, scalar, num_points);
+}
+#endif /* LV_HAVE_ORC */
+
+
+
+
+
+#endif /* INCLUDED_volk_32fc_x2_multiply_32fc_a_H */
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index 62e62c2f4d..9d1d1bca2d 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -4,6 +4,7 @@
//VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4_a, 1e-4, 2046, 10000);
//VOLK_RUN_TESTS(volk_16i_branch_4_state_8_a, 1e-4, 2046, 10000);
+VOLK_RUN_TESTS(volk_32fc_s32fc_multiply_32fc_a, 1e-4, 0, 20460, 10000);
VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a, 1e-5, 32768.0, 204600, 10000);
VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a, 0, 0, 20460, 10000);
VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a, 0, 0, 20460, 10000);
@@ -90,4 +91,3 @@ VOLK_RUN_TESTS(volk_8i_convert_16i_a, 0, 0, 20460, 20000);
VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 20460, 2000);
VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a, 1e-4, 100, 20460, 2000);
VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 20460, 2000);
-
diff --git a/volk/orc/volk_32fc_s32fc_multiply_32fc_a_orc_impl.orc b/volk/orc/volk_32fc_s32fc_multiply_32fc_a_orc_impl.orc
new file mode 100644
index 0000000000..2577e034fe
--- /dev/null
+++ b/volk/orc/volk_32fc_s32fc_multiply_32fc_a_orc_impl.orc
@@ -0,0 +1,18 @@
+.function volk_32fc_s32fc_multiply_32fc_a_orc_impl
+.source 8 src1
+.floatparam 8 scalar
+.dest 8 dst
+.temp 8 iqprod
+.temp 4 real
+.temp 4 imag
+.temp 4 ac
+.temp 4 bd
+.temp 8 swapped
+x2 mulf iqprod, src1, scalar
+splitql bd, ac, iqprod
+subf real, ac, bd
+swaplq swapped, src1
+x2 mulf iqprod, swapped, scalar
+splitql bd, ac, iqprod
+addf imag, ac, bd
+mergelq dst, real, imag