diff options
author | Nick Foster <nick@ettus.com> | 2011-11-09 11:32:47 -0800 |
---|---|---|
committer | Josh Blum <josh@joshknows.com> | 2011-12-01 09:08:00 -0500 |
commit | 488f0c614f0e951314c686e7f168bc62cea250d5 (patch) | |
tree | 46db451c2cd11c209101346de0823079054a3d06 | |
parent | d56564f5e3d9ccefc9ac34c81dc8d061298301e7 (diff) |
Volk: added 32fc x scalar multiply, implemented in Orc & generic. Orc/SSE tested 10x faster than generic.
-rw-r--r-- | volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h | 46 | ||||
-rw-r--r-- | volk/lib/testqa.cc | 2 | ||||
-rw-r--r-- | volk/orc/volk_32fc_s32fc_multiply_32fc_a_orc_impl.orc | 18 |
3 files changed, 65 insertions, 1 deletions
diff --git a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h new file mode 100644 index 0000000000..b27a7259f0 --- /dev/null +++ b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h @@ -0,0 +1,46 @@ +#ifndef INCLUDED_volk_32fc_s32fc_multiply_32fc_a_H +#define INCLUDED_volk_32fc_s32fc_multiply_32fc_a_H + +#include <inttypes.h> +#include <stdio.h> +#include <volk/volk_complex.h> +#include <float.h> + +#ifdef LV_HAVE_GENERIC + /*! + \brief Multiplies each element of the input complex vector by a complex scalar and stores the results in the output vector + \param cVector The vector where the results will be stored + \param aVector The input vector whose elements are multiplied by scalar + \param scalar The complex value that every element of aVector is multiplied by + \param num_points The number of complex values in aVector to be multiplied by scalar and stored into cVector + */ +static inline void volk_32fc_s32fc_multiply_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){ + lv_32fc_t* cPtr = cVector; + const lv_32fc_t* aPtr = aVector; + unsigned int number = 0; + + for(number = 0; number < num_points; number++){ + *cPtr++ = (*aPtr++) * scalar; + } +} +#endif /* LV_HAVE_GENERIC */ + +#ifdef LV_HAVE_ORC + /*! 
+ \brief Multiplies each element of the input complex vector by a complex scalar by forwarding all arguments to volk_32fc_s32fc_multiply_32fc_a_orc_impl + \param cVector The vector where the results will be stored + \param aVector The input vector whose elements are multiplied by scalar + \param scalar The complex value that every element of aVector is multiplied by + \param num_points The number of complex values in aVector to be multiplied by scalar and stored into cVector + */ +extern void volk_32fc_s32fc_multiply_32fc_a_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points); +static inline void volk_32fc_s32fc_multiply_32fc_a_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){ + volk_32fc_s32fc_multiply_32fc_a_orc_impl(cVector, aVector, scalar, num_points); +} +#endif /* LV_HAVE_ORC */ + + + + + +#endif /* INCLUDED_volk_32fc_s32fc_multiply_32fc_a_H */ diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc index 62e62c2f4d..9d1d1bca2d 100644 --- a/volk/lib/testqa.cc +++ b/volk/lib/testqa.cc @@ -4,6 +4,7 @@ //VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4_a, 1e-4, 2046, 10000); //VOLK_RUN_TESTS(volk_16i_branch_4_state_8_a, 1e-4, 2046, 10000); +VOLK_RUN_TESTS(volk_32fc_s32fc_multiply_32fc_a, 1e-4, 0, 20460, 10000); VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a, 1e-5, 32768.0, 204600, 10000); VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a, 0, 0, 20460, 10000); VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a, 0, 0, 20460, 10000); @@ -90,4 +91,3 @@ VOLK_RUN_TESTS(volk_8i_convert_16i_a, 0, 0, 20460, 20000); VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 20460, 2000); VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a, 1e-4, 100, 20460, 2000); VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 20460, 2000); - diff --git a/volk/orc/volk_32fc_s32fc_multiply_32fc_a_orc_impl.orc b/volk/orc/volk_32fc_s32fc_multiply_32fc_a_orc_impl.orc new file mode 100644 index 0000000000..2577e034fe --- /dev/null +++ b/volk/orc/volk_32fc_s32fc_multiply_32fc_a_orc_impl.orc @@ -0,0 +1,18 @@ 
+.function volk_32fc_s32fc_multiply_32fc_a_orc_impl +.source 8 src1 +.floatparam 8 scalar +.dest 8 dst +.temp 8 iqprod +.temp 4 real +.temp 4 imag +.temp 4 ac +.temp 4 bd +.temp 8 swapped +x2 mulf iqprod, src1, scalar +splitql bd, ac, iqprod +subf real, ac, bd +swaplq swapped, src1 +x2 mulf iqprod, swapped, scalar +splitql bd, ac, iqprod +addf imag, ac, bd +mergelq dst, real, imag |