diff options
author | Tom Rondeau <trondeau@vt.edu> | 2012-07-03 20:35:28 -0400 |
---|---|---|
committer | Tom Rondeau <trondeau@vt.edu> | 2012-07-03 20:35:28 -0400 |
commit | bf665eeb45e6816202e146eeff059a1ac81b73e2 (patch) | |
tree | 563e065866891d708a94f1298004dc65335f7b09 /volk | |
parent | 1b6152005ca48614942bb8260d340c3dba039c11 (diff) |
qtgui: using volk to handle conversion/decomposing float/complex to double buffers.
Adding an unaligned 32fc_deinterleave_64f_x2 volk kernel to support this.
Diffstat (limited to 'volk')
-rw-r--r-- | volk/apps/volk_profile.cc | 1 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_deinterleave_64f_x2_u.h | 78 | ||||
-rw-r--r-- | volk/lib/testqa.cc | 1 |
3 files changed, 80 insertions, 0 deletions
diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc index 6244abb357..b58d5ba2ab 100644 --- a/volk/apps/volk_profile.cc +++ b/volk/apps/volk_profile.cc @@ -45,6 +45,7 @@ int main(int argc, char *argv[]) { VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 204600, 10000, &results); VOLK_PROFILE(volk_32fc_deinterleave_32f_x2_a, 1e-4, 0, 204600, 1000, &results); VOLK_PROFILE(volk_32fc_deinterleave_64f_x2_a, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_64f_x2_u, 1e-4, 0, 204600, 1000, &results); VOLK_PROFILE(volk_32fc_s32f_deinterleave_real_16i_a, 0, 32768, 204600, 10000, &results); VOLK_PROFILE(volk_32fc_deinterleave_imag_32f_a, 1e-4, 0, 204600, 5000, &results); VOLK_PROFILE(volk_32fc_deinterleave_real_32f_a, 1e-4, 0, 204600, 5000, &results); diff --git a/volk/include/volk/volk_32fc_deinterleave_64f_x2_u.h b/volk/include/volk/volk_32fc_deinterleave_64f_x2_u.h new file mode 100644 index 0000000000..d6f5dc111d --- /dev/null +++ b/volk/include/volk/volk_32fc_deinterleave_64f_x2_u.h @@ -0,0 +1,78 @@ +#ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_u_H +#define INCLUDED_volk_32fc_deinterleave_64f_x2_u_H + +#include <inttypes.h> +#include <stdio.h> + +#ifdef LV_HAVE_SSE2 +#include <emmintrin.h> +/*! 
+  \brief Deinterleaves the lv_32fc_t vector into double I & Q vector data
+
+  Unaligned SSE2 variant: all vector loads and stores use the unaligned
+  intrinsics, so none of the three buffers needs to be 16-byte aligned.
+
+  \param iBuffer The I buffer output data
+  \param qBuffer The Q buffer output data
+  \param complexVector The complex input vector
+  \param num_points The number of complex data values to be deinterleaved
+*/
+static inline void volk_32fc_deinterleave_64f_x2_u_sse2(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
+  unsigned int number = 0;
+
+  // View the complex input as a flat run of floats: i0 q0 i1 q1 ...
+  const float* complexVectorPtr = (const float*)complexVector;
+  double* iBufferPtr = iBuffer;
+  double* qBufferPtr = qBuffer;
+
+  // Each SSE iteration consumes two complex points (four floats).
+  const unsigned int halfPoints = num_points / 2;
+  __m128 cplxValue, fVal;
+  __m128d dVal;
+
+  for(;number < halfPoints; number++){
+
+    // Unaligned load: this is the _u kernel, so the input address may not
+    // be a multiple of 16 and _mm_load_ps would fault on it.
+    cplxValue = _mm_loadu_ps(complexVectorPtr);
+    complexVectorPtr += 4;
+
+    // Arrange in i1i2i1i2 format; only the low two floats are converted
+    fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2,0,2,0));
+    dVal = _mm_cvtps_pd(fVal);
+    _mm_storeu_pd(iBufferPtr, dVal);
+
+    // Arrange in q1q2q1q2 format; only the low two floats are converted
+    fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(3,1,3,1));
+    dVal = _mm_cvtps_pd(fVal);
+    _mm_storeu_pd(qBufferPtr, dVal);
+
+    iBufferPtr += 2;
+    qBufferPtr += 2;
+  }
+
+  // Scalar tail: handles the last point when num_points is odd.
+  number = halfPoints * 2;
+  for(; number < num_points; number++){
+    *iBufferPtr++ = *complexVectorPtr++;
+    *qBufferPtr++ = *complexVectorPtr++;
+  }
+}
+#endif /* LV_HAVE_SSE2 */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+  \brief Splits an interleaved lv_32fc_t vector into separate double-precision I and Q buffers
+  \param iBuffer Output buffer receiving the I (first) component of each point
+  \param qBuffer Output buffer receiving the Q (second) component of each point
+  \param complexVector The complex input vector, read as consecutive I/Q float pairs
+  \param num_points The number of complex data values to be deinterleaved
+*/
+static inline void volk_32fc_deinterleave_64f_x2_u_generic(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
+  // Walk the input two floats at a time; outputs are written by index.
+  const float* samplePair = (const float*)complexVector;
+  unsigned int idx;
+
+  for(idx = 0; idx < num_points; idx++){
+    iBuffer[idx] = (double)samplePair[0];
+    qBuffer[idx] = (double)samplePair[1];
+    samplePair += 2;
+  }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+
+
+#endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_u_H */
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc index f0011190e1..5f13def09d 100644 --- a/volk/lib/testqa.cc +++ b/volk/lib/testqa.cc @@ -32,6 +32,7 @@ VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a, 1e-4, 10.0, 20460, 1); VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 20460, 1); VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a, 1e-4, 0, 20460, 1); VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_u, 1e-4, 0, 20460, 1); VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a, 0, 32768, 20460, 1); VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a, 1e-4, 0, 20460, 1); VOLK_RUN_TESTS(volk_32fc_deinterleave_imag_32f_a, 1e-4, 0, 20460, 1); |