diff options
author | Tom Rondeau <trondeau@vt.edu> | 2012-07-03 20:35:28 -0400 |
---|---|---|
committer | Tom Rondeau <trondeau@vt.edu> | 2012-07-03 20:35:28 -0400 |
commit | bf665eeb45e6816202e146eeff059a1ac81b73e2 (patch) | |
tree | 563e065866891d708a94f1298004dc65335f7b09 | |
parent | 1b6152005ca48614942bb8260d340c3dba039c11 (diff) |
qtgui: using volk to handle conversion/decomposing float/complex to double buffers.
Adding an unaligned 32fc_deinterleave_64f_x2 volk kernel to support this.
-rw-r--r-- | gr-qtgui/lib/CMakeLists.txt | 4 | ||||
-rw-r--r-- | gr-qtgui/lib/qtgui_const_sink_c.cc | 37 | ||||
-rw-r--r-- | gr-qtgui/lib/qtgui_freq_sink_c.cc | 23 | ||||
-rw-r--r-- | gr-qtgui/lib/qtgui_freq_sink_f.cc | 23 | ||||
-rw-r--r-- | gr-qtgui/lib/qtgui_time_sink_c.cc | 33 | ||||
-rw-r--r-- | gr-qtgui/lib/qtgui_time_sink_f.cc | 30 | ||||
-rw-r--r-- | volk/apps/volk_profile.cc | 1 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_deinterleave_64f_x2_u.h | 78 | ||||
-rw-r--r-- | volk/lib/testqa.cc | 1 |
9 files changed, 180 insertions, 50 deletions
diff --git a/gr-qtgui/lib/CMakeLists.txt b/gr-qtgui/lib/CMakeLists.txt index 774944cf49..812342e63a 100644 --- a/gr-qtgui/lib/CMakeLists.txt +++ b/gr-qtgui/lib/CMakeLists.txt @@ -82,6 +82,9 @@ include_directories( include_directories(${Boost_INCLUDE_DIRS}) link_directories(${Boost_LIBRARY_DIRS}) +include_directories(${FFTW3F_INCLUDE_DIRS}) +link_directories(${FFTW3F_LIBRARY_DIRS}) + include_directories(${QWT_INCLUDE_DIRS}) link_directories(${QWT_LIBRARY_DIRS}) @@ -97,6 +100,7 @@ list(APPEND qtgui_libs ${QT_LIBRARIES} ${QWT_LIBRARIES} ${PYTHON_LIBRARIES} + ${FFTW3F_LIBRARIES} ) add_definitions(-DQWT_DLL) #setup QWT library linkage diff --git a/gr-qtgui/lib/qtgui_const_sink_c.cc b/gr-qtgui/lib/qtgui_const_sink_c.cc index 64260e88f5..df5e77ce45 100644 --- a/gr-qtgui/lib/qtgui_const_sink_c.cc +++ b/gr-qtgui/lib/qtgui_const_sink_c.cc @@ -56,10 +56,15 @@ qtgui_const_sink_c::qtgui_const_sink_c(int size, d_index = 0; for(int i = 0; i < d_nconnections; i++) { - d_residbufs_real.push_back(new double[d_size]); - d_residbufs_imag.push_back(new double[d_size]); + d_residbufs_real.push_back(gri_fft_malloc_double(d_size)); + d_residbufs_imag.push_back(gri_fft_malloc_double(d_size)); } + // Set alignment properties for VOLK + const int alignment_multiple = + volk_get_alignment() / sizeof(gr_complex); + set_alignment(std::max(1,alignment_multiple)); + initialize(); } @@ -67,8 +72,8 @@ qtgui_const_sink_c::~qtgui_const_sink_c() { // d_main_gui is a qwidget destroyed with its parent for(int i = 0; i < d_nconnections; i++) { - delete [] d_residbufs_real[i]; - delete [] d_residbufs_imag[i]; + gri_fft_free(d_residbufs_real[i]); + gri_fft_free(d_residbufs_imag[i]); } } @@ -159,9 +164,15 @@ qtgui_const_sink_c::work(int noutput_items, // Fill up residbufs with d_size number of items for(n = 0; n < d_nconnections; n++) { in = (const gr_complex*)input_items[idx++]; - for(unsigned int k = 0; k < resid; k++) { - d_residbufs_real[n][d_index+k] = (double)in[j+k].real(); - 
d_residbufs_imag[n][d_index+k] = (double)in[j+k].imag(); + if(is_unaligned()) { + volk_32fc_deinterleave_64f_x2_u(d_residbufs_real[n], + d_residbufs_imag[n], + &in[j], resid); + } + else { + volk_32fc_deinterleave_64f_x2_a(d_residbufs_real[n], + d_residbufs_imag[n], + &in[j], resid); } } @@ -182,9 +193,15 @@ qtgui_const_sink_c::work(int noutput_items, else { for(n = 0; n < d_nconnections; n++) { in = (const gr_complex*)input_items[idx++]; - for(unsigned int k = 0; k < datasize; k++) { - d_residbufs_real[n][d_index+k] = in[j+k].real(); - d_residbufs_imag[n][d_index+k] = in[j+k].imag(); + if(is_unaligned()) { + volk_32fc_deinterleave_64f_x2_u(&d_residbufs_real[n][d_index], + &d_residbufs_imag[n][d_index], + &in[j], resid); + } + else { + volk_32fc_deinterleave_64f_x2_a(&d_residbufs_real[n][d_index], + &d_residbufs_imag[n][d_index], + &in[j], resid); } } d_index += datasize; diff --git a/gr-qtgui/lib/qtgui_freq_sink_c.cc b/gr-qtgui/lib/qtgui_freq_sink_c.cc index fb935cfda1..59a01ff9c3 100644 --- a/gr-qtgui/lib/qtgui_freq_sink_c.cc +++ b/gr-qtgui/lib/qtgui_freq_sink_c.cc @@ -68,8 +68,8 @@ qtgui_freq_sink_c::qtgui_freq_sink_c(int fftsize, int wintype, d_index = 0; for(int i = 0; i < d_nconnections; i++) { - d_residbufs.push_back(new gr_complex[d_fftsize]); - d_magbufs.push_back(new double[d_fftsize]); + d_residbufs.push_back(gri_fft_malloc_complex(d_fftsize)); + d_magbufs.push_back(gri_fft_malloc_double(d_fftsize)); } buildwindow(); @@ -80,8 +80,8 @@ qtgui_freq_sink_c::qtgui_freq_sink_c(int fftsize, int wintype, qtgui_freq_sink_c::~qtgui_freq_sink_c() { for(int i = 0; i < d_nconnections; i++) { - delete [] d_residbufs[i]; - delete [] d_magbufs[i]; + gri_fft_free(d_residbufs[i]); + gri_fft_free(d_magbufs[i]); } delete d_fft; } @@ -232,11 +232,11 @@ qtgui_freq_sink_c::fftresize() // Resize residbuf and replace data for(int i = 0; i < d_nconnections; i++) { - delete [] d_residbufs[i]; - delete [] d_magbufs[i]; + gri_fft_free(d_residbufs[i]); + gri_fft_free(d_magbufs[i]); - 
d_residbufs.push_back(new gr_complex[newfftsize]); - d_magbufs.push_back(new double[newfftsize]); + d_residbufs.push_back(gri_fft_malloc_complex(newfftsize)); + d_magbufs.push_back(gri_fft_malloc_double(newfftsize)); } // Set new fft size and reset buffer index @@ -272,17 +272,16 @@ qtgui_freq_sink_c::work(int noutput_items, // If we have enough input for one full FFT, do it if(datasize >= resid) { - float *fbuf = new float[d_fftsize]; + float *fbuf = gri_fft_malloc_float(d_fftsize); for(int n = 0; n < d_nconnections; n++) { // Fill up residbuf with d_fftsize number of items in = (const gr_complex*)input_items[n]; memcpy(d_residbufs[n]+d_index, &in[j], sizeof(gr_complex)*resid); fft(fbuf, d_residbufs[n], d_fftsize); - for(int x=0; x < d_fftsize; x++) - d_magbufs[n][x] = (double)fbuf[x]; + volk_32f_convert_64f_a(d_magbufs[n], fbuf, d_fftsize); } - delete [] fbuf; + gri_fft_free(fbuf); if(gruel::high_res_timer_now() - d_last_time > d_update_time) { d_last_time = gruel::high_res_timer_now(); diff --git a/gr-qtgui/lib/qtgui_freq_sink_f.cc b/gr-qtgui/lib/qtgui_freq_sink_f.cc index 39ae336380..6a2878cebe 100644 --- a/gr-qtgui/lib/qtgui_freq_sink_f.cc +++ b/gr-qtgui/lib/qtgui_freq_sink_f.cc @@ -68,8 +68,8 @@ qtgui_freq_sink_f::qtgui_freq_sink_f(int fftsize, int wintype, d_index = 0; for(int i = 0; i < d_nconnections; i++) { - d_residbufs.push_back(new float[d_fftsize]); - d_magbufs.push_back(new double[d_fftsize]); + d_residbufs.push_back(gri_fft_malloc_float(d_fftsize)); + d_magbufs.push_back(gri_fft_malloc_double(d_fftsize)); } buildwindow(); @@ -80,8 +80,8 @@ qtgui_freq_sink_f::qtgui_freq_sink_f(int fftsize, int wintype, qtgui_freq_sink_f::~qtgui_freq_sink_f() { for(int i = 0; i < d_nconnections; i++) { - delete [] d_residbufs[i]; - delete [] d_magbufs[i]; + gri_fft_free(d_residbufs[i]); + gri_fft_free(d_magbufs[i]); } delete d_fft; } @@ -222,11 +222,11 @@ qtgui_freq_sink_f::fftresize() // Resize residbuf and replace data for(int i = 0; i < d_nconnections; i++) { - 
delete [] d_residbufs[i]; - delete [] d_magbufs[i]; + gri_fft_free(d_residbufs[i]); + gri_fft_free(d_magbufs[i]); - d_residbufs.push_back(new float[newfftsize]); - d_magbufs.push_back(new double[newfftsize]); + d_residbufs.push_back(gri_fft_malloc_float(newfftsize)); + d_magbufs.push_back(gri_fft_malloc_double(newfftsize)); } // Set new fft size and reset buffer index @@ -262,17 +262,16 @@ qtgui_freq_sink_f::work(int noutput_items, // If we have enough input for one full FFT, do it if(datasize >= resid) { - float *fbuf = new float[d_fftsize]; + float *fbuf = gri_fft_malloc_float(d_fftsize); for(int n = 0; n < d_nconnections; n++) { // Fill up residbuf with d_fftsize number of items in = (const float*)input_items[n]; memcpy(d_residbufs[n]+d_index, &in[j], sizeof(float)*resid); fft(fbuf, d_residbufs[n], d_fftsize); - for(int x=0; x < d_fftsize; x++) - d_magbufs[n][x] = (double)fbuf[x]; + volk_32f_convert_64f_a(d_magbufs[n], fbuf, d_fftsize); } - delete [] fbuf; + gri_fft_free(fbuf); if(gruel::high_res_timer_now() - d_last_time > d_update_time) { d_last_time = gruel::high_res_timer_now(); diff --git a/gr-qtgui/lib/qtgui_time_sink_c.cc b/gr-qtgui/lib/qtgui_time_sink_c.cc index e1a2e1eb0f..b922e059a3 100644 --- a/gr-qtgui/lib/qtgui_time_sink_c.cc +++ b/gr-qtgui/lib/qtgui_time_sink_c.cc @@ -56,9 +56,14 @@ qtgui_time_sink_c::qtgui_time_sink_c(int size, double bw, d_index = 0; for(int i = 0; i < d_nconnections; i++) { - d_residbufs.push_back(new double[d_size]); + d_residbufs.push_back(gri_fft_malloc_double(d_size)); } + // Set alignment properties for VOLK + const int alignment_multiple = + volk_get_alignment() / sizeof(gr_complex); + set_alignment(std::max(1,alignment_multiple)); + initialize(); } @@ -66,7 +71,7 @@ qtgui_time_sink_c::~qtgui_time_sink_c() { // d_main_gui is a qwidget destroyed with its parent for(int i = 0; i < d_nconnections; i++) { - delete [] d_residbufs[i]; + gri_fft_free(d_residbufs[i]); } } @@ -163,9 +168,15 @@ qtgui_time_sink_c::work(int 
noutput_items, // Fill up residbufs with d_size number of items for(n = 0; n < d_nconnections; n+=2) { in = (const gr_complex*)input_items[idx++]; - for(unsigned int k = 0; k < resid; k++) { - d_residbufs[n][d_index+k] = in[j+k].real(); - d_residbufs[n+1][d_index+k] = in[j+k].imag(); + if(is_unaligned()) { + volk_32fc_deinterleave_64f_x2_u(d_residbufs[n], + d_residbufs[n+1], + &in[j], resid); + } + else { + volk_32fc_deinterleave_64f_x2_a(d_residbufs[n], + d_residbufs[n+1], + &in[j], resid); } } @@ -184,9 +195,15 @@ qtgui_time_sink_c::work(int noutput_items, else { for(n = 0; n < d_nconnections; n+=2) { in = (const gr_complex*)input_items[idx++]; - for(unsigned int k = 0; k < datasize; k++) { - d_residbufs[n][d_index+k] = in[j+k].real(); - d_residbufs[n+1][d_index+k] = in[j+k].imag(); + if(is_unaligned()) { + volk_32fc_deinterleave_64f_x2_u(&d_residbufs[n][d_index], + &d_residbufs[n+1][d_index], + &in[j], resid); + } + else { + volk_32fc_deinterleave_64f_x2_a(&d_residbufs[n][d_index], + &d_residbufs[n+1][d_index], + &in[j], datasize); } } d_index += datasize; diff --git a/gr-qtgui/lib/qtgui_time_sink_f.cc b/gr-qtgui/lib/qtgui_time_sink_f.cc index 09fdf0a92e..24adab685f 100644 --- a/gr-qtgui/lib/qtgui_time_sink_f.cc +++ b/gr-qtgui/lib/qtgui_time_sink_f.cc @@ -27,6 +27,7 @@ #include <qtgui_time_sink_f.h> #include <gr_io_signature.h> #include <string.h> +#include <volk/volk.h> #include <QTimer> @@ -55,18 +56,22 @@ qtgui_time_sink_f::qtgui_time_sink_f (int size, double bw, d_index = 0; for(int i = 0; i < d_nconnections; i++) { - d_residbufs.push_back(new double[d_size]); + d_residbufs.push_back(gri_fft_malloc_double(d_size)); } + // Set alignment properties for VOLK + const int alignment_multiple = + volk_get_alignment() / sizeof(gr_complex); + set_alignment(std::max(1,alignment_multiple)); + initialize(); - set_output_multiple(d_size); } qtgui_time_sink_f::~qtgui_time_sink_f() { // d_main_gui is a qwidget destroyed with its parent for(int i = 0; i < d_nconnections; 
i++) { - delete [] d_residbufs[i]; + gri_fft_free(d_residbufs[i]); } } @@ -155,8 +160,13 @@ qtgui_time_sink_f::work (int noutput_items, // Fill up residbufs with d_size number of items for(n = 0; n < d_nconnections; n++) { in = (const float*)input_items[idx++]; - for(unsigned int k = 0; k < resid; k++) { - d_residbufs[n][d_index+k] = in[j+k]; + if(is_unaligned()) { + volk_32f_convert_64f_u(d_residbufs[n], + &in[j], resid); + } + else { + volk_32f_convert_64f_a(d_residbufs[n], + &in[j], resid); } } @@ -173,11 +183,15 @@ qtgui_time_sink_f::work (int noutput_items, // Otherwise, copy what we received into the residbufs for next time // because we set the output_multiple, this should never need to be called else { - assert(0); for(n = 0; n < d_nconnections; n++) { in = (const float*)input_items[idx++]; - for(unsigned int k = 0; k < resid; k++) { - d_residbufs[n][d_index+k] = in[j+k]; + if(is_unaligned()) { + volk_32f_convert_64f_u(&d_residbufs[n][d_index], + &in[j], datasize); + } + else { + volk_32f_convert_64f_a(&d_residbufs[n][d_index], + &in[j], datasize); } } d_index += datasize; diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc index 6244abb357..b58d5ba2ab 100644 --- a/volk/apps/volk_profile.cc +++ b/volk/apps/volk_profile.cc @@ -45,6 +45,7 @@ int main(int argc, char *argv[]) { VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 204600, 10000, &results); VOLK_PROFILE(volk_32fc_deinterleave_32f_x2_a, 1e-4, 0, 204600, 1000, &results); VOLK_PROFILE(volk_32fc_deinterleave_64f_x2_a, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_64f_x2_u, 1e-4, 0, 204600, 1000, &results); VOLK_PROFILE(volk_32fc_s32f_deinterleave_real_16i_a, 0, 32768, 204600, 10000, &results); VOLK_PROFILE(volk_32fc_deinterleave_imag_32f_a, 1e-4, 0, 204600, 5000, &results); VOLK_PROFILE(volk_32fc_deinterleave_real_32f_a, 1e-4, 0, 204600, 5000, &results); diff --git a/volk/include/volk/volk_32fc_deinterleave_64f_x2_u.h 
b/volk/include/volk/volk_32fc_deinterleave_64f_x2_u.h
new file mode 100644
index 0000000000..d6f5dc111d
--- /dev/null
+++ b/volk/include/volk/volk_32fc_deinterleave_64f_x2_u.h
@@ -0,0 +1,78 @@
+#ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_u_H
+#define INCLUDED_volk_32fc_deinterleave_64f_x2_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+
+#ifdef LV_HAVE_SSE2
+#include <emmintrin.h>
+/*!
+  \brief Deinterleaves the lv_32fc_t vector into double I & Q vector data (SSE2 version; no buffer needs to be 16-byte aligned)
+  \param iBuffer The I buffer output data
+  \param qBuffer The Q buffer output data
+  \param complexVector The complex input vector
+  \param num_points The number of complex data values to be deinterleaved
+*/
+static inline void volk_32fc_deinterleave_64f_x2_u_sse2(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
+  unsigned int number = 0;
+
+  const float* complexVectorPtr = (const float*)complexVector;
+  double* iBufferPtr = iBuffer;
+  double* qBufferPtr = qBuffer;
+
+  const unsigned int halfPoints = num_points / 2;
+  __m128 cplxValue, fVal;
+  __m128d dVal;
+
+  for(;number < halfPoints; number++){
+    // Two complex points (four floats) per pass; loadu/storeu are used because this is the unaligned kernel
+    cplxValue = _mm_loadu_ps(complexVectorPtr);
+    complexVectorPtr += 4;
+
+    // Arrange in i1i2i1i2 format; _mm_cvtps_pd then widens the low two floats
+    fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2,0,2,0));
+    dVal = _mm_cvtps_pd(fVal);
+    _mm_storeu_pd(iBufferPtr, dVal);
+
+    // Arrange in q1q2q1q2 format; _mm_cvtps_pd then widens the low two floats
+    fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(3,1,3,1));
+    dVal = _mm_cvtps_pd(fVal);
+    _mm_storeu_pd(qBufferPtr, dVal);
+
+    iBufferPtr += 2;
+    qBufferPtr += 2;
+  }
+  // Scalar tail: handles the one leftover point when num_points is odd
+  number = halfPoints * 2;
+  for(; number < num_points; number++){
+    *iBufferPtr++ = *complexVectorPtr++;
+    *qBufferPtr++ = *complexVectorPtr++;
+  }
+}
+#endif /* LV_HAVE_SSE2 */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+  \brief Deinterleaves the lv_32fc_t vector into double I & Q vector data (portable scalar version)
+  \param iBuffer The I buffer output data
+  \param qBuffer The Q buffer output data
+  \param complexVector The complex input vector
+  \param num_points The number of complex data values to be deinterleaved
+*/
+static inline void volk_32fc_deinterleave_64f_x2_u_generic(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
+  unsigned int number = 0;
+  const float* complexVectorPtr = (const float*)complexVector;
+  double* iBufferPtr = iBuffer;
+  double* qBufferPtr = qBuffer;
+
+  for(number = 0; number < num_points; number++){
+    *iBufferPtr++ = (double)*complexVectorPtr++;
+    *qBufferPtr++ = (double)*complexVectorPtr++;
+  }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+
+
+#endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_u_H */
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index f0011190e1..5f13def09d 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -32,6 +32,7 @@ VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a, 1e-4, 10.0, 20460, 1);
 VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 20460, 1);
 VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a, 1e-4, 0, 20460, 1);
 VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_u, 1e-4, 0, 20460, 1);
 VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a, 0, 32768, 20460, 1);
 VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a, 1e-4, 0, 20460, 1);
 VOLK_RUN_TESTS(volk_32fc_deinterleave_imag_32f_a, 1e-4, 0, 20460, 1);
|