summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Rondeau <trondeau@vt.edu>2012-07-03 20:35:28 -0400
committerTom Rondeau <trondeau@vt.edu>2012-07-03 20:35:28 -0400
commitbf665eeb45e6816202e146eeff059a1ac81b73e2 (patch)
tree563e065866891d708a94f1298004dc65335f7b09
parent1b6152005ca48614942bb8260d340c3dba039c11 (diff)
qtgui: using volk to handle conversion/decomposing float/complex to double buffers.
Adding an unaligned 32fc_deinterleave_64f_x2 volk kernel to support this.
-rw-r--r--gr-qtgui/lib/CMakeLists.txt4
-rw-r--r--gr-qtgui/lib/qtgui_const_sink_c.cc37
-rw-r--r--gr-qtgui/lib/qtgui_freq_sink_c.cc23
-rw-r--r--gr-qtgui/lib/qtgui_freq_sink_f.cc23
-rw-r--r--gr-qtgui/lib/qtgui_time_sink_c.cc33
-rw-r--r--gr-qtgui/lib/qtgui_time_sink_f.cc30
-rw-r--r--volk/apps/volk_profile.cc1
-rw-r--r--volk/include/volk/volk_32fc_deinterleave_64f_x2_u.h78
-rw-r--r--volk/lib/testqa.cc1
9 files changed, 180 insertions, 50 deletions
diff --git a/gr-qtgui/lib/CMakeLists.txt b/gr-qtgui/lib/CMakeLists.txt
index 774944cf49..812342e63a 100644
--- a/gr-qtgui/lib/CMakeLists.txt
+++ b/gr-qtgui/lib/CMakeLists.txt
@@ -82,6 +82,9 @@ include_directories(
include_directories(${Boost_INCLUDE_DIRS})
link_directories(${Boost_LIBRARY_DIRS})
+include_directories(${FFTW3F_INCLUDE_DIRS})
+link_directories(${FFTW3F_LIBRARY_DIRS})
+
include_directories(${QWT_INCLUDE_DIRS})
link_directories(${QWT_LIBRARY_DIRS})
@@ -97,6 +100,7 @@ list(APPEND qtgui_libs
${QT_LIBRARIES}
${QWT_LIBRARIES}
${PYTHON_LIBRARIES}
+ ${FFTW3F_LIBRARIES}
)
add_definitions(-DQWT_DLL) #setup QWT library linkage
diff --git a/gr-qtgui/lib/qtgui_const_sink_c.cc b/gr-qtgui/lib/qtgui_const_sink_c.cc
index 64260e88f5..df5e77ce45 100644
--- a/gr-qtgui/lib/qtgui_const_sink_c.cc
+++ b/gr-qtgui/lib/qtgui_const_sink_c.cc
@@ -56,10 +56,15 @@ qtgui_const_sink_c::qtgui_const_sink_c(int size,
d_index = 0;
for(int i = 0; i < d_nconnections; i++) {
- d_residbufs_real.push_back(new double[d_size]);
- d_residbufs_imag.push_back(new double[d_size]);
+ d_residbufs_real.push_back(gri_fft_malloc_double(d_size));
+ d_residbufs_imag.push_back(gri_fft_malloc_double(d_size));
}
+ // Set alignment properties for VOLK
+ const int alignment_multiple =
+ volk_get_alignment() / sizeof(gr_complex);
+ set_alignment(std::max(1,alignment_multiple));
+
initialize();
}
@@ -67,8 +72,8 @@ qtgui_const_sink_c::~qtgui_const_sink_c()
{
// d_main_gui is a qwidget destroyed with its parent
for(int i = 0; i < d_nconnections; i++) {
- delete [] d_residbufs_real[i];
- delete [] d_residbufs_imag[i];
+ gri_fft_free(d_residbufs_real[i]);
+ gri_fft_free(d_residbufs_imag[i]);
}
}
@@ -159,9 +164,15 @@ qtgui_const_sink_c::work(int noutput_items,
// Fill up residbufs with d_size number of items
for(n = 0; n < d_nconnections; n++) {
in = (const gr_complex*)input_items[idx++];
- for(unsigned int k = 0; k < resid; k++) {
- d_residbufs_real[n][d_index+k] = (double)in[j+k].real();
- d_residbufs_imag[n][d_index+k] = (double)in[j+k].imag();
+        // Append at d_index; with a nonzero offset the pointers may not be aligned for _a
+        if(is_unaligned() || d_index != 0 || j != 0) {
+          volk_32fc_deinterleave_64f_x2_u(&d_residbufs_real[n][d_index],
+                                          &d_residbufs_imag[n][d_index],
+                                          &in[j], resid);
+        } else {
+          volk_32fc_deinterleave_64f_x2_a(&d_residbufs_real[n][d_index],
+                                          &d_residbufs_imag[n][d_index],
+                                          &in[j], resid);
}
}
@@ -182,9 +193,15 @@ qtgui_const_sink_c::work(int noutput_items,
else {
for(n = 0; n < d_nconnections; n++) {
in = (const gr_complex*)input_items[idx++];
- for(unsigned int k = 0; k < datasize; k++) {
- d_residbufs_real[n][d_index+k] = in[j+k].real();
- d_residbufs_imag[n][d_index+k] = in[j+k].imag();
+        // Only datasize items are available here; offset pointers may not be aligned for _a
+        if(is_unaligned() || d_index != 0 || j != 0) {
+          volk_32fc_deinterleave_64f_x2_u(&d_residbufs_real[n][d_index],
+                                          &d_residbufs_imag[n][d_index],
+                                          &in[j], datasize);
+        } else {
+          volk_32fc_deinterleave_64f_x2_a(&d_residbufs_real[n][d_index],
+                                          &d_residbufs_imag[n][d_index],
+                                          &in[j], datasize);
}
}
d_index += datasize;
diff --git a/gr-qtgui/lib/qtgui_freq_sink_c.cc b/gr-qtgui/lib/qtgui_freq_sink_c.cc
index fb935cfda1..59a01ff9c3 100644
--- a/gr-qtgui/lib/qtgui_freq_sink_c.cc
+++ b/gr-qtgui/lib/qtgui_freq_sink_c.cc
@@ -68,8 +68,8 @@ qtgui_freq_sink_c::qtgui_freq_sink_c(int fftsize, int wintype,
d_index = 0;
for(int i = 0; i < d_nconnections; i++) {
- d_residbufs.push_back(new gr_complex[d_fftsize]);
- d_magbufs.push_back(new double[d_fftsize]);
+ d_residbufs.push_back(gri_fft_malloc_complex(d_fftsize));
+ d_magbufs.push_back(gri_fft_malloc_double(d_fftsize));
}
buildwindow();
@@ -80,8 +80,8 @@ qtgui_freq_sink_c::qtgui_freq_sink_c(int fftsize, int wintype,
qtgui_freq_sink_c::~qtgui_freq_sink_c()
{
for(int i = 0; i < d_nconnections; i++) {
- delete [] d_residbufs[i];
- delete [] d_magbufs[i];
+ gri_fft_free(d_residbufs[i]);
+ gri_fft_free(d_magbufs[i]);
}
delete d_fft;
}
@@ -232,11 +232,11 @@ qtgui_freq_sink_c::fftresize()
// Resize residbuf and replace data
for(int i = 0; i < d_nconnections; i++) {
- delete [] d_residbufs[i];
- delete [] d_magbufs[i];
+ gri_fft_free(d_residbufs[i]);
+ gri_fft_free(d_magbufs[i]);
- d_residbufs.push_back(new gr_complex[newfftsize]);
- d_magbufs.push_back(new double[newfftsize]);
+      d_residbufs[i] = gri_fft_malloc_complex(newfftsize); // reuse slot i: it was just freed
+      d_magbufs[i] = gri_fft_malloc_double(newfftsize);    // push_back would leave slot i dangling
}
// Set new fft size and reset buffer index
@@ -272,17 +272,16 @@ qtgui_freq_sink_c::work(int noutput_items,
// If we have enough input for one full FFT, do it
if(datasize >= resid) {
- float *fbuf = new float[d_fftsize];
+ float *fbuf = gri_fft_malloc_float(d_fftsize);
for(int n = 0; n < d_nconnections; n++) {
// Fill up residbuf with d_fftsize number of items
in = (const gr_complex*)input_items[n];
memcpy(d_residbufs[n]+d_index, &in[j], sizeof(gr_complex)*resid);
fft(fbuf, d_residbufs[n], d_fftsize);
- for(int x=0; x < d_fftsize; x++)
- d_magbufs[n][x] = (double)fbuf[x];
+ volk_32f_convert_64f_a(d_magbufs[n], fbuf, d_fftsize);
}
- delete [] fbuf;
+ gri_fft_free(fbuf);
if(gruel::high_res_timer_now() - d_last_time > d_update_time) {
d_last_time = gruel::high_res_timer_now();
diff --git a/gr-qtgui/lib/qtgui_freq_sink_f.cc b/gr-qtgui/lib/qtgui_freq_sink_f.cc
index 39ae336380..6a2878cebe 100644
--- a/gr-qtgui/lib/qtgui_freq_sink_f.cc
+++ b/gr-qtgui/lib/qtgui_freq_sink_f.cc
@@ -68,8 +68,8 @@ qtgui_freq_sink_f::qtgui_freq_sink_f(int fftsize, int wintype,
d_index = 0;
for(int i = 0; i < d_nconnections; i++) {
- d_residbufs.push_back(new float[d_fftsize]);
- d_magbufs.push_back(new double[d_fftsize]);
+ d_residbufs.push_back(gri_fft_malloc_float(d_fftsize));
+ d_magbufs.push_back(gri_fft_malloc_double(d_fftsize));
}
buildwindow();
@@ -80,8 +80,8 @@ qtgui_freq_sink_f::qtgui_freq_sink_f(int fftsize, int wintype,
qtgui_freq_sink_f::~qtgui_freq_sink_f()
{
for(int i = 0; i < d_nconnections; i++) {
- delete [] d_residbufs[i];
- delete [] d_magbufs[i];
+ gri_fft_free(d_residbufs[i]);
+ gri_fft_free(d_magbufs[i]);
}
delete d_fft;
}
@@ -222,11 +222,11 @@ qtgui_freq_sink_f::fftresize()
// Resize residbuf and replace data
for(int i = 0; i < d_nconnections; i++) {
- delete [] d_residbufs[i];
- delete [] d_magbufs[i];
+ gri_fft_free(d_residbufs[i]);
+ gri_fft_free(d_magbufs[i]);
- d_residbufs.push_back(new float[newfftsize]);
- d_magbufs.push_back(new double[newfftsize]);
+      d_residbufs[i] = gri_fft_malloc_float(newfftsize); // reuse slot i: it was just freed
+      d_magbufs[i] = gri_fft_malloc_double(newfftsize);  // push_back would leave slot i dangling
}
// Set new fft size and reset buffer index
@@ -262,17 +262,16 @@ qtgui_freq_sink_f::work(int noutput_items,
// If we have enough input for one full FFT, do it
if(datasize >= resid) {
- float *fbuf = new float[d_fftsize];
+ float *fbuf = gri_fft_malloc_float(d_fftsize);
for(int n = 0; n < d_nconnections; n++) {
// Fill up residbuf with d_fftsize number of items
in = (const float*)input_items[n];
memcpy(d_residbufs[n]+d_index, &in[j], sizeof(float)*resid);
fft(fbuf, d_residbufs[n], d_fftsize);
- for(int x=0; x < d_fftsize; x++)
- d_magbufs[n][x] = (double)fbuf[x];
+ volk_32f_convert_64f_a(d_magbufs[n], fbuf, d_fftsize);
}
- delete [] fbuf;
+ gri_fft_free(fbuf);
if(gruel::high_res_timer_now() - d_last_time > d_update_time) {
d_last_time = gruel::high_res_timer_now();
diff --git a/gr-qtgui/lib/qtgui_time_sink_c.cc b/gr-qtgui/lib/qtgui_time_sink_c.cc
index e1a2e1eb0f..b922e059a3 100644
--- a/gr-qtgui/lib/qtgui_time_sink_c.cc
+++ b/gr-qtgui/lib/qtgui_time_sink_c.cc
@@ -56,9 +56,14 @@ qtgui_time_sink_c::qtgui_time_sink_c(int size, double bw,
d_index = 0;
for(int i = 0; i < d_nconnections; i++) {
- d_residbufs.push_back(new double[d_size]);
+ d_residbufs.push_back(gri_fft_malloc_double(d_size));
}
+ // Set alignment properties for VOLK
+ const int alignment_multiple =
+ volk_get_alignment() / sizeof(gr_complex);
+ set_alignment(std::max(1,alignment_multiple));
+
initialize();
}
@@ -66,7 +71,7 @@ qtgui_time_sink_c::~qtgui_time_sink_c()
{
// d_main_gui is a qwidget destroyed with its parent
for(int i = 0; i < d_nconnections; i++) {
- delete [] d_residbufs[i];
+ gri_fft_free(d_residbufs[i]);
}
}
@@ -163,9 +168,15 @@ qtgui_time_sink_c::work(int noutput_items,
// Fill up residbufs with d_size number of items
for(n = 0; n < d_nconnections; n+=2) {
in = (const gr_complex*)input_items[idx++];
- for(unsigned int k = 0; k < resid; k++) {
- d_residbufs[n][d_index+k] = in[j+k].real();
- d_residbufs[n+1][d_index+k] = in[j+k].imag();
+        // Append at d_index; with a nonzero offset the pointers may not be aligned for _a
+        if(is_unaligned() || d_index != 0 || j != 0) {
+          volk_32fc_deinterleave_64f_x2_u(&d_residbufs[n][d_index],
+                                          &d_residbufs[n+1][d_index],
+                                          &in[j], resid);
+        } else {
+          volk_32fc_deinterleave_64f_x2_a(&d_residbufs[n][d_index],
+                                          &d_residbufs[n+1][d_index],
+                                          &in[j], resid);
}
}
@@ -184,9 +195,15 @@ qtgui_time_sink_c::work(int noutput_items,
else {
for(n = 0; n < d_nconnections; n+=2) {
in = (const gr_complex*)input_items[idx++];
- for(unsigned int k = 0; k < datasize; k++) {
- d_residbufs[n][d_index+k] = in[j+k].real();
- d_residbufs[n+1][d_index+k] = in[j+k].imag();
+        // Only datasize items are available here; offset pointers may not be aligned for _a
+        if(is_unaligned() || d_index != 0 || j != 0) {
+          volk_32fc_deinterleave_64f_x2_u(&d_residbufs[n][d_index],
+                                          &d_residbufs[n+1][d_index],
+                                          &in[j], datasize);
+        } else {
+          volk_32fc_deinterleave_64f_x2_a(&d_residbufs[n][d_index],
+                                          &d_residbufs[n+1][d_index],
+                                          &in[j], datasize);
}
}
d_index += datasize;
diff --git a/gr-qtgui/lib/qtgui_time_sink_f.cc b/gr-qtgui/lib/qtgui_time_sink_f.cc
index 09fdf0a92e..24adab685f 100644
--- a/gr-qtgui/lib/qtgui_time_sink_f.cc
+++ b/gr-qtgui/lib/qtgui_time_sink_f.cc
@@ -27,6 +27,7 @@
#include <qtgui_time_sink_f.h>
#include <gr_io_signature.h>
#include <string.h>
+#include <volk/volk.h>
#include <QTimer>
@@ -55,18 +56,22 @@ qtgui_time_sink_f::qtgui_time_sink_f (int size, double bw,
d_index = 0;
for(int i = 0; i < d_nconnections; i++) {
- d_residbufs.push_back(new double[d_size]);
+ d_residbufs.push_back(gri_fft_malloc_double(d_size));
}
+  // Set alignment properties for VOLK; this sink's input items are floats
+  const int alignment_multiple =
+    volk_get_alignment() / sizeof(float);
+  set_alignment(std::max(1,alignment_multiple));
+
initialize();
- set_output_multiple(d_size);
}
qtgui_time_sink_f::~qtgui_time_sink_f()
{
// d_main_gui is a qwidget destroyed with its parent
for(int i = 0; i < d_nconnections; i++) {
- delete [] d_residbufs[i];
+ gri_fft_free(d_residbufs[i]);
}
}
@@ -155,8 +160,13 @@ qtgui_time_sink_f::work (int noutput_items,
// Fill up residbufs with d_size number of items
for(n = 0; n < d_nconnections; n++) {
in = (const float*)input_items[idx++];
- for(unsigned int k = 0; k < resid; k++) {
- d_residbufs[n][d_index+k] = in[j+k];
+        if(is_unaligned() || d_index != 0 || j != 0) { // offset pointers may not be aligned
+          volk_32f_convert_64f_u(&d_residbufs[n][d_index], // append after buffered samples
+                                 &in[j], resid);
+        }
+        else {
+          volk_32f_convert_64f_a(&d_residbufs[n][d_index],
+                                 &in[j], resid);
}
}
@@ -173,11 +183,15 @@ qtgui_time_sink_f::work (int noutput_items,
// Otherwise, copy what we received into the residbufs for next time
// because we set the output_multiple, this should never need to be called
else {
- assert(0);
for(n = 0; n < d_nconnections; n++) {
in = (const float*)input_items[idx++];
- for(unsigned int k = 0; k < resid; k++) {
- d_residbufs[n][d_index+k] = in[j+k];
+        if(is_unaligned() || d_index != 0 || j != 0) { // offset pointers may not be aligned
+          volk_32f_convert_64f_u(&d_residbufs[n][d_index],
+                                 &in[j], datasize);
+        }
+        else {
+          volk_32f_convert_64f_a(&d_residbufs[n][d_index],
+                                 &in[j], datasize);
}
}
d_index += datasize;
diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc
index 6244abb357..b58d5ba2ab 100644
--- a/volk/apps/volk_profile.cc
+++ b/volk/apps/volk_profile.cc
@@ -45,6 +45,7 @@ int main(int argc, char *argv[]) {
VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 204600, 10000, &results);
VOLK_PROFILE(volk_32fc_deinterleave_32f_x2_a, 1e-4, 0, 204600, 1000, &results);
VOLK_PROFILE(volk_32fc_deinterleave_64f_x2_a, 1e-4, 0, 204600, 1000, &results);
+ VOLK_PROFILE(volk_32fc_deinterleave_64f_x2_u, 1e-4, 0, 204600, 1000, &results);
VOLK_PROFILE(volk_32fc_s32f_deinterleave_real_16i_a, 0, 32768, 204600, 10000, &results);
VOLK_PROFILE(volk_32fc_deinterleave_imag_32f_a, 1e-4, 0, 204600, 5000, &results);
VOLK_PROFILE(volk_32fc_deinterleave_real_32f_a, 1e-4, 0, 204600, 5000, &results);
diff --git a/volk/include/volk/volk_32fc_deinterleave_64f_x2_u.h b/volk/include/volk/volk_32fc_deinterleave_64f_x2_u.h
new file mode 100644
index 0000000000..d6f5dc111d
--- /dev/null
+++ b/volk/include/volk/volk_32fc_deinterleave_64f_x2_u.h
@@ -0,0 +1,78 @@
+#ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_u_H
+#define INCLUDED_volk_32fc_deinterleave_64f_x2_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+
+#ifdef LV_HAVE_SSE2
+#include <emmintrin.h>
+/*!
+  \brief Deinterleaves the lv_32fc_t vector into double I & Q vector data (SSE2, unaligned loads/stores)
+  \param iBuffer The I buffer output data
+  \param qBuffer The Q buffer output data
+  \param complexVector The complex input vector
+  \param num_points The number of complex data values to be deinterleaved
+*/
+static inline void volk_32fc_deinterleave_64f_x2_u_sse2(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
+  unsigned int number = 0;
+
+  const float* complexVectorPtr = (const float*)complexVector;
+  double* iBufferPtr = iBuffer;
+  double* qBufferPtr = qBuffer;
+
+  const unsigned int halfPoints = num_points / 2;
+  __m128 cplxValue, fVal;
+  __m128d dVal;
+
+  for(;number < halfPoints; number++){
+    // This is the _u kernel: use loadu/storeu so unaligned buffers do not fault
+    cplxValue = _mm_loadu_ps(complexVectorPtr);
+    complexVectorPtr += 4;
+
+    // Arrange in i1i2i1i2 format
+    fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2,0,2,0));
+    dVal = _mm_cvtps_pd(fVal);
+    _mm_storeu_pd(iBufferPtr, dVal);
+
+    // Arrange in q1q2q1q2 format
+    fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(3,1,3,1));
+    dVal = _mm_cvtps_pd(fVal);
+    _mm_storeu_pd(qBufferPtr, dVal);
+
+    iBufferPtr += 2;
+    qBufferPtr += 2;
+  }
+  // Scalar tail: converts the last point when num_points is odd
+  number = halfPoints * 2;
+  for(; number < num_points; number++){
+    *iBufferPtr++ = *complexVectorPtr++;
+    *qBufferPtr++ = *complexVectorPtr++;
+  }
+}
+#endif /* LV_HAVE_SSE2 */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+  \brief Portable C deinterleave of an lv_32fc_t vector into separate double I and Q vectors
+  \param iBuffer Output buffer receiving the I values as doubles
+  \param qBuffer Output buffer receiving the Q values as doubles
+  \param complexVector Complex input vector to split
+  \param num_points Number of complex values to deinterleave
+*/
+static inline void volk_32fc_deinterleave_64f_x2_u_generic(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
+  // The complex input is walked as a flat float array, two floats per point
+  const float* pairPtr = (float*)complexVector;
+  unsigned int idx;
+
+  // First float of each pair goes to iBuffer, second to qBuffer
+  for(idx = 0; idx < num_points; idx++, pairPtr += 2){
+    iBuffer[idx] = (double)pairPtr[0];
+    qBuffer[idx] = (double)pairPtr[1];
+  }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+
+
+#endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_u_H */
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index f0011190e1..5f13def09d 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -32,6 +32,7 @@ VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a, 1e-4, 10.0, 20460, 1);
VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 20460, 1);
VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a, 1e-4, 0, 20460, 1);
VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_u, 1e-4, 0, 20460, 1);
VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a, 0, 32768, 20460, 1);
VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a, 1e-4, 0, 20460, 1);
VOLK_RUN_TESTS(volk_32fc_deinterleave_imag_32f_a, 1e-4, 0, 20460, 1);