diff options
-rw-r--r-- | gr-filter/lib/fft_filter.cc | 16 | ||||
-rw-r--r-- | gr-filter/lib/fir_filter.cc | 60 | ||||
-rw-r--r-- | gr-filter/lib/fir_filter_with_buffer.cc | 49 | ||||
-rw-r--r-- | gr-filter/lib/qa_fir_filter_with_buffer.cc | 64 | ||||
-rw-r--r-- | gr-filter/lib/qa_mmse_fir_interpolator_cc.cc | 6 | ||||
-rw-r--r-- | gr-filter/lib/qa_mmse_fir_interpolator_ff.cc | 5 | ||||
-rw-r--r-- | gr-qtgui/lib/const_sink_c_impl.cc | 20 | ||||
-rw-r--r-- | gr-qtgui/lib/freq_sink_c_impl.cc | 30 | ||||
-rw-r--r-- | gr-qtgui/lib/freq_sink_f_impl.cc | 30 | ||||
-rw-r--r-- | gr-qtgui/lib/histogram_sink_f_impl.cc | 10 | ||||
-rw-r--r-- | gr-qtgui/lib/time_raster_sink_b_impl.cc | 20 | ||||
-rw-r--r-- | gr-qtgui/lib/time_raster_sink_f_impl.cc | 20 | ||||
-rw-r--r-- | gr-qtgui/lib/time_sink_c_impl.cc | 10 | ||||
-rw-r--r-- | gr-qtgui/lib/time_sink_f_impl.cc | 10 | ||||
-rw-r--r-- | gr-qtgui/lib/waterfall_sink_c_impl.cc | 30 | ||||
-rw-r--r-- | gr-qtgui/lib/waterfall_sink_f_impl.cc | 30 | ||||
-rw-r--r-- | volk/CMakeLists.txt | 1 | ||||
-rw-r--r-- | volk/include/volk/volk_malloc.h | 66 | ||||
-rw-r--r-- | volk/lib/CMakeLists.txt | 1 | ||||
-rw-r--r-- | volk/lib/volk_malloc.c | 176 | ||||
-rw-r--r-- | volk/tmpl/volk.tmpl.h | 1 |
21 files changed, 479 insertions, 176 deletions
diff --git a/gr-filter/lib/fft_filter.cc b/gr-filter/lib/fft_filter.cc index 829375795a..b45344d81e 100644 --- a/gr-filter/lib/fft_filter.cc +++ b/gr-filter/lib/fft_filter.cc @@ -48,7 +48,8 @@ namespace gr { { delete d_fwdfft; delete d_invfft; - fft::free(d_xformed_taps); + if(d_xformed_taps != NULL) + volk_free(d_xformed_taps); } /* @@ -106,9 +107,12 @@ namespace gr { if(d_fftsize != old_fftsize) { delete d_fwdfft; delete d_invfft; + if(d_xformed_taps != NULL) + volk_free(d_xformed_taps); d_fwdfft = new fft::fft_real_fwd(d_fftsize); d_invfft = new fft::fft_real_rev(d_fftsize); - d_xformed_taps = fft::malloc_complex(d_fftsize/2+1); + d_xformed_taps = (gr_complex*)volk_malloc(sizeof(gr_complex)*d_fftsize/2+1, + volk_get_alignment()); } } @@ -201,7 +205,8 @@ namespace gr { { delete d_fwdfft; delete d_invfft; - fft::free(d_xformed_taps); + if(d_xformed_taps != NULL) + volk_free(d_xformed_taps); } /* @@ -259,9 +264,12 @@ namespace gr { if(d_fftsize != old_fftsize) { delete d_fwdfft; delete d_invfft; + if(d_xformed_taps != NULL) + volk_free(d_xformed_taps); d_fwdfft = new fft::fft_complex(d_fftsize, true, d_nthreads); d_invfft = new fft::fft_complex(d_fftsize, false, d_nthreads); - d_xformed_taps = fft::malloc_complex(d_fftsize); + d_xformed_taps = (gr_complex*)volk_malloc(sizeof(gr_complex)*d_fftsize, + volk_get_alignment()); } } diff --git a/gr-filter/lib/fir_filter.cc b/gr-filter/lib/fir_filter.cc index 552936b8f6..e322cfb129 100644 --- a/gr-filter/lib/fir_filter.cc +++ b/gr-filter/lib/fir_filter.cc @@ -40,7 +40,7 @@ namespace gr { set_taps(taps); // Make sure the output sample is always aligned, too. 
- d_output = fft::malloc_float(1); + d_output = (float*)volk_malloc(1*sizeof(float), d_align); } fir_filter_fff::~fir_filter_fff() @@ -48,14 +48,14 @@ namespace gr { // Free all aligned taps if(d_aligned_taps != NULL) { for(int i = 0; i < d_naligned; i++) { - fft::free(d_aligned_taps[i]); + volk_free(d_aligned_taps[i]); } ::free(d_aligned_taps); d_aligned_taps = NULL; } // Free output sample - fft::free(d_output); + volk_free(d_output); } void @@ -64,7 +64,7 @@ namespace gr { // Free the taps if already allocated if(d_aligned_taps!= NULL) { for(int i = 0; i < d_naligned; i++) { - fft::free(d_aligned_taps[i]); + volk_free(d_aligned_taps[i]); } ::free(d_aligned_taps); d_aligned_taps = NULL; @@ -77,7 +77,7 @@ namespace gr { // Make a set of taps at all possible arch alignments d_aligned_taps = (float**)malloc(d_naligned*sizeof(float*)); for(int i = 0; i < d_naligned; i++) { - d_aligned_taps[i] = fft::malloc_float(d_ntaps+d_naligned-1); + d_aligned_taps[i] = (float*)volk_malloc((d_ntaps+d_naligned-1)*sizeof(float), d_align); memset(d_aligned_taps[i], 0, sizeof(float)*(d_ntaps+d_naligned-1)); for(unsigned int j = 0; j < d_ntaps; j++) d_aligned_taps[i][i+j] = d_taps[j]; @@ -154,7 +154,7 @@ namespace gr { set_taps(taps); // Make sure the output sample is always aligned, too. 
- d_output = fft::malloc_complex(1); + d_output = (gr_complex*)volk_malloc(1*sizeof(gr_complex), d_align); } fir_filter_ccf::~fir_filter_ccf() @@ -162,14 +162,14 @@ namespace gr { // Free all aligned taps if(d_aligned_taps != NULL) { for(int i = 0; i < d_naligned; i++) { - fft::free(d_aligned_taps[i]); + volk_free(d_aligned_taps[i]); } ::free(d_aligned_taps); d_aligned_taps = NULL; } // Free output sample - fft::free(d_output); + volk_free(d_output); } void @@ -178,7 +178,7 @@ namespace gr { // Free the taps if already allocated if(d_aligned_taps != NULL) { for(int i = 0; i < d_naligned; i++) { - fft::free(d_aligned_taps[i]); + volk_free(d_aligned_taps[i]); } ::free(d_aligned_taps); d_aligned_taps = NULL; @@ -191,7 +191,7 @@ namespace gr { // Make a set of taps at all possible arch alignments d_aligned_taps = (float**)malloc(d_naligned*sizeof(float*)); for(int i = 0; i < d_naligned; i++) { - d_aligned_taps[i] = fft::malloc_float(d_ntaps+d_naligned-1); + d_aligned_taps[i] = (float*)volk_malloc((d_ntaps+d_naligned-1)*sizeof(float), d_align); memset(d_aligned_taps[i], 0, sizeof(float)*(d_ntaps+d_naligned-1)); for(unsigned int j = 0; j < d_ntaps; j++) d_aligned_taps[i][i+j] = d_taps[j]; @@ -270,7 +270,7 @@ namespace gr { set_taps(taps); // Make sure the output sample is always aligned, too. 
- d_output = fft::malloc_complex(1); + d_output = (gr_complex*)volk_malloc(1*sizeof(gr_complex), d_align); } fir_filter_fcc::~fir_filter_fcc() @@ -278,14 +278,14 @@ namespace gr { // Free all aligned taps if(d_aligned_taps != NULL) { for(int i = 0; i < d_naligned; i++) { - fft::free(d_aligned_taps[i]); + volk_free(d_aligned_taps[i]); } ::free(d_aligned_taps); d_aligned_taps = NULL; } // Free output sample - fft::free(d_output); + volk_free(d_output); } void @@ -294,7 +294,7 @@ namespace gr { // Free the taps if already allocated if(d_aligned_taps != NULL) { for(int i = 0; i < d_naligned; i++) { - fft::free(d_aligned_taps[i]); + volk_free(d_aligned_taps[i]); } ::free(d_aligned_taps); d_aligned_taps = NULL; @@ -307,7 +307,7 @@ namespace gr { // Make a set of taps at all possible arch alignments d_aligned_taps = (gr_complex**)malloc(d_naligned*sizeof(gr_complex*)); for(int i = 0; i < d_naligned; i++) { - d_aligned_taps[i] = fft::malloc_complex(d_ntaps+d_naligned-1); + d_aligned_taps[i] = (gr_complex*)volk_malloc((d_ntaps+d_naligned-1)*sizeof(gr_complex), d_align); memset(d_aligned_taps[i], 0, sizeof(gr_complex)*(d_ntaps+d_naligned-1)); for(unsigned int j = 0; j < d_ntaps; j++) d_aligned_taps[i][i+j] = d_taps[j]; @@ -386,7 +386,7 @@ namespace gr { set_taps(taps); // Make sure the output sample is always aligned, too. 
- d_output = fft::malloc_complex(1); + d_output = (gr_complex*)volk_malloc(1*sizeof(gr_complex), d_align); } fir_filter_ccc::~fir_filter_ccc() @@ -394,14 +394,14 @@ namespace gr { // Free all aligned taps if(d_aligned_taps != NULL) { for(int i = 0; i < d_naligned; i++) { - fft::free(d_aligned_taps[i]); + volk_free(d_aligned_taps[i]); } ::free(d_aligned_taps); d_aligned_taps = NULL; } // Free output sample - fft::free(d_output); + volk_free(d_output); } void @@ -410,7 +410,7 @@ namespace gr { // Free the taps if already allocated if(d_aligned_taps != NULL) { for(int i = 0; i < d_naligned; i++) { - fft::free(d_aligned_taps[i]); + volk_free(d_aligned_taps[i]); } ::free(d_aligned_taps); d_aligned_taps = NULL; @@ -423,7 +423,7 @@ namespace gr { // Make a set of taps at all possible arch alignments d_aligned_taps = (gr_complex**)malloc(d_naligned*sizeof(gr_complex*)); for(int i = 0; i < d_naligned; i++) { - d_aligned_taps[i] = fft::malloc_complex(d_ntaps+d_naligned-1); + d_aligned_taps[i] = (gr_complex*)volk_malloc((d_ntaps+d_naligned-1)*sizeof(gr_complex), d_align); memset(d_aligned_taps[i], 0, sizeof(gr_complex)*(d_ntaps+d_naligned-1)); for(unsigned int j = 0; j < d_ntaps; j++) d_aligned_taps[i][i+j] = d_taps[j]; @@ -500,7 +500,7 @@ namespace gr { set_taps(taps); // Make sure the output sample is always aligned, too. 
- d_output = fft::malloc_complex(1); + d_output = (gr_complex*)volk_malloc(1*sizeof(gr_complex), d_align); } fir_filter_scc::~fir_filter_scc() @@ -508,14 +508,14 @@ namespace gr { // Free all aligned taps if(d_aligned_taps != NULL) { for(int i = 0; i < d_naligned; i++) { - fft::free(d_aligned_taps[i]); + volk_free(d_aligned_taps[i]); } ::free(d_aligned_taps); d_aligned_taps = NULL; } // Free output sample - fft::free(d_output); + volk_free(d_output); } void @@ -524,7 +524,7 @@ namespace gr { // Free the taps if already allocated if(d_aligned_taps != NULL) { for(int i = 0; i < d_naligned; i++) { - fft::free(d_aligned_taps[i]); + volk_free(d_aligned_taps[i]); } ::free(d_aligned_taps); d_aligned_taps = NULL; @@ -537,7 +537,7 @@ namespace gr { // Make a set of taps at all possible arch alignments d_aligned_taps = (gr_complex**)malloc(d_naligned*sizeof(gr_complex*)); for(int i = 0; i < d_naligned; i++) { - d_aligned_taps[i] = fft::malloc_complex(d_ntaps+d_naligned-1); + d_aligned_taps[i] = (gr_complex*)volk_malloc((d_ntaps+d_naligned-1)*sizeof(gr_complex), d_align); memset(d_aligned_taps[i], 0, sizeof(gr_complex)*(d_ntaps+d_naligned-1)); for(unsigned int j = 0; j < d_ntaps; j++) d_aligned_taps[i][i+j] = d_taps[j]; @@ -615,7 +615,7 @@ namespace gr { set_taps(taps); // Make sure the output sample is always aligned, too. 
- d_output = (short*)fft::malloc_float(1); + d_output = (short*)volk_malloc(1*sizeof(short), d_align); } fir_filter_fsf::~fir_filter_fsf() @@ -623,14 +623,14 @@ namespace gr { // Free all aligned taps if(d_aligned_taps != NULL) { for(int i = 0; i < d_naligned; i++) { - fft::free(d_aligned_taps[i]); + volk_free(d_aligned_taps[i]); } ::free(d_aligned_taps); d_aligned_taps = NULL; } // Free output sample - fft::free(d_output); + volk_free(d_output); } void @@ -639,7 +639,7 @@ namespace gr { // Free the taps if already allocated if(d_aligned_taps != NULL) { for(int i = 0; i < d_naligned; i++) { - fft::free(d_aligned_taps[i]); + volk_free(d_aligned_taps[i]); } ::free(d_aligned_taps); d_aligned_taps = NULL; @@ -652,7 +652,7 @@ namespace gr { // Make a set of taps at all possible arch alignments d_aligned_taps = (float**)malloc(d_naligned*sizeof(float*)); for(int i = 0; i < d_naligned; i++) { - d_aligned_taps[i] = fft::malloc_float(d_ntaps+d_naligned-1); + d_aligned_taps[i] = (float*)volk_malloc((d_ntaps+d_naligned-1)*sizeof(float), d_align); memset(d_aligned_taps[i], 0, sizeof(float)*(d_ntaps+d_naligned-1)); for(unsigned int j = 0; j < d_ntaps; j++) d_aligned_taps[i][i+j] = d_taps[j]; diff --git a/gr-filter/lib/fir_filter_with_buffer.cc b/gr-filter/lib/fir_filter_with_buffer.cc index 9953d48dd5..4191dc3932 100644 --- a/gr-filter/lib/fir_filter_with_buffer.cc +++ b/gr-filter/lib/fir_filter_with_buffer.cc @@ -45,41 +45,41 @@ namespace gr { set_taps(taps); // Make sure the output sample is always aligned, too. 
- d_output = fft::malloc_float(1); + d_output = (float*)volk_malloc(1*sizeof(float), d_align); } fir_filter_with_buffer_fff::~fir_filter_with_buffer_fff() { if(d_buffer_ptr != NULL) { - fft::free(d_buffer_ptr); + volk_free(d_buffer_ptr); d_buffer_ptr = NULL; } // Free aligned taps if(d_aligned_taps != NULL) { for(int i = 0; i < d_naligned; i++) { - fft::free(d_aligned_taps[i]); + volk_free(d_aligned_taps[i]); } ::free(d_aligned_taps); d_aligned_taps = NULL; } // Free output sample - fft::free(d_output); + volk_free(d_output); } void fir_filter_with_buffer_fff::set_taps(const std::vector<float> &taps) { if(d_buffer_ptr != NULL) { - fft::free(d_buffer_ptr); + volk_free(d_buffer_ptr); d_buffer_ptr = NULL; } // Free the taps if already allocated if(d_aligned_taps != NULL) { for(int i = 0; i < d_naligned; i++) { - fft::free(d_aligned_taps[i]); + volk_free(d_aligned_taps[i]); } ::free(d_aligned_taps); d_aligned_taps = NULL; @@ -95,14 +95,15 @@ namespace gr { // problems). We then set d_buffer to the position in the // d_buffer_ptr such that we only touch the internally // allocated space. - d_buffer_ptr = fft::malloc_float(2*(d_ntaps + d_naligned)); + d_buffer_ptr = (float*)volk_malloc((2*(d_ntaps + d_naligned))*sizeof(float), d_align); + memset(d_buffer_ptr, 0, 2*(d_ntaps + d_naligned)*sizeof(float)); d_buffer = d_buffer_ptr + d_naligned; // Allocate aligned taps d_aligned_taps = (float**)malloc(d_naligned*sizeof(float*)); for(int i = 0; i < d_naligned; i++) { - d_aligned_taps[i] = fft::malloc_float(d_ntaps+d_naligned-1); + d_aligned_taps[i] = (float*)volk_malloc((d_ntaps+d_naligned-1)*sizeof(float), d_align); memset(d_aligned_taps[i], 0, sizeof(float)*(d_ntaps+d_naligned-1)); for(unsigned int j = 0; j < d_ntaps; j++) d_aligned_taps[i][i+j] = d_taps[j]; @@ -198,41 +199,41 @@ namespace gr { set_taps(taps); // Make sure the output sample is always aligned, too. 
- d_output = fft::malloc_complex(1); + d_output = (gr_complex*)volk_malloc(1*sizeof(gr_complex), d_align); } fir_filter_with_buffer_ccc::~fir_filter_with_buffer_ccc() { if(d_buffer_ptr != NULL) { - fft::free(d_buffer_ptr); + volk_free(d_buffer_ptr); d_buffer_ptr = NULL; } // Free aligned taps if(d_aligned_taps != NULL) { for(int i = 0; i < d_naligned; i++) { - fft::free(d_aligned_taps[i]); + volk_free(d_aligned_taps[i]); } ::free(d_aligned_taps); d_aligned_taps = NULL; } // Free output sample - fft::free(d_output); + volk_free(d_output); } void fir_filter_with_buffer_ccc::set_taps(const std::vector<gr_complex> &taps) { if(d_buffer_ptr != NULL) { - fft::free(d_buffer_ptr); + volk_free(d_buffer_ptr); d_buffer_ptr = NULL; } // Free the taps if already allocated if(d_aligned_taps != NULL) { for(int i = 0; i < d_naligned; i++) { - fft::free(d_aligned_taps[i]); + volk_free(d_aligned_taps[i]); } ::free(d_aligned_taps); d_aligned_taps = NULL; @@ -248,14 +249,14 @@ namespace gr { // problems). We then set d_buffer to the position in the // d_buffer_ptr such that we only touch the internally // allocated space. - d_buffer_ptr = fft::malloc_complex(2*(d_ntaps + d_naligned)); + d_buffer_ptr = (gr_complex*)volk_malloc((2*(d_ntaps + d_naligned))*sizeof(gr_complex), d_align); memset(d_buffer_ptr, 0, 2*(d_ntaps + d_naligned)*sizeof(gr_complex)); d_buffer = d_buffer_ptr + d_naligned; // Allocate aligned taps d_aligned_taps = (gr_complex**)malloc(d_naligned*sizeof(gr_complex*)); for(int i = 0; i < d_naligned; i++) { - d_aligned_taps[i] = fft::malloc_complex(d_ntaps+d_naligned-1); + d_aligned_taps[i] = (gr_complex*)volk_malloc((d_ntaps+d_naligned-1)*sizeof(gr_complex), d_align); memset(d_aligned_taps[i], 0, sizeof(gr_complex)*(d_ntaps+d_naligned-1)); for(unsigned int j = 0; j < d_ntaps; j++) d_aligned_taps[i][i+j] = d_taps[j]; @@ -351,41 +352,41 @@ namespace gr { set_taps(taps); // Make sure the output sample is always aligned, too. 
- d_output = fft::malloc_complex(1); + d_output = (gr_complex*)volk_malloc(1*sizeof(gr_complex), d_align); } fir_filter_with_buffer_ccf::~fir_filter_with_buffer_ccf() { if(d_buffer_ptr != NULL) { - fft::free(d_buffer_ptr); + volk_free(d_buffer_ptr); d_buffer_ptr = NULL; } // Free aligned taps if(d_aligned_taps != NULL) { for(int i = 0; i < d_naligned; i++) { - fft::free(d_aligned_taps[i]); + volk_free(d_aligned_taps[i]); } ::free(d_aligned_taps); d_aligned_taps = NULL; } // Free output sample - fft::free(d_output); + volk_free(d_output); } void fir_filter_with_buffer_ccf::set_taps(const std::vector<float> &taps) { if(d_buffer_ptr != NULL) { - fft::free(d_buffer_ptr); + volk_free(d_buffer_ptr); d_buffer_ptr = NULL; } // Free the taps if already allocated if(d_aligned_taps != NULL) { for(int i = 0; i < d_naligned; i++) { - fft::free(d_aligned_taps[i]); + volk_free(d_aligned_taps[i]); } ::free(d_aligned_taps); d_aligned_taps = NULL; @@ -401,14 +402,14 @@ namespace gr { // problems). We then set d_buffer to the position in the // d_buffer_ptr such that we only touch the internally // allocated space. 
- d_buffer_ptr = fft::malloc_complex(2*(d_ntaps + d_naligned)); + d_buffer_ptr = (gr_complex*)volk_malloc((2*(d_ntaps + d_naligned))*sizeof(gr_complex), d_align); memset(d_buffer_ptr, 0, 2*(d_ntaps + d_naligned)*sizeof(gr_complex)); d_buffer = d_buffer_ptr + d_naligned; // Allocate aligned taps d_aligned_taps = (float**)malloc(d_naligned*sizeof(float*)); for(int i = 0; i < d_naligned; i++) { - d_aligned_taps[i] = fft::malloc_float(d_ntaps+d_naligned-1); + d_aligned_taps[i] = (float*)volk_malloc((d_ntaps+d_naligned-1)*sizeof(float), d_align); memset(d_aligned_taps[i], 0, sizeof(float)*(d_ntaps+d_naligned-1)); for(unsigned int j = 0; j < d_ntaps; j++) d_aligned_taps[i][i+j] = d_taps[j]; diff --git a/gr-filter/lib/qa_fir_filter_with_buffer.cc b/gr-filter/lib/qa_fir_filter_with_buffer.cc index 92aca73554..06d44f6eee 100644 --- a/gr-filter/lib/qa_fir_filter_with_buffer.cc +++ b/gr-filter/lib/qa_fir_filter_with_buffer.cc @@ -28,6 +28,7 @@ #include <qa_fir_filter_with_buffer.h> #include <gnuradio/filter/fir_filter_with_buffer.h> #include <gnuradio/fft/fft.h> +#include <volk/volk.h> #include <cppunit/TestAssert.h> #include <cmath> #include <gnuradio/random.h> @@ -110,13 +111,14 @@ namespace gr { const int MAX_TAPS = 29; const int OUTPUT_LEN = 37; const int INPUT_LEN = MAX_TAPS + OUTPUT_LEN; + size_t align = volk_get_alignment(); // Mem aligned buffer not really necessary, but why not? 
- i_type *input = fft::malloc_float(INPUT_LEN); - i_type *dline = fft::malloc_float(INPUT_LEN); - o_type *expected_output = fft::malloc_float(OUTPUT_LEN); - o_type *actual_output = fft::malloc_float(OUTPUT_LEN); - tap_type *taps = fft::malloc_float(MAX_TAPS); + i_type *input = (float*)volk_malloc(INPUT_LEN*sizeof(float), align); + i_type *dline = (float*)volk_malloc(INPUT_LEN*sizeof(float), align); + o_type *expected_output = (float*)volk_malloc(OUTPUT_LEN*sizeof(float), align); + o_type *actual_output = (float*)volk_malloc(OUTPUT_LEN*sizeof(float), align); + tap_type *taps = (float*)volk_malloc(MAX_TAPS*sizeof(float), align); srandom(0); // we want reproducibility memset(dline, 0, INPUT_LEN*sizeof(i_type)); @@ -163,11 +165,11 @@ namespace gr { delete f1; } } - fft::free(input); - fft::free(dline); - fft::free(expected_output); - fft::free(actual_output); - fft::free(taps); + volk_free(input); + volk_free(dline); + volk_free(expected_output); + volk_free(actual_output); + volk_free(taps); } } /* namespace fff */ @@ -225,13 +227,14 @@ namespace gr { const int MAX_TAPS = 29; const int OUTPUT_LEN = 37; const int INPUT_LEN = MAX_TAPS + OUTPUT_LEN; + size_t align = volk_get_alignment(); // Mem aligned buffer not really necessary, but why not? 
- i_type *input = fft::malloc_complex(INPUT_LEN); - i_type *dline = fft::malloc_complex(INPUT_LEN); - o_type *expected_output = fft::malloc_complex(OUTPUT_LEN); - o_type *actual_output = fft::malloc_complex(OUTPUT_LEN); - tap_type *taps = fft::malloc_complex(MAX_TAPS); + i_type *input = (gr_complex*)volk_malloc(INPUT_LEN*sizeof(gr_complex), align); + i_type *dline = (gr_complex*)volk_malloc(INPUT_LEN*sizeof(gr_complex), align); + o_type *expected_output = (gr_complex*)volk_malloc(OUTPUT_LEN*sizeof(gr_complex), align); + o_type *actual_output = (gr_complex*)volk_malloc(OUTPUT_LEN*sizeof(gr_complex), align); + tap_type *taps = (gr_complex*)volk_malloc(MAX_TAPS*sizeof(gr_complex), align); srandom(0); // we want reproducibility memset(dline, 0, INPUT_LEN*sizeof(i_type)); @@ -278,11 +281,11 @@ namespace gr { delete f1; } } - fft::free(input); - fft::free(dline); - fft::free(expected_output); - fft::free(actual_output); - fft::free(taps); + volk_free(input); + volk_free(dline); + volk_free(expected_output); + volk_free(actual_output); + volk_free(taps); } } /* namespace ccc */ @@ -340,13 +343,14 @@ namespace gr { const int MAX_TAPS = 29; const int OUTPUT_LEN = 37; const int INPUT_LEN = MAX_TAPS + OUTPUT_LEN; + size_t align = volk_get_alignment(); // Mem aligned buffer not really necessary, but why not? 
- i_type *input = fft::malloc_complex(INPUT_LEN); - i_type *dline = fft::malloc_complex(INPUT_LEN); - o_type *expected_output = fft::malloc_complex(OUTPUT_LEN); - o_type *actual_output = fft::malloc_complex(OUTPUT_LEN); - tap_type *taps = fft::malloc_float(MAX_TAPS); + i_type *input = (gr_complex*)volk_malloc(INPUT_LEN*sizeof(gr_complex), align); + i_type *dline = (gr_complex*)volk_malloc(INPUT_LEN*sizeof(gr_complex), align); + o_type *expected_output = (gr_complex*)volk_malloc(OUTPUT_LEN*sizeof(gr_complex), align); + o_type *actual_output = (gr_complex*)volk_malloc(OUTPUT_LEN*sizeof(gr_complex), align); + tap_type *taps = (float*)volk_malloc(MAX_TAPS*sizeof(float), align); srandom(0); // we want reproducibility memset(dline, 0, INPUT_LEN*sizeof(i_type)); @@ -393,11 +397,11 @@ namespace gr { delete f1; } } - fft::free(input); - fft::free(dline); - fft::free(expected_output); - fft::free(actual_output); - fft::free(taps); + volk_free(input); + volk_free(dline); + volk_free(expected_output); + volk_free(actual_output); + volk_free(taps); } } /* namespace ccf */ diff --git a/gr-filter/lib/qa_mmse_fir_interpolator_cc.cc b/gr-filter/lib/qa_mmse_fir_interpolator_cc.cc index 8d1ec533f7..8057e1d3ce 100644 --- a/gr-filter/lib/qa_mmse_fir_interpolator_cc.cc +++ b/gr-filter/lib/qa_mmse_fir_interpolator_cc.cc @@ -28,6 +28,7 @@ #include <qa_mmse_fir_interpolator_cc.h> #include <gnuradio/filter/mmse_fir_interpolator_cc.h> #include <gnuradio/fft/fft.h> +#include <volk/volk.h> #include <cstdio> #include <cmath> #include <stdexcept> @@ -61,7 +62,8 @@ namespace gr { qa_mmse_fir_interpolator_cc::t1() { static const unsigned N = 100; - gr_complex *input = fft::malloc_complex(N + 10); + gr_complex *input = (gr_complex*)volk_malloc((N + 10)*sizeof(gr_complex), + volk_get_alignment()); for(unsigned i = 0; i < N+10; i++) input[i] = test_fcn((double) i); @@ -78,7 +80,7 @@ namespace gr { // printf ("%9.6f %9.6f %9.6f\n", expected, actual, expected - actual); } } - fft::free(input); + 
volk_free(input); } /* diff --git a/gr-filter/lib/qa_mmse_fir_interpolator_ff.cc b/gr-filter/lib/qa_mmse_fir_interpolator_ff.cc index 9e9c6cfdd8..b2e3d34502 100644 --- a/gr-filter/lib/qa_mmse_fir_interpolator_ff.cc +++ b/gr-filter/lib/qa_mmse_fir_interpolator_ff.cc @@ -28,6 +28,7 @@ #include <qa_mmse_fir_interpolator_ff.h> #include <gnuradio/filter/mmse_fir_interpolator_ff.h> #include <gnuradio/fft/fft.h> +#include <volk/volk.h> #include <cstdio> #include <cmath> @@ -47,7 +48,7 @@ namespace gr { // use aligned malloc and make sure that everything in this // buffer is properly initialized. static const unsigned N = 100; - float *input = fft::malloc_float(N + 10); + float *input = (float*)volk_malloc((N + 10)*sizeof(float), volk_get_alignment()); for(unsigned i = 0; i < N+10; i++) input[i] = test_fcn((double) i); @@ -64,7 +65,7 @@ namespace gr { // printf ("%9.6f %9.6f %9.6f\n", expected, actual, expected - actual); } } - fft::free(input); + volk_free(input); } } /* namespace filter */ diff --git a/gr-qtgui/lib/const_sink_c_impl.cc b/gr-qtgui/lib/const_sink_c_impl.cc index 3fda1ed573..fd9338f111 100644 --- a/gr-qtgui/lib/const_sink_c_impl.cc +++ b/gr-qtgui/lib/const_sink_c_impl.cc @@ -67,8 +67,10 @@ namespace gr { d_index = 0; for(int i = 0; i < d_nconnections; i++) { - d_residbufs_real.push_back(fft::malloc_double(d_size)); - d_residbufs_imag.push_back(fft::malloc_double(d_size)); + d_residbufs_real.push_back((double*)volk_malloc(d_size*sizeof(double), + volk_get_alignment())); + d_residbufs_imag.push_back((double*)volk_malloc(d_size*sizeof(double), + volk_get_alignment())); memset(d_residbufs_real[i], 0, d_size*sizeof(double)); memset(d_residbufs_imag[i], 0, d_size*sizeof(double)); } @@ -88,8 +90,8 @@ namespace gr { // d_main_gui is a qwidget destroyed with its parent for(int i = 0; i < d_nconnections; i++) { - fft::free(d_residbufs_real[i]); - fft::free(d_residbufs_imag[i]); + volk_free(d_residbufs_real[i]); + volk_free(d_residbufs_imag[i]); } delete d_argv; @@ 
-259,10 +261,12 @@ namespace gr { if(newsize != d_size) { // Resize residbuf and replace data for(int i = 0; i < d_nconnections; i++) { - fft::free(d_residbufs_real[i]); - fft::free(d_residbufs_imag[i]); - d_residbufs_real[i] = fft::malloc_double(newsize); - d_residbufs_imag[i] = fft::malloc_double(newsize); + volk_free(d_residbufs_real[i]); + volk_free(d_residbufs_imag[i]); + d_residbufs_real[i] = (double*)volk_malloc(newsize*sizeof(double), + volk_get_alignment()); + d_residbufs_imag[i] = (double*)volk_malloc(newsize*sizeof(double), + volk_get_alignment()); memset(d_residbufs_real[i], 0, newsize*sizeof(double)); memset(d_residbufs_imag[i], 0, newsize*sizeof(double)); diff --git a/gr-qtgui/lib/freq_sink_c_impl.cc b/gr-qtgui/lib/freq_sink_c_impl.cc index 03401a7511..1339e2b7f3 100644 --- a/gr-qtgui/lib/freq_sink_c_impl.cc +++ b/gr-qtgui/lib/freq_sink_c_impl.cc @@ -75,13 +75,16 @@ namespace gr { d_shift = true; d_fft = new fft::fft_complex(d_fftsize, true); - d_fbuf = fft::malloc_float(d_fftsize); + d_fbuf = (float*)volk_malloc(d_fftsize*sizeof(float), + volk_get_alignment()); memset(d_fbuf, 0, d_fftsize*sizeof(float)); d_index = 0; for(int i = 0; i < d_nconnections; i++) { - d_residbufs.push_back(fft::malloc_complex(d_fftsize)); - d_magbufs.push_back(fft::malloc_double(d_fftsize)); + d_residbufs.push_back((gr_complex*)volk_malloc(d_fftsize*sizeof(gr_complex), + volk_get_alignment())); + d_magbufs.push_back((double*)volk_malloc(d_fftsize*sizeof(double), + volk_get_alignment())); memset(d_residbufs[i], 0, d_fftsize*sizeof(gr_complex)); memset(d_magbufs[i], 0, d_fftsize*sizeof(double)); @@ -98,11 +101,11 @@ namespace gr { d_main_gui->close(); for(int i = 0; i < d_nconnections; i++) { - fft::free(d_residbufs[i]); - fft::free(d_magbufs[i]); + volk_free(d_residbufs[i]); + volk_free(d_magbufs[i]); } delete d_fft; - fft::free(d_fbuf); + volk_free(d_fbuf); delete d_argv; } @@ -394,11 +397,13 @@ namespace gr { if(newfftsize != d_fftsize) { // Resize residbuf and replace data 
for(int i = 0; i < d_nconnections; i++) { - fft::free(d_residbufs[i]); - fft::free(d_magbufs[i]); + volk_free(d_residbufs[i]); + volk_free(d_magbufs[i]); - d_residbufs[i] = fft::malloc_complex(newfftsize); - d_magbufs[i] = fft::malloc_double(newfftsize); + d_residbufs[i] = (gr_complex*)volk_malloc(newfftsize*sizeof(gr_complex), + volk_get_alignment()); + d_magbufs[i] = (double*)volk_malloc(newfftsize*sizeof(double), + volk_get_alignment()); memset(d_residbufs[i], 0, newfftsize*sizeof(gr_complex)); memset(d_magbufs[i], 0, newfftsize*sizeof(double)); @@ -416,8 +421,9 @@ namespace gr { delete d_fft; d_fft = new fft::fft_complex(d_fftsize, true); - fft::free(d_fbuf); - d_fbuf = fft::malloc_float(d_fftsize); + volk_free(d_fbuf); + d_fbuf = (float*)volk_malloc(d_fftsize*sizeof(float), + volk_get_alignment()); memset(d_fbuf, 0, d_fftsize*sizeof(float)); } } diff --git a/gr-qtgui/lib/freq_sink_f_impl.cc b/gr-qtgui/lib/freq_sink_f_impl.cc index a673e18d7e..98c55b93b5 100644 --- a/gr-qtgui/lib/freq_sink_f_impl.cc +++ b/gr-qtgui/lib/freq_sink_f_impl.cc @@ -75,13 +75,16 @@ namespace gr { d_shift = true; d_fft = new fft::fft_complex(d_fftsize, true); - d_fbuf = fft::malloc_float(d_fftsize); + d_fbuf = (float*)volk_malloc(d_fftsize*sizeof(float), + volk_get_alignment()); memset(d_fbuf, 0, d_fftsize*sizeof(float)); d_index = 0; for(int i = 0; i < d_nconnections; i++) { - d_residbufs.push_back(fft::malloc_float(d_fftsize)); - d_magbufs.push_back(fft::malloc_double(d_fftsize)); + d_residbufs.push_back((float*)volk_malloc(d_fftsize*sizeof(float), + volk_get_alignment())); + d_magbufs.push_back((double*)volk_malloc(d_fftsize*sizeof(double), + volk_get_alignment())); memset(d_residbufs[i], 0, d_fftsize*sizeof(float)); memset(d_magbufs[i], 0, d_fftsize*sizeof(double)); @@ -98,11 +101,11 @@ namespace gr { d_main_gui->close(); for(int i = 0; i < d_nconnections; i++) { - fft::free(d_residbufs[i]); - fft::free(d_magbufs[i]); + volk_free(d_residbufs[i]); + volk_free(d_magbufs[i]); } delete 
d_fft; - fft::free(d_fbuf); + volk_free(d_fbuf); delete d_argv; } @@ -397,11 +400,13 @@ namespace gr { if(newfftsize != d_fftsize) { // Resize residbuf and replace data for(int i = 0; i < d_nconnections; i++) { - fft::free(d_residbufs[i]); - fft::free(d_magbufs[i]); + volk_free(d_residbufs[i]); + volk_free(d_magbufs[i]); - d_residbufs[i] = fft::malloc_float(newfftsize); - d_magbufs[i] = fft::malloc_double(newfftsize); + d_residbufs[i] = (float*)volk_malloc(newfftsize*sizeof(float), + volk_get_alignment()); + d_magbufs[i] = (double*)volk_malloc(newfftsize*sizeof(double), + volk_get_alignment()); memset(d_residbufs[i], 0, newfftsize*sizeof(float)); memset(d_magbufs[i], 0, newfftsize*sizeof(double)); @@ -419,8 +424,9 @@ namespace gr { delete d_fft; d_fft = new fft::fft_complex(d_fftsize, true); - fft::free(d_fbuf); - d_fbuf = fft::malloc_float(d_fftsize); + volk_free(d_fbuf); + d_fbuf = (float*)volk_malloc(d_fftsize*sizeof(float), + volk_get_alignment()); memset(d_fbuf, 0, d_fftsize*sizeof(float)); } } diff --git a/gr-qtgui/lib/histogram_sink_f_impl.cc b/gr-qtgui/lib/histogram_sink_f_impl.cc index bc4ac84da8..8a3ba6987e 100644 --- a/gr-qtgui/lib/histogram_sink_f_impl.cc +++ b/gr-qtgui/lib/histogram_sink_f_impl.cc @@ -70,7 +70,8 @@ namespace gr { d_index = 0; for(int i = 0; i < d_nconnections; i++) { - d_residbufs.push_back(fft::malloc_double(d_size)); + d_residbufs.push_back((double*)volk_malloc(d_size*sizeof(double), + volk_get_alignment())); memset(d_residbufs[i], 0, d_size*sizeof(double)); } @@ -89,7 +90,7 @@ namespace gr { // d_main_gui is a qwidget destroyed with its parent for(int i = 0; i < d_nconnections; i++) { - fft::free(d_residbufs[i]); + volk_free(d_residbufs[i]); } delete d_argv; @@ -262,8 +263,9 @@ namespace gr { if(newsize != d_size) { // Resize residbuf and replace data for(int i = 0; i < d_nconnections; i++) { - fft::free(d_residbufs[i]); - d_residbufs[i] = fft::malloc_double(newsize); + volk_free(d_residbufs[i]); + d_residbufs[i] = 
(double*)volk_malloc(newsize*sizeof(double), + volk_get_alignment()); memset(d_residbufs[i], 0, newsize*sizeof(double)); } diff --git a/gr-qtgui/lib/time_raster_sink_b_impl.cc b/gr-qtgui/lib/time_raster_sink_b_impl.cc index 078603d889..26daffe7bb 100644 --- a/gr-qtgui/lib/time_raster_sink_b_impl.cc +++ b/gr-qtgui/lib/time_raster_sink_b_impl.cc @@ -81,11 +81,13 @@ namespace gr { d_scale = 1.0f; d_icols = static_cast<int>(ceil(d_cols)); - d_tmpflt = fft::malloc_float(d_icols); + d_tmpflt = (float*)volk_malloc(d_icols*sizeof(float), + volk_get_alignment()); memset(d_tmpflt, 0, d_icols*sizeof(float)); for(int i = 0; i < d_nconnections; i++) { - d_residbufs.push_back(fft::malloc_double(d_icols)); + d_residbufs.push_back((double*)volk_malloc(d_icols*sizeof(double), + volk_get_alignment())); memset(d_residbufs[i], 0, d_icols*sizeof(double)); } @@ -100,9 +102,9 @@ namespace gr { if(!d_main_gui->isClosed()) d_main_gui->close(); - fft::free(d_tmpflt); + volk_free(d_tmpflt); for(int i = 0; i < d_nconnections; i++) { - fft::free(d_residbufs[i]); + volk_free(d_residbufs[i]); } delete d_argv; @@ -381,12 +383,14 @@ namespace gr { d_cols = cols; d_index = 0; d_icols = static_cast<int>(ceil(d_cols)); - fft::free(d_tmpflt); - d_tmpflt = fft::malloc_float(d_icols); + volk_free(d_tmpflt); + d_tmpflt = (float*)volk_malloc(d_icols*sizeof(float), + volk_get_alignment()); memset(d_tmpflt, 0, d_icols*sizeof(float)); for(int i = 0; i < d_nconnections; i++) { - fft::free(d_residbufs[i]); - d_residbufs[i] = fft::malloc_double(d_icols); + volk_free(d_residbufs[i]); + d_residbufs[i] = (double*)volk_malloc(d_icols*sizeof(double), + volk_get_alignment()); memset(d_residbufs[i], 0, d_icols*sizeof(double)); } } diff --git a/gr-qtgui/lib/time_raster_sink_f_impl.cc b/gr-qtgui/lib/time_raster_sink_f_impl.cc index 6052d75a92..10b7f762ca 100644 --- a/gr-qtgui/lib/time_raster_sink_f_impl.cc +++ b/gr-qtgui/lib/time_raster_sink_f_impl.cc @@ -79,11 +79,13 @@ namespace gr { d_index = 0; d_icols = 
static_cast<int>(ceil(d_cols)); - d_tmpflt = fft::malloc_float(d_icols); + d_tmpflt = (float*)volk_malloc(d_icols*sizeof(float), + volk_get_alignment()); memset(d_tmpflt, 0, d_icols*sizeof(float)); for(int i = 0; i < d_nconnections; i++) { - d_residbufs.push_back(fft::malloc_double(d_icols)); + d_residbufs.push_back((double*)volk_malloc(d_icols*sizeof(double), + volk_get_alignment())); memset(d_residbufs[i], 0, d_icols*sizeof(double)); } @@ -98,9 +100,9 @@ namespace gr { if(!d_main_gui->isClosed()) d_main_gui->close(); - fft::free(d_tmpflt); + volk_free(d_tmpflt); for(int i = 0; i < d_nconnections; i++) { - fft::free(d_residbufs[i]); + volk_free(d_residbufs[i]); } delete d_argv; @@ -379,12 +381,14 @@ namespace gr { d_cols = cols; d_index = 0; d_icols = static_cast<int>(ceil(d_cols)); - fft::free(d_tmpflt); - d_tmpflt = fft::malloc_float(d_icols); + volk_free(d_tmpflt); + d_tmpflt = (float*)volk_malloc(d_icols*sizeof(float), + volk_get_alignment()); memset(d_tmpflt, 0, d_icols*sizeof(float)); for(int i = 0; i < d_nconnections; i++) { - fft::free(d_residbufs[i]); - d_residbufs[i] = fft::malloc_double(d_icols); + volk_free(d_residbufs[i]); + d_residbufs[i] = (double*)volk_malloc(d_icols*sizeof(double), + volk_get_alignment()); memset(d_residbufs[i], 0, d_icols*sizeof(double)); } } diff --git a/gr-qtgui/lib/time_sink_c_impl.cc b/gr-qtgui/lib/time_sink_c_impl.cc index 82aba029e5..f24657830f 100644 --- a/gr-qtgui/lib/time_sink_c_impl.cc +++ b/gr-qtgui/lib/time_sink_c_impl.cc @@ -65,7 +65,8 @@ namespace gr { d_main_gui = NULL; for(int n = 0; n < d_nconnections; n++) { - d_buffers.push_back(fft::malloc_double(d_buffer_size)); + d_buffers.push_back((double*)volk_malloc(d_buffer_size*sizeof(double), + volk_get_alignment())); memset(d_buffers[n], 0, d_buffer_size*sizeof(double)); } @@ -92,7 +93,7 @@ namespace gr { // d_main_gui is a qwidget destroyed with its parent for(int n = 0; n < d_nconnections; n++) { - fft::free(d_buffers[n]); + volk_free(d_buffers[n]); } delete 
d_argv; @@ -298,8 +299,9 @@ namespace gr { // Resize buffers and replace data for(int n = 0; n < d_nconnections; n++) { - fft::free(d_buffers[n]); - d_buffers[n] = fft::malloc_double(d_buffer_size); + volk_free(d_buffers[n]); + d_buffers[n] = (double*)volk_malloc(d_buffer_size*sizeof(double), + volk_get_alignment()); memset(d_buffers[n], 0, d_buffer_size*sizeof(double)); } diff --git a/gr-qtgui/lib/time_sink_f_impl.cc b/gr-qtgui/lib/time_sink_f_impl.cc index afb07e379e..596a416541 100644 --- a/gr-qtgui/lib/time_sink_f_impl.cc +++ b/gr-qtgui/lib/time_sink_f_impl.cc @@ -67,7 +67,8 @@ namespace gr { d_main_gui = NULL; for(int n = 0; n < d_nconnections; n++) { - d_buffers.push_back(fft::malloc_double(d_buffer_size)); + d_buffers.push_back((double*)volk_malloc(d_buffer_size*sizeof(double), + volk_get_alignment())); memset(d_buffers[n], 0, d_buffer_size*sizeof(double)); } @@ -94,7 +95,7 @@ namespace gr { // d_main_gui is a qwidget destroyed with its parent for(int n = 0; n < d_nconnections; n++) { - fft::free(d_buffers[n]); + volk_free(d_buffers[n]); } delete d_argv; @@ -300,8 +301,9 @@ namespace gr { // Resize buffers and replace data for(int n = 0; n < d_nconnections; n++) { - fft::free(d_buffers[n]); - d_buffers[n] = fft::malloc_double(d_buffer_size); + volk_free(d_buffers[n]); + d_buffers[n] = (double*)volk_malloc(d_buffer_size*sizeof(double), + volk_get_alignment()); memset(d_buffers[n], 0, d_buffer_size*sizeof(double)); } diff --git a/gr-qtgui/lib/waterfall_sink_c_impl.cc b/gr-qtgui/lib/waterfall_sink_c_impl.cc index 24983cd820..9fc4f2163f 100644 --- a/gr-qtgui/lib/waterfall_sink_c_impl.cc +++ b/gr-qtgui/lib/waterfall_sink_c_impl.cc @@ -75,13 +75,16 @@ namespace gr { d_shift = true; d_fft = new fft::fft_complex(d_fftsize, true); - d_fbuf = fft::malloc_float(d_fftsize); + d_fbuf = (float*)volk_malloc(d_fftsize*sizeof(float), + volk_get_alignment()); memset(d_fbuf, 0, d_fftsize*sizeof(float)); d_index = 0; for(int i = 0; i < d_nconnections; i++) { - 
d_residbufs.push_back(fft::malloc_complex(d_fftsize)); - d_magbufs.push_back(fft::malloc_double(d_fftsize)); + d_residbufs.push_back((gr_complex*)volk_malloc(d_fftsize*sizeof(gr_complex), + volk_get_alignment())); + d_magbufs.push_back((double*)volk_malloc(d_fftsize*sizeof(double), + volk_get_alignment())); memset(d_residbufs[i], 0, d_fftsize*sizeof(float)); memset(d_magbufs[i], 0, d_fftsize*sizeof(double)); } @@ -97,11 +100,11 @@ namespace gr { d_main_gui->close(); for(int i = 0; i < d_nconnections; i++) { - fft::free(d_residbufs[i]); - fft::free(d_magbufs[i]); + volk_free(d_residbufs[i]); + volk_free(d_magbufs[i]); } delete d_fft; - fft::free(d_fbuf); + volk_free(d_fbuf); delete d_argv; } @@ -368,11 +371,13 @@ namespace gr { // Resize residbuf and replace data for(int i = 0; i < d_nconnections; i++) { - fft::free(d_residbufs[i]); - fft::free(d_magbufs[i]); + volk_free(d_residbufs[i]); + volk_free(d_magbufs[i]); - d_residbufs[i] = fft::malloc_complex(newfftsize); - d_magbufs[i] = fft::malloc_double(newfftsize); + d_residbufs[i] = (gr_complex*)volk_malloc(newfftsize*sizeof(gr_complex), + volk_get_alignment()); + d_magbufs[i] = (double*)volk_malloc(newfftsize*sizeof(double), + volk_get_alignment()); memset(d_residbufs[i], 0, newfftsize*sizeof(gr_complex)); memset(d_magbufs[i], 0, newfftsize*sizeof(double)); @@ -390,8 +395,9 @@ namespace gr { delete d_fft; d_fft = new fft::fft_complex(d_fftsize, true); - fft::free(d_fbuf); - d_fbuf = fft::malloc_float(d_fftsize); + volk_free(d_fbuf); + d_fbuf = (float*)volk_malloc(d_fftsize*sizeof(float), + volk_get_alignment()); memset(d_fbuf, 0, d_fftsize*sizeof(float)); } } diff --git a/gr-qtgui/lib/waterfall_sink_f_impl.cc b/gr-qtgui/lib/waterfall_sink_f_impl.cc index d4cfee39a8..50525f1903 100644 --- a/gr-qtgui/lib/waterfall_sink_f_impl.cc +++ b/gr-qtgui/lib/waterfall_sink_f_impl.cc @@ -74,13 +74,16 @@ namespace gr { d_shift = true; d_fft = new fft::fft_complex(d_fftsize, true); - d_fbuf = fft::malloc_float(d_fftsize); + d_fbuf 
= (float*)volk_malloc(d_fftsize*sizeof(float), + volk_get_alignment()); memset(d_fbuf, 0, d_fftsize*sizeof(float)); d_index = 0; for(int i = 0; i < d_nconnections; i++) { - d_residbufs.push_back(fft::malloc_float(d_fftsize)); - d_magbufs.push_back(fft::malloc_double(d_fftsize)); + d_residbufs.push_back((float*)volk_malloc(d_fftsize*sizeof(float), + volk_get_alignment())); + d_magbufs.push_back((double*)volk_malloc(d_fftsize*sizeof(double), + volk_get_alignment())); memset(d_residbufs[i], 0, d_fftsize*sizeof(float)); memset(d_magbufs[i], 0, d_fftsize*sizeof(double)); } @@ -96,11 +99,11 @@ namespace gr { d_main_gui->close(); for(int i = 0; i < d_nconnections; i++) { - fft::free(d_residbufs[i]); - fft::free(d_magbufs[i]); + volk_free(d_residbufs[i]); + volk_free(d_magbufs[i]); } delete d_fft; - fft::free(d_fbuf); + volk_free(d_fbuf); delete d_argv; } @@ -369,11 +372,13 @@ namespace gr { // Resize residbuf and replace data for(int i = 0; i < d_nconnections; i++) { - fft::free(d_residbufs[i]); - fft::free(d_magbufs[i]); + volk_free(d_residbufs[i]); + volk_free(d_magbufs[i]); - d_residbufs[i] = fft::malloc_float(newfftsize); - d_magbufs[i] = fft::malloc_double(newfftsize); + d_residbufs[i] = (float*)volk_malloc(newfftsize*sizeof(float), + volk_get_alignment()); + d_magbufs[i] = (double*)volk_malloc(newfftsize*sizeof(double), + volk_get_alignment()); memset(d_residbufs[i], 0, newfftsize*sizeof(float)); memset(d_magbufs[i], 0, newfftsize*sizeof(double)); @@ -391,8 +396,9 @@ namespace gr { delete d_fft; d_fft = new fft::fft_complex(d_fftsize, true); - fft::free(d_fbuf); - d_fbuf = fft::malloc_float(d_fftsize); + volk_free(d_fbuf); + d_fbuf = (float*)volk_malloc(d_fftsize*sizeof(float), + volk_get_alignment()); memset(d_fbuf, 0, d_fftsize*sizeof(float)); } } diff --git a/volk/CMakeLists.txt b/volk/CMakeLists.txt index 40572f3d04..cf651edd81 100644 --- a/volk/CMakeLists.txt +++ b/volk/CMakeLists.txt @@ -122,6 +122,7 @@ install(FILES ${CMAKE_BINARY_DIR}/include/volk/volk_cpu.h 
${CMAKE_BINARY_DIR}/include/volk/volk_config_fixed.h ${CMAKE_BINARY_DIR}/include/volk/volk_typedefs.h + ${CMAKE_SOURCE_DIR}/include/volk/volk_malloc.h DESTINATION include/volk COMPONENT "volk_devel" ) diff --git a/volk/include/volk/volk_malloc.h b/volk/include/volk/volk_malloc.h new file mode 100644 index 0000000000..6ec7391efd --- /dev/null +++ b/volk/include/volk/volk_malloc.h @@ -0,0 +1,66 @@ +/* -*- c -*- */ +/* + * Copyright 2014 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifndef INCLUDED_VOLK_MALLOC_H +#define INCLUDED_VOLK_MALLOC_H + +#include <volk/volk_common.h> +#include <stdlib.h> + +__VOLK_DECL_BEGIN + +/*! + * \brief Allocate \p size bytes of data aligned to \p alignment. + * + * \details + * Because we don't have a standard method to allocate buffers in + * memory that are guaranteed to be on an alignment, VOLK handles this + * itself. The volk_malloc function behaves like malloc in that it + * returns a pointer to the allocated memory. However, it also takes + * in an alignment specification, which is usually something like 16 or + * 32 to ensure that the aligned memory is located on a particular + * byte boundary for use with SIMD.
+ * + * Internally, the volk_malloc first checks if the compiler is C11 + * compliant and uses the new aligned_alloc method. If not, it checks + * if the system is POSIX compliant and uses posix_memalign. If that + * fails, volk_malloc handles the memory allocation and alignment + * internally. + * + * Because of the ways in which volk_malloc may allocate memory, it is + * important to always free volk_malloc pointers using volk_free. + * + * \param size The number of bytes to allocate. + * \param alignment The byte alignment of the allocated memory. + * \return pointer to aligned memory. + */ +VOLK_API void *volk_malloc(size_t size, size_t alignment); + +/*! + * \brief Frees memory allocated by volk_malloc. + * \param aptr The aligned pointer allocated by volk_malloc. + */ +VOLK_API void volk_free(void *aptr); + +__VOLK_DECL_END + +#endif /* INCLUDED_VOLK_MALLOC_H */ diff --git a/volk/lib/CMakeLists.txt b/volk/lib/CMakeLists.txt index cda10872c7..dbebac067f 100644 --- a/volk/lib/CMakeLists.txt +++ b/volk/lib/CMakeLists.txt @@ -413,6 +413,7 @@ endif() list(APPEND volk_sources ${CMAKE_CURRENT_SOURCE_DIR}/volk_prefs.c ${CMAKE_CURRENT_SOURCE_DIR}/volk_rank_archs.c + ${CMAKE_CURRENT_SOURCE_DIR}/volk_malloc.c ${volk_gen_sources} ) diff --git a/volk/lib/volk_malloc.c b/volk/lib/volk_malloc.c new file mode 100644 index 0000000000..1333345800 --- /dev/null +++ b/volk/lib/volk_malloc.c @@ -0,0 +1,176 @@ +/* -*- c -*- */ +/* + * Copyright 2014 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#include <volk/volk_malloc.h> +#include <stdio.h> +#include <stdlib.h> + +/* + * For #defines used to determine support for allocation functions, + * see: http://linux.die.net/man/3/aligned_alloc +*/ + +// Disabling use of aligned_alloc. This function requires that size be +// a multiple of alignment, which is too restrictive for many uses of +// VOLK. + +//// If we are using C11 standard, use the aligned_alloc +//#ifdef _ISOC11_SOURCE +// +//void *volk_malloc(size_t size, size_t alignment) +//{ +// void *ptr = aligned_alloc(alignment, size); +// if(ptr == NULL) { +// fprintf(stderr, "VOLK: Error allocating memory (aligned_alloc)\n"); +// } +// return ptr; +//} +// +//void volk_free(void *ptr) +//{ +// free(ptr); +//} +// +//#else // _ISOC11_SOURCE + +// Otherwise, test if we are a POSIX or X/Open system +// This only has a restriction that alignment be a power of 2. +#if _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 + +void *volk_malloc(size_t size, size_t alignment) +{ + void *ptr; + int err = posix_memalign(&ptr, alignment, size); + if(err == 0) { + return ptr; + } + else { + fprintf(stderr, "VOLK: Error allocating memory (posix_memalign: %d)\n", err); + return NULL; + } +} + +void volk_free(void *ptr) +{ + free(ptr); +} + +// No standard handlers; we'll do it ourselves. +#else // _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 + +typedef struct mbuf_t { + void *orig; + void *align; + struct mbuf_t *next; +} mbuf; + +/* + Keep track of the pointers we've allocated. We hold a linked list + from volk_first_ptr to volk_last_ptr and the number of allocated + pointers. 
When allocating a new pointer, we create the pointer with + malloc, find how it is misaligned, and create a new pointer at the + alignment boundary. Both of these are stored in the linked list data + structure. When free, we are passed the aligned pointer and use that + to look up the original pointer, which we use to actually free the + entire allocated memory. +*/ +unsigned int volk_nptrs = 0; +mbuf* volk_first_ptr = NULL; +mbuf* volk_last_ptr = NULL; + +void* +volk_malloc(size_t size, size_t alignment) +{ + // Allocate memory plus enough extra to adjust alignment + void *ptr = malloc(size + (alignment - 1)); + if(ptr == NULL) { + free(ptr); + fprintf(stderr, "VOLK: Error allocating memory (malloc)\n"); + return NULL; + } + + // Find and return the first aligned boundary of the pointer + void *aptr = ptr; + if((unsigned long)ptr % alignment != 0) + aptr = ptr + (alignment - ((unsigned long)ptr % alignment)); + + // Store original pointer and aligned pointers + mbuf *n = (mbuf*)malloc(sizeof(mbuf)); + n->orig = ptr; + n->align = aptr; + n->next = NULL; + if(volk_first_ptr == NULL) { + volk_first_ptr = n; + } + else { + volk_last_ptr->next = n; + } + volk_last_ptr = n; + volk_nptrs++; + + return aptr; +} + +void volk_free(void *ptr) +{ + unsigned long aptr = (unsigned long)ptr; + mbuf *prev = volk_first_ptr; + mbuf *p = volk_first_ptr; + + // Look for the aligned pointer until we either find it or have + // walked the entire list of allocated pointers + while(p != NULL) { + if((unsigned long)(p->align) == aptr) { + // If the memory is found at the first pointer, move this + // pointer to the next in the list + if(p == volk_first_ptr) { + if(volk_first_ptr == volk_last_ptr) + volk_last_ptr = NULL; + volk_first_ptr = p->next; + } + // Otherwise, link the previous to the following to skip the + // struct we're deleting. 
+ else { + if(p == volk_last_ptr) + volk_last_ptr = prev; + prev->next = p->next; + } + + // Free the original pointer to remove all memory allocated + free((void*)p->orig); + volk_nptrs--; + + // Free the struct to clean up all memory and exit + free(p); + + return; + } + // Not found, update our pointers to look at the next in the list + prev = p; + p = p->next; + } + fprintf(stderr, "VOLK: tried to free a non-VOLK pointer\n"); +} + +#endif // _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 + +//#endif // _ISOC11_SOURCE diff --git a/volk/tmpl/volk.tmpl.h b/volk/tmpl/volk.tmpl.h index c8731ed321..1d0957efd0 100644 --- a/volk/tmpl/volk.tmpl.h +++ b/volk/tmpl/volk.tmpl.h @@ -26,6 +26,7 @@ #include <volk/volk_config_fixed.h> #include <volk/volk_common.h> #include <volk/volk_complex.h> +#include <volk/volk_malloc.h> #include <stdlib.h> #include <stdbool.h> |