diff options
author | Tom Rondeau <trondeau@vt.edu> | 2012-06-10 19:35:55 -0400 |
---|---|---|
committer | Tom Rondeau <trondeau@vt.edu> | 2012-06-10 22:35:59 -0400 |
commit | 227448e247ba720b87b99e1b9382cd3737241ab3 (patch) | |
tree | c466e391fb6391b9020457486f852f6f86c6fd20 /gr-filter | |
parent | 14532d8da0f40f2b58595bd7e217004bdbfc90e3 (diff) |
filter: working aligned taps loads for regular and decimating filter.
Has working QA code but needs significant performance improvements.
Diffstat (limited to 'gr-filter')
-rw-r--r-- | gr-filter/include/filter/fir_filter.h | 12 | ||||
-rw-r--r-- | gr-filter/lib/fir_filter.cc | 49 | ||||
-rw-r--r-- | gr-filter/lib/fir_filter_XXX_impl.cc.t | 8 | ||||
-rwxr-xr-x | gr-filter/python/qa_fir_filter.py | 13 |
4 files changed, 65 insertions, 17 deletions
diff --git a/gr-filter/include/filter/fir_filter.h b/gr-filter/include/filter/fir_filter.h index fba4641bf3..1eb70f92a4 100644 --- a/gr-filter/include/filter/fir_filter.h +++ b/gr-filter/include/filter/fir_filter.h @@ -41,8 +41,8 @@ namespace gr { void set_taps(const std::vector<float> &taps); std::vector<float> taps() const; unsigned int ntaps() const; - - float filter(const float input[]); + + inline float filter(const float input[]); void filterN(float output[], const float input[], unsigned long n); @@ -53,8 +53,10 @@ namespace gr { private: unsigned int d_ntaps; - float *d_taps; - float *d_aligned_taps[4]; + float *d_taps; + float **d_aligned_taps; + int d_offset; + float *d_output; }; /**************************************************************/ @@ -100,7 +102,7 @@ namespace gr { gr_complex filter(const gr_complex input[]); void filterN(gr_complex output[], const gr_complex input[], - unsigned long n); + unsigned long n) void filterNdec(gr_complex output[], const gr_complex input[], unsigned long n, diff --git a/gr-filter/lib/fir_filter.cc b/gr-filter/lib/fir_filter.cc index 3abcda53e9..098dd8d367 100644 --- a/gr-filter/lib/fir_filter.cc +++ b/gr-filter/lib/fir_filter.cc @@ -23,6 +23,7 @@ #include <filter/fir_filter.h> #include <fft/fft.h> #include <volk/volk.h> +#include <cstdio> namespace gr { namespace filter { @@ -33,15 +34,29 @@ namespace gr { { d_taps = NULL; set_taps(taps); + d_offset = 0; + + // Make sure the output sample is always aligned, too. + d_output = fft::malloc_float(1); } fir_filter_fff::~fir_filter_fff() { + // Free taps if(d_taps != NULL) { fft::free(d_taps); d_taps = NULL; } - } + + // Free all aligned taps + for(int i = 0; i < 4; i++) { + fft::free(d_aligned_taps[i]); + } + fft::free(d_aligned_taps); + + // Free output sample + fft::free(d_output); + } void fir_filter_fff::set_taps(const std::vector<float> &taps) @@ -50,6 +65,11 @@ namespace gr { if(d_taps != NULL) { fft::free(d_taps); d_taps = NULL; + + for(int i = 0; i < 4; i++) { + fft::free(d_aligned_taps[i]); + } + fft::free(d_aligned_taps); } d_ntaps = (int)taps.size(); @@ -58,6 +78,8 @@ namespace gr { d_taps[d_ntaps-i-1] = taps[i]; } + // Make a set of taps at all possible arch alignments + d_aligned_taps = (float**)malloc(4*sizeof(float**)); for(int i = 0; i < 4; i++) { d_aligned_taps[i] = fft::malloc_float(d_ntaps+3); memset(d_aligned_taps[i], 0, sizeof(float)*(d_ntaps+3)); @@ -83,12 +105,9 @@ namespace gr { float fir_filter_fff::filter(const float input[]) { - float output; - - //const float *ar = (float*)((unsigned long)input & ~15); - - volk_32f_x2_dot_prod_32f_u(&output, input, d_taps, d_ntaps); - return output; + volk_32f_x2_dot_prod_32f_a(d_output, input, + d_aligned_taps[d_offset], d_ntaps+3); + return *d_output; } void @@ -96,8 +115,16 @@ namespace gr { const float input[], unsigned long n) { - for(unsigned long i = 0; i < n; i++) - output[i] = filter(&input[i]); + unsigned long ar = ((unsigned long) input); + int off = (ar - (ar & ~15))/4; + + int j = -off; + d_offset = off; + for(unsigned long i = 0; i < n; i++) { + output[i] = filter(&input[j]); + d_offset= (d_offset+1) & 0x03; + j += (d_offset == 0 ? 4 : 0); + } } @@ -108,8 +135,8 @@ namespace gr { unsigned int decimate) { unsigned long j = 0; - for(unsigned long i = 0; i < n; i++){ - output[i] = filter(&input[j]); + for(unsigned long i = 0; i < n; i++) { + filterN(&output[i], &input[j], 1); j += decimate; } } diff --git a/gr-filter/lib/fir_filter_XXX_impl.cc.t b/gr-filter/lib/fir_filter_XXX_impl.cc.t index c3637042d7..18bec38be3 100644 --- a/gr-filter/lib/fir_filter_XXX_impl.cc.t +++ b/gr-filter/lib/fir_filter_XXX_impl.cc.t @@ -26,6 +26,7 @@ #include "@IMPL_NAME@.h" #include <gr_io_signature.h> +#include <volk/volk.h> namespace gr { namespace filter { @@ -47,6 +48,10 @@ namespace gr { d_fir = new kernel::@BASE_NAME@(decimation, taps); d_updated = false; set_history(d_fir->ntaps()); + + const int alignment_multiple = + volk_get_alignment() / sizeof(float); + set_alignment(std::max(1, alignment_multiple)); } @IMPL_NAME@::~@IMPL_NAME@() @@ -85,7 +90,8 @@ namespace gr { d_fir->filterN(out, in, noutput_items); } else { - d_fir->filterNdec(out, in, noutput_items, decimation()); + d_fir->filterNdec(out, in, noutput_items, + decimation()); } return noutput_items; diff --git a/gr-filter/python/qa_fir_filter.py b/gr-filter/python/qa_fir_filter.py index 38bfd9ea51..93974bb89a 100755 --- a/gr-filter/python/qa_fir_filter.py +++ b/gr-filter/python/qa_fir_filter.py @@ -41,6 +41,19 @@ class test_filter(gr_unittest.TestCase): result_data = dst.data() self.assertFloatTuplesAlmostEqual(expected_data, result_data, 5) + def test_fir_filter_fff_002(self): + src_data = 10*[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + expected_data = [0.0,] + 4*[3.5, 7.5, 1.5, 5.5, 4.5,] + [3.5, 7.5, 1.5, 5.5] + src = gr.vector_source_f(src_data) + op = filter.fir_filter_fff(4, [0.5, 0.5]) + dst = gr.vector_sink_f() + self.tb.connect(src, op, dst) + self.tb.run() + result_data = dst.data() + print result_data + print expected_data + self.assertFloatTuplesAlmostEqual(expected_data, result_data, 5) + def test_fir_filter_ccf_001(self): src_data = [1+1j, 2+2j, 3+3j, 4+4j] expected_data = [0.5+0.5j, 1.5+1.5j, 2.5+2.5j, 3.5+3.5j] |