diff options
author | Tom Rondeau <trondeau@vt.edu> | 2012-06-15 08:43:20 -0400 |
---|---|---|
committer | Tom Rondeau <trondeau@vt.edu> | 2012-06-15 08:43:20 -0400 |
commit | 5585c71229cfa7886e0bd090828cd1f5104f6b27 (patch) | |
tree | c80bcc8821fb10a44c073ce1fa2a0f4816027bef /gr-filter | |
parent | a74286a2aa7fcddb52c165ba2c17cb2f55b5b592 (diff) |
filter: adding scc and fsf versions of filter with associated new Volk kernels.
These routines work and pass QA. They could use some performance work. The FSF version is just slightly slower than before; the SCC version is more noticeably slower.
Both could probably benefit from using SSE2 intrinsics to handle the shorts.
Diffstat (limited to 'gr-filter')
-rw-r--r-- | gr-filter/include/filter/CMakeLists.txt | 2 | ||||
-rw-r--r-- | gr-filter/include/filter/fir_filter.h | 62 | ||||
-rw-r--r-- | gr-filter/lib/CMakeLists.txt | 2 | ||||
-rw-r--r-- | gr-filter/lib/fir_filter.cc | 232 | ||||
-rwxr-xr-x | gr-filter/python/qa_fir_filter.py | 95 | ||||
-rw-r--r-- | gr-filter/swig/filter_swig.i | 6 |
6 files changed, 397 insertions, 2 deletions
diff --git a/gr-filter/include/filter/CMakeLists.txt b/gr-filter/include/filter/CMakeLists.txt index 5b209873c2..2620d3f544 100644 --- a/gr-filter/include/filter/CMakeLists.txt +++ b/gr-filter/include/filter/CMakeLists.txt @@ -64,7 +64,7 @@ endmacro(expand_h) ######################################################################## # Invoke macro to generate various sources ####################################################################### -expand_h(fir_filter_XXX fff ccf ccc) +expand_h(fir_filter_XXX fff ccf ccc scc fsf) add_custom_target(filter_generated_includes DEPENDS ${generated_includes} diff --git a/gr-filter/include/filter/fir_filter.h b/gr-filter/include/filter/fir_filter.h index 8bfaa4f50e..1fb3afb4d4 100644 --- a/gr-filter/include/filter/fir_filter.h +++ b/gr-filter/include/filter/fir_filter.h @@ -122,6 +122,68 @@ namespace gr { int d_naligned; }; + /**************************************************************/ + + class FILTER_API fir_filter_scc + { + public: + fir_filter_scc(int decimation, + const std::vector<gr_complex> &taps); + ~fir_filter_scc(); + + void set_taps(const std::vector<gr_complex> &taps); + std::vector<gr_complex> taps() const; + unsigned int ntaps() const; + + gr_complex filter(const short input[]); + void filterN(gr_complex output[], + const short input[], + unsigned long n); + void filterNdec(gr_complex output[], + const short input[], + unsigned long n, + unsigned int decimate); + + private: + unsigned int d_ntaps; + gr_complex *d_taps; + gr_complex **d_aligned_taps; + gr_complex *d_output; + int d_align; + int d_naligned; + }; + + /**************************************************************/ + + class FILTER_API fir_filter_fsf + { + public: + fir_filter_fsf(int decimation, + const std::vector<float> &taps); + ~fir_filter_fsf(); + + void set_taps(const std::vector<float> &taps); + std::vector<float> taps() const; + unsigned int ntaps() const; + + short filter(const float input[]); + void filterN(short output[], + const 
float input[], + unsigned long n); + void filterNdec(short output[], + const float input[], + unsigned long n, + unsigned int decimate); + + private: + unsigned int d_ntaps; + float *d_taps; + float **d_aligned_taps; + short *d_output; + int d_align; + int d_naligned; + }; + } /* namespace kernel */ } /* namespace filter */ } /* namespace gr */ diff --git a/gr-filter/lib/CMakeLists.txt b/gr-filter/lib/CMakeLists.txt index b51a23bab5..f5dbd1bb3b 100644 --- a/gr-filter/lib/CMakeLists.txt +++ b/gr-filter/lib/CMakeLists.txt @@ -80,7 +80,7 @@ endmacro(expand_cc) ######################################################################## # Invoke macro to generate various sources ######################################################################## -expand_cc(fir_filter_XXX_impl fff ccf ccc) +expand_cc(fir_filter_XXX_impl fff ccf ccc scc fsf) ######################################################################## diff --git a/gr-filter/lib/fir_filter.cc b/gr-filter/lib/fir_filter.cc index 18568da9d0..be8017400a 100644 --- a/gr-filter/lib/fir_filter.cc +++ b/gr-filter/lib/fir_filter.cc @@ -349,6 +349,238 @@ namespace gr { } } + /**************************************************************/ + + fir_filter_scc::fir_filter_scc(int decimation, + const std::vector<gr_complex> &taps) + { + d_align = volk_get_alignment(); + d_naligned = d_align / sizeof(short); + + d_taps = NULL; + set_taps(taps); + + // Make sure the output sample is always aligned, too. 
+ d_output = fft::malloc_complex(1); + } + + fir_filter_scc::~fir_filter_scc() + { + // Free taps + if(d_taps != NULL) { + fft::free(d_taps); + d_taps = NULL; + } + + // Free all aligned taps + for(int i = 0; i < d_naligned; i++) { + fft::free(d_aligned_taps[i]); + } + fft::free(d_aligned_taps); + + // Free output sample + fft::free(d_output); + } + + void + fir_filter_scc::set_taps(const std::vector<gr_complex> &taps) + { + // Free the taps if already allocated + if(d_taps != NULL) { + fft::free(d_taps); + d_taps = NULL; + + for(int i = 0; i < d_naligned; i++) { + fft::free(d_aligned_taps[i]); + } + fft::free(d_aligned_taps); + } + + d_ntaps = (int)taps.size(); + d_taps = fft::malloc_complex(d_ntaps); + for(unsigned int i = 0; i < d_ntaps; i++) { + d_taps[d_ntaps-i-1] = taps[i]; + } + + // Make a set of taps at all possible arch alignments + d_aligned_taps = (gr_complex**)malloc(d_naligned*sizeof(gr_complex**)); + for(int i = 0; i < d_naligned; i++) { + d_aligned_taps[i] = fft::malloc_complex(d_ntaps+d_naligned-1); + memset(d_aligned_taps[i], 0, sizeof(gr_complex)*(d_ntaps+d_naligned-1)); + memcpy(&d_aligned_taps[i][i], d_taps, sizeof(gr_complex)*(d_ntaps)); + } + + } + + std::vector<gr_complex> + fir_filter_scc::taps() const + { + std::vector<gr_complex> t; + for(unsigned int i = 0; i < d_ntaps; i++) + t.push_back(d_taps[d_ntaps-i-1]); + return t; + } + + unsigned int + fir_filter_scc::ntaps() const + { + return d_ntaps; + } + + gr_complex + fir_filter_scc::filter(const short input[]) + { + const short *ar = (short *)((unsigned long) input & ~(d_align-1)); + unsigned al = input - ar; + + volk_16i_32fc_dot_prod_32fc_a(d_output, ar, + d_aligned_taps[al], + (d_ntaps+al)); + + return *d_output; + } + + void + fir_filter_scc::filterN(gr_complex output[], + const short input[], + unsigned long n) + { + for(unsigned long i = 0; i < n; i++) + output[i] = filter(&input[i]); + } + + + void + fir_filter_scc::filterNdec(gr_complex output[], + const short input[], + unsigned 
long n, + unsigned int decimate) + { + unsigned long j = 0; + for(unsigned long i = 0; i < n; i++){ + output[i] = filter(&input[j]); + j += decimate; + } + } + + /**************************************************************/ + + fir_filter_fsf::fir_filter_fsf(int decimation, + const std::vector<float> &taps) + { + d_align = volk_get_alignment(); + d_naligned = d_align / sizeof(float); + + d_taps = NULL; + set_taps(taps); + + // Make sure the output sample is always aligned, too. + d_output = (short*)fft::malloc_float(1); + } + + fir_filter_fsf::~fir_filter_fsf() + { + // Free taps + if(d_taps != NULL) { + fft::free(d_taps); + d_taps = NULL; + } + + // Free all aligned taps + for(int i = 0; i < d_naligned; i++) { + fft::free(d_aligned_taps[i]); + } + fft::free(d_aligned_taps); + + // Free output sample + fft::free(d_output); + } + + void + fir_filter_fsf::set_taps(const std::vector<float> &taps) + { + // Free the taps if already allocated + if(d_taps != NULL) { + fft::free(d_taps); + d_taps = NULL; + + for(int i = 0; i < d_naligned; i++) { + fft::free(d_aligned_taps[i]); + } + fft::free(d_aligned_taps); + } + + d_ntaps = (int)taps.size(); + d_taps = fft::malloc_float(d_ntaps); + for(unsigned int i = 0; i < d_ntaps; i++) { + d_taps[d_ntaps-i-1] = taps[i]; + } + + // Make a set of taps at all possible arch alignments + d_aligned_taps = (float**)malloc(d_naligned*sizeof(float**)); + for(int i = 0; i < d_naligned; i++) { + d_aligned_taps[i] = fft::malloc_float(d_ntaps+d_naligned-1); + memset(d_aligned_taps[i], 0, sizeof(float)*(d_ntaps+d_naligned-1)); + memcpy(&d_aligned_taps[i][i], d_taps, sizeof(float)*(d_ntaps)); + } + } + + std::vector<float> + fir_filter_fsf::taps() const + { + std::vector<float> t; + for(unsigned int i = 0; i < d_ntaps; i++) + t.push_back(d_taps[d_ntaps-i-1]); + return t; + } + + unsigned int + fir_filter_fsf::ntaps() const + { + return d_ntaps; + } + + short + fir_filter_fsf::filter(const float input[]) + { + const float *ar = (float 
*)((unsigned long) input & ~(d_align-1)); + unsigned al = input - ar; + + volk_32f_x2_dot_prod_16i_a(d_output, ar, + d_aligned_taps[al], + (d_ntaps+al)); + + //float out = 0; + //for(unsigned int i = 0; i < d_ntaps; i++) { + // out += d_taps[i] * input[i]; + //} + //*d_output = (short)out; + + return *d_output; + } + + void + fir_filter_fsf::filterN(short output[], + const float input[], + unsigned long n) + { + for(unsigned long i = 0; i < n; i++) + output[i] = filter(&input[i]); + } + + void + fir_filter_fsf::filterNdec(short output[], + const float input[], + unsigned long n, + unsigned int decimate) + { + unsigned long j = 0; + for(unsigned long i = 0; i < n; i++){ + output[i] = filter(&input[j]); + j += decimate; + } + } + } /* namespace kernel */ } /* namespace filter */ } /* namespace gr */ diff --git a/gr-filter/python/qa_fir_filter.py b/gr-filter/python/qa_fir_filter.py index ac20286ccb..2a61498a26 100755 --- a/gr-filter/python/qa_fir_filter.py +++ b/gr-filter/python/qa_fir_filter.py @@ -218,6 +218,101 @@ class test_filter(gr_unittest.TestCase): self.assertComplexTuplesAlmostEqual(expected_data, result_data, 5) + def test_fir_filter_scc_001(self): + src_data = 40*[1, 2, 3, 4] + expected_data = ((0.5+1j), (1.5+3j), (3+6j), (5+10j), (5.5+11j), + (6.5+13j), (8+16j), (10+20j), (10.5+21j), (11.5+23j), + (13+26j), (15+30j), (15.5+31j), (16.5+33j), (18+36j), + (20+40j), (20.5+41j), (21.5+43j), (23+46j), (25+50j), + (25.5+51j), (26.5+53j), (28+56j), (30+60j), (30.5+61j), + (31.5+63j), (33+66j), (35+70j), (35.5+71j), (36.5+73j), + (38+76j), (40+80j), (40.5+81j), (41.5+83j), (43+86j), + (45+90j), (45.5+91j), (46.5+93j), (48+96j), (50+100j), + (50+100j), (50+100j), (50+100j), (50+100j), (50+100j), + (50+100j), (50+100j), (50+100j), (50+100j), (50+100j), + (50+100j), (50+100j), (50+100j), (50+100j), (50+100j), + (50+100j), (50+100j), (50+100j), (50+100j), (50+100j), + (50+100j), (50+100j), (50+100j), (50+100j), (50+100j), + (50+100j), (50+100j), (50+100j), (50+100j), 
(50+100j), + (50+100j), (50+100j), (50+100j), (50+100j), (50+100j), + (50+100j), (50+100j), (50+100j), (50+100j), (50+100j), + (50+100j), (50+100j), (50+100j), (50+100j), (50+100j), + (50+100j), (50+100j), (50+100j), (50+100j), (50+100j), + (50+100j), (50+100j), (50+100j), (50+100j), (50+100j), + (50+100j), (50+100j), (50+100j), (50+100j), (50+100j), + (50+100j), (50+100j), (50+100j), (50+100j), (50+100j), + (50+100j), (50+100j), (50+100j), (50+100j), (50+100j), + (50+100j), (50+100j), (50+100j), (50+100j), (50+100j), + (50+100j), (50+100j), (50+100j), (50+100j), (50+100j), + (50+100j), (50+100j), (50+100j), (50+100j), (50+100j), + (50+100j), (50+100j), (50+100j), (50+100j), (50+100j), + (50+100j), (50+100j), (50+100j), (50+100j), (50+100j), + (50+100j), (50+100j), (50+100j), (50+100j), (50+100j), + (50+100j), (50+100j), (50+100j), (50+100j), (50+100j), + (50+100j), (50+100j), (50+100j), (50+100j), (50+100j), + (50+100j), (50+100j), (50+100j), (50+100j), (50+100j), + (50+100j), (50+100j), (50+100j), (50+100j), (50+100j)) + src = gr.vector_source_s(src_data) + op = filter.fir_filter_scc(1, 20*[0.5+1j, 0.5+1j]) + dst = gr.vector_sink_c() + self.tb.connect(src, op, dst) + self.tb.run() + result_data = dst.data() + self.assertComplexTuplesAlmostEqual(expected_data, result_data, 5) + + + def test_fir_filter_scc_002(self): + src_data = 40*[1, 2, 3, 4] + expected_data = ((0.5+1j), (5.5+11j), (10.5+21j), (15.5+31j), (20.5+41j), + (25.5+51j), (30.5+61j), (35.5+71j), (40.5+81j), (45.5+91j), + (50+100j), (50+100j), (50+100j), (50+100j), (50+100j), + (50+100j), (50+100j), (50+100j), (50+100j), (50+100j), + (50+100j), (50+100j), (50+100j), (50+100j), (50+100j), + (50+100j), (50+100j), (50+100j), (50+100j), (50+100j), + (50+100j), (50+100j), (50+100j), (50+100j), (50+100j), + (50+100j), (50+100j), (50+100j), (50+100j), (50+100j)) + src = gr.vector_source_s(src_data) + op = filter.fir_filter_scc(4, 20*[0.5+1j, 0.5+1j]) + dst = gr.vector_sink_c() + self.tb.connect(src, op, dst) + 
self.tb.run() + result_data = dst.data() + self.assertComplexTuplesAlmostEqual(expected_data, result_data, 5) + + def test_fir_filter_fsf_001(self): + src_data = 40*[1, 2, 3, 4] + expected_data =(0, 1, 3, 5, 5, 6, 8, 10, 10, 11, 13, 15, 15, 16, 18, 20, 20, + 21, 23, 25, 25, 26, 28, 30, 30, 31, 33, 35, 35, 36, 38, 40, 40, + 41, 43, 45, 45, 46, 48, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50) + src = gr.vector_source_f(src_data) + op = filter.fir_filter_fsf(1, 20*[0.5, 0.5]) + dst = gr.vector_sink_s() + self.tb.connect(src, op, dst) + self.tb.run() + result_data = dst.data() + self.assertComplexTuplesAlmostEqual(expected_data, result_data, 5) + + + def test_fir_filter_fsf_002(self): + src_data = 40*[1, 2, 3, 4] + expected_data = (0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50) + src = gr.vector_source_f(src_data) + op = filter.fir_filter_fsf(4, 20*[0.5, 0.5]) + dst = gr.vector_sink_s() + self.tb.connect(src, op, dst) + self.tb.run() + result_data = dst.data() + self.assertComplexTuplesAlmostEqual(expected_data, result_data, 5) + if __name__ == '__main__': gr_unittest.run(test_filter, "test_filter.xml") diff --git a/gr-filter/swig/filter_swig.i b/gr-filter/swig/filter_swig.i index cc15b5722f..c9de3fb9af 100644 --- a/gr-filter/swig/filter_swig.i +++ b/gr-filter/swig/filter_swig.i @@ -36,6 +36,8 @@ #include "filter/fir_filter_fff.h" #include "filter/fir_filter_ccf.h" #include "filter/fir_filter_ccc.h" +#include 
"filter/fir_filter_scc.h" +#include "filter/fir_filter_fsf.h" #include "filter/fft_filter_ccc.h" #include "filter/fft_filter_fff.h" #include "filter/hilbert_fc.h" @@ -50,6 +52,8 @@ %include "filter/fir_filter_fff.h" %include "filter/fir_filter_ccf.h" %include "filter/fir_filter_ccc.h" +%include "filter/fir_filter_scc.h" +%include "filter/fir_filter_fsf.h" %include "filter/fft_filter_ccc.h" %include "filter/fft_filter_fff.h" %include "filter/hilbert_fc.h" @@ -61,6 +65,8 @@ GR_SWIG_BLOCK_MAGIC2(filter, filter_delay_fc); GR_SWIG_BLOCK_MAGIC2(filter, fir_filter_fff); GR_SWIG_BLOCK_MAGIC2(filter, fir_filter_ccf); GR_SWIG_BLOCK_MAGIC2(filter, fir_filter_ccc); +GR_SWIG_BLOCK_MAGIC2(filter, fir_filter_scc); +GR_SWIG_BLOCK_MAGIC2(filter, fir_filter_fsf); GR_SWIG_BLOCK_MAGIC2(filter, fft_filter_ccc); GR_SWIG_BLOCK_MAGIC2(filter, fft_filter_fff); GR_SWIG_BLOCK_MAGIC2(filter, hilbert_fc); |