diff options
author | Tom Rondeau <trondeau@vt.edu> | 2012-06-14 12:44:41 -0400 |
---|---|---|
committer | Tom Rondeau <trondeau@vt.edu> | 2012-06-14 16:13:13 -0400 |
commit | 4e2f38528e90ee4a05644d48d015b098a8651405 (patch) | |
tree | b0da1219ef0e10a770d0a04ee72e7dc425a17353 /gr-filter | |
parent | dae3b39098c16573f9c70e77f1a9a6b09ecfd041 (diff) |
filter: switched ccc FIR filter to use aligned VOLK calls.
Diffstat (limited to 'gr-filter')
-rw-r--r-- | gr-filter/include/filter/fir_filter.h | 8 | ||||
-rw-r--r-- | gr-filter/lib/fir_filter.cc | 39 |
2 files changed, 44 insertions, 3 deletions
```diff
diff --git a/gr-filter/include/filter/fir_filter.h b/gr-filter/include/filter/fir_filter.h
index 99acb7a0a2..3ce21d3049 100644
--- a/gr-filter/include/filter/fir_filter.h
+++ b/gr-filter/include/filter/fir_filter.h
@@ -85,6 +85,10 @@ namespace gr {
       private:
         unsigned int d_ntaps;
         gr_complex *d_taps;
+        float **d_aligned_taps;
+        gr_complex *d_output;
+        int d_align;
+        int d_naligned;
       };

       /**************************************************************/
@@ -112,6 +116,10 @@ namespace gr {
       private:
         unsigned int d_ntaps;
         gr_complex *d_taps;
+        gr_complex **d_aligned_taps;
+        gr_complex *d_output;
+        int d_align;
+        int d_naligned;
       };

     } /* namespace kernel */
diff --git a/gr-filter/lib/fir_filter.cc b/gr-filter/lib/fir_filter.cc
index 1e59902949..973be45d4b 100644
--- a/gr-filter/lib/fir_filter.cc
+++ b/gr-filter/lib/fir_filter.cc
@@ -224,16 +224,32 @@ namespace gr {
       fir_filter_ccc::fir_filter_ccc(int decimation,
                                      const std::vector<gr_complex> &taps)
       {
+        d_align = volk_get_alignment();
+        d_naligned = d_align / sizeof(gr_complex);
+
         d_taps = NULL;
         set_taps(taps);
+
+        // Make sure the output sample is always aligned, too.
+        d_output = fft::malloc_complex(1);
       }

       fir_filter_ccc::~fir_filter_ccc()
       {
+        // Free taps
         if(d_taps != NULL) {
           fft::free(d_taps);
           d_taps = NULL;
         }
+
+        // Free all aligned taps
+        for(int i = 0; i < d_naligned; i++) {
+          fft::free(d_aligned_taps[i]);
+        }
+        fft::free(d_aligned_taps);
+
+        // Free output sample
+        fft::free(d_output);
       }

       void
@@ -243,6 +259,11 @@ namespace gr {
         if(d_taps != NULL) {
           fft::free(d_taps);
           d_taps = NULL;
+
+          for(int i = 0; i < d_naligned; i++) {
+            fft::free(d_aligned_taps[i]);
+          }
+          fft::free(d_aligned_taps);
         }

         d_ntaps = (int)taps.size();
@@ -250,6 +271,14 @@ namespace gr {
         for(unsigned int i = 0; i < d_ntaps; i++) {
           d_taps[d_ntaps-i-1] = taps[i];
         }
+
+        // Make a set of taps at all possible arch alignments
+        d_aligned_taps = (gr_complex**)malloc(d_naligned*sizeof(gr_complex**));
+        for(int i = 0; i < d_naligned; i++) {
+          d_aligned_taps[i] = fft::malloc_complex(d_ntaps+d_naligned-1);
+          memset(d_aligned_taps[i], 0, sizeof(gr_complex)*(d_ntaps+d_naligned-1));
+          memcpy(&d_aligned_taps[i][i], d_taps, sizeof(gr_complex)*(d_ntaps));
+        }
       }

       std::vector<gr_complex>
@@ -270,9 +299,13 @@ namespace gr {
       gr_complex
       fir_filter_ccc::filter(const gr_complex input[])
       {
-        gr_complex output;
-        volk_32fc_x2_dot_prod_32fc_u(&output, input, d_taps, d_ntaps);
-        return output;
+        const gr_complex *ar = (gr_complex *)((unsigned long) input & ~(d_align-1));
+        unsigned al = input - ar;
+
+        volk_32fc_x2_dot_prod_32fc_a(d_output, ar,
+                                     d_aligned_taps[al],
+                                     (d_ntaps+al)*sizeof(gr_complex));
+        return *d_output;
       }

       void
```