summary | refs | log | tree | commit | diff
path: root/gr-filter
diff options
context:
space:
mode:
author: Tom Rondeau <trondeau@vt.edu> 2012-06-14 12:44:41 -0400
committer: Tom Rondeau <trondeau@vt.edu> 2012-06-14 16:13:13 -0400
commit: 4e2f38528e90ee4a05644d48d015b098a8651405 (patch)
tree: b0da1219ef0e10a770d0a04ee72e7dc425a17353 /gr-filter
parent: dae3b39098c16573f9c70e77f1a9a6b09ecfd041 (diff)
filter: switched ccc FIR filter to use aligned VOLK calls.
Diffstat (limited to 'gr-filter')
-rw-r--r-- gr-filter/include/filter/fir_filter.h 8
-rw-r--r-- gr-filter/lib/fir_filter.cc 39
2 files changed, 44 insertions, 3 deletions
diff --git a/gr-filter/include/filter/fir_filter.h b/gr-filter/include/filter/fir_filter.h
index 99acb7a0a2..3ce21d3049 100644
--- a/gr-filter/include/filter/fir_filter.h
+++ b/gr-filter/include/filter/fir_filter.h
@@ -85,6 +85,10 @@ namespace gr {
private:
unsigned int d_ntaps;
gr_complex *d_taps;
+ float **d_aligned_taps;
+ gr_complex *d_output;
+ int d_align;
+ int d_naligned;
};
/**************************************************************/
@@ -112,6 +116,10 @@ namespace gr {
private:
unsigned int d_ntaps;
gr_complex *d_taps;
+ gr_complex **d_aligned_taps;
+ gr_complex *d_output;
+ int d_align;
+ int d_naligned;
};
} /* namespace kernel */
diff --git a/gr-filter/lib/fir_filter.cc b/gr-filter/lib/fir_filter.cc
index 1e59902949..973be45d4b 100644
--- a/gr-filter/lib/fir_filter.cc
+++ b/gr-filter/lib/fir_filter.cc
@@ -224,16 +224,32 @@ namespace gr {
fir_filter_ccc::fir_filter_ccc(int decimation,
const std::vector<gr_complex> &taps)
{
+ // Alignment (in bytes) reported by VOLK for this machine.
+ d_align = volk_get_alignment();
+
+ // Number of gr_complex samples that fit in one alignment unit; one
+ // shifted copy of the taps is kept for each such offset. Clamp to 1
+ // so that an alignment smaller than sizeof(gr_complex) still yields
+ // the offset-0 tap set that filter() indexes.
+ d_naligned = d_align / sizeof(gr_complex);
+ if(d_naligned < 1) {
+ d_naligned = 1;
+ }
+
d_taps = NULL;
set_taps(taps);
+
+ // Make sure the output sample is always aligned, too.
+ d_output = fft::malloc_complex(1);
}
fir_filter_ccc::~fir_filter_ccc()
{
+ // Free taps
if(d_taps != NULL) {
fft::free(d_taps);
d_taps = NULL;
}
+
+ // Free all aligned taps (each one came from fft::malloc_complex)
+ for(int i = 0; i < d_naligned; i++) {
+ fft::free(d_aligned_taps[i]);
+ }
+ // The pointer table itself is allocated with malloc() in set_taps(),
+ // so it must be released with the matching C free() rather than the
+ // FFT allocator's free.
+ ::free(d_aligned_taps);
+
+ // Free output sample
+ fft::free(d_output);
}
void
@@ -243,6 +259,11 @@ namespace gr {
if(d_taps != NULL) {
fft::free(d_taps);
d_taps = NULL;
+
+ for(int i = 0; i < d_naligned; i++) {
+ fft::free(d_aligned_taps[i]);
+ }
+ fft::free(d_aligned_taps);
}
d_ntaps = (int)taps.size();
@@ -250,6 +271,14 @@ namespace gr {
for(unsigned int i = 0; i < d_ntaps; i++) {
d_taps[d_ntaps-i-1] = taps[i];
}
+
+ // Make a set of taps at all possible arch alignments
+ d_aligned_taps = (gr_complex**)malloc(d_naligned*sizeof(gr_complex**));
+ for(int i = 0; i < d_naligned; i++) {
+ d_aligned_taps[i] = fft::malloc_complex(d_ntaps+d_naligned-1);
+ memset(d_aligned_taps[i], 0, sizeof(gr_complex)*(d_ntaps+d_naligned-1));
+ memcpy(&d_aligned_taps[i][i], d_taps, sizeof(gr_complex)*(d_ntaps));
+ }
}
std::vector<gr_complex>
@@ -270,9 +299,13 @@ namespace gr {
gr_complex
fir_filter_ccc::filter(const gr_complex input[])
{
- gr_complex output;
- volk_32fc_x2_dot_prod_32fc_u(&output, input, d_taps, d_ntaps);
- return output;
+ // Round the input pointer down to the previous alignment boundary.
+ // The address is handled as size_t because unsigned long is narrower
+ // than a pointer on LLP64 platforms and would truncate it.
+ const gr_complex *ar = (gr_complex *)((size_t) input & ~((size_t)d_align-1));
+
+ // Number of samples between the aligned address and the real start
+ // of input; selects the tap set that set_taps() zero-padded at the
+ // front by the same number of samples.
+ unsigned al = input - ar;
+
+ volk_32fc_x2_dot_prod_32fc_a(d_output, ar,
+ d_aligned_taps[al],
+ (d_ntaps+al)*sizeof(gr_complex));
+ return *d_output;
}
void