summaryrefslogtreecommitdiff
path: root/gr-filter
diff options
context:
space:
mode:
author Tom Rondeau <trondeau@vt.edu> 2012-06-13 14:53:41 -0400
committer Tom Rondeau <trondeau@vt.edu> 2012-06-13 14:53:41 -0400
commit 9e2e896e9d4dbc4627702cde82a48e9ee5136f26 (patch)
tree fb62fd58ec90f34e080d1dc2f52b2200b57fa0ed /gr-filter
parent 46bd64dc8231402535bad742a74250599e595148 (diff)
filter: process 4 vectors each time in volk dot_prod to speed up fir filters.
This makes the volk version of the SSE FIR filter the same speed as using the hand-crafted float_dotprod from before.
Diffstat (limited to 'gr-filter')
-rw-r--r-- gr-filter/include/filter/fir_filter.h 2
-rw-r--r-- gr-filter/lib/fir_filter.cc 38
2 files changed, 36 insertions, 4 deletions
diff --git a/gr-filter/include/filter/fir_filter.h b/gr-filter/include/filter/fir_filter.h
index 1eb70f92a4..e6194df7b5 100644
--- a/gr-filter/include/filter/fir_filter.h
+++ b/gr-filter/include/filter/fir_filter.h
@@ -102,7 +102,7 @@ namespace gr {
gr_complex filter(const gr_complex input[]);
void filterN(gr_complex output[],
const gr_complex input[],
- unsigned long n)
+ unsigned long n);
void filterNdec(gr_complex output[],
const gr_complex input[],
unsigned long n,
diff --git a/gr-filter/lib/fir_filter.cc b/gr-filter/lib/fir_filter.cc
index 098dd8d367..c6e179246d 100644
--- a/gr-filter/lib/fir_filter.cc
+++ b/gr-filter/lib/fir_filter.cc
@@ -24,6 +24,7 @@
#include <fft/fft.h>
#include <volk/volk.h>
#include <cstdio>
+#include <float_dotprod_x86.h>
namespace gr {
namespace filter {
@@ -101,12 +102,16 @@ namespace gr {
{
return d_ntaps;
}
-
+
+ /*
float
fir_filter_fff::filter(const float input[])
{
volk_32f_x2_dot_prod_32f_a(d_output, input,
- d_aligned_taps[d_offset], d_ntaps+3);
+ d_aligned_taps[d_offset],
+ (d_ntaps + d_offset - 1) / 4 + 1);
+ //*d_output = float_dotprod_sse(input, d_aligned_taps[d_offset],
+ // (d_ntaps + d_offset - 1) / 4 + 1);
return *d_output;
}
@@ -126,7 +131,34 @@ namespace gr {
j += (d_offset == 0 ? 4 : 0);
}
}
+ */
+
+ float
+ fir_filter_fff::filter(const float input[])
+ { // Computes one output sample with the VOLK aligned dot-product kernel and returns *d_output.
+ //unsigned long ar = ((unsigned long) input);
+ //int off = (ar - (ar & ~15))/4;
+ // Clear the low 4 address bits to round input down to a 16-byte boundary.
+ const float *ar = (float *)((unsigned long) input & ~15); // NOTE(review): unsigned long is narrower than a pointer on LLP64 targets (e.g. 64-bit Windows) -- confirm, consider uintptr_t
+ unsigned al = input - ar; // number of floats between the aligned base and input
+ // Taps set is selected by al; presumably each set is pre-shifted by al entries to match -- TODO confirm.
+ volk_32f_x2_dot_prod_32f_a(d_output, ar,
+ d_aligned_taps[al],
+ (d_ntaps + al - 1) / 4 + 1); // d_ntaps + al values rounded up to whole groups of 4
+ //*d_output = float_dotprod_sse(input, d_aligned_taps[d_offset],
+ // (d_ntaps + d_offset - 1) / 4 + 1);
+ return *d_output;
+ }
+ void
+ fir_filter_fff::filterN(float output[],
+ const float input[],
+ unsigned long n)
+ { // Fills output[0..n-1]; output[i] is filter() evaluated at &input[i].
+ for(unsigned long i = 0; i < n; i++) { // advances one input sample per output (no decimation)
+ output[i] = filter(&input[i]); // presumably input must extend far enough past i to cover the taps -- verify against caller
+ }
+ }
void
fir_filter_fff::filterNdec(float output[],
@@ -136,7 +168,7 @@ namespace gr {
{
unsigned long j = 0;
for(unsigned long i = 0; i < n; i++) {
- filterN(&output[i], &input[j], 1);
+ output[i] = filter(&input[j]);
j += decimate;
}
}