summary refs log tree commit diff
path: root/gr-filter
diff options
context:
space:
mode:
author Tom Rondeau <trondeau@vt.edu> 2012-06-10 19:35:55 -0400
committer Tom Rondeau <trondeau@vt.edu> 2012-06-10 22:35:59 -0400
commit 227448e247ba720b87b99e1b9382cd3737241ab3 (patch)
tree c466e391fb6391b9020457486f852f6f86c6fd20 /gr-filter
parent 14532d8da0f40f2b58595bd7e217004bdbfc90e3 (diff)
filter: working aligned taps loads for regular and decimating filter.
Has working QA code but needs significant performance improvements.
Diffstat (limited to 'gr-filter')
-rw-r--r-- gr-filter/include/filter/fir_filter.h 12
-rw-r--r-- gr-filter/lib/fir_filter.cc 49
-rw-r--r-- gr-filter/lib/fir_filter_XXX_impl.cc.t 8
-rwxr-xr-x gr-filter/python/qa_fir_filter.py 13
4 files changed, 65 insertions, 17 deletions
diff --git a/gr-filter/include/filter/fir_filter.h b/gr-filter/include/filter/fir_filter.h
index fba4641bf3..1eb70f92a4 100644
--- a/gr-filter/include/filter/fir_filter.h
+++ b/gr-filter/include/filter/fir_filter.h
@@ -41,8 +41,8 @@ namespace gr {
void set_taps(const std::vector<float> &taps);
std::vector<float> taps() const;
unsigned int ntaps() const;
-
- float filter(const float input[]);
+
+ inline float filter(const float input[]);
void filterN(float output[],
const float input[],
unsigned long n);
@@ -53,8 +53,10 @@ namespace gr {
private:
unsigned int d_ntaps;
- float *d_taps;
- float *d_aligned_taps[4];
+ float *d_taps;
+ float **d_aligned_taps;
+ int d_offset;
+ float *d_output;
};
/**************************************************************/
@@ -100,7 +102,7 @@ namespace gr {
gr_complex filter(const gr_complex input[]);
void filterN(gr_complex output[],
const gr_complex input[],
- unsigned long n);
+ unsigned long n);
void filterNdec(gr_complex output[],
const gr_complex input[],
unsigned long n,
diff --git a/gr-filter/lib/fir_filter.cc b/gr-filter/lib/fir_filter.cc
index 3abcda53e9..098dd8d367 100644
--- a/gr-filter/lib/fir_filter.cc
+++ b/gr-filter/lib/fir_filter.cc
@@ -23,6 +23,7 @@
#include <filter/fir_filter.h>
#include <fft/fft.h>
#include <volk/volk.h>
+#include <cstdio>
namespace gr {
namespace filter {
@@ -33,15 +34,29 @@ namespace gr {
{
d_taps = NULL;
set_taps(taps);
+ d_offset = 0;
+
+ // Make sure the output sample is always aligned, too.
+ d_output = fft::malloc_float(1);
}
fir_filter_fff::~fir_filter_fff()
{
+ // Free taps
if(d_taps != NULL) {
fft::free(d_taps);
d_taps = NULL;
}
- }
+
+ // Free all aligned taps
+ for(int i = 0; i < 4; i++) {
+ fft::free(d_aligned_taps[i]);
+ }
+ fft::free(d_aligned_taps);
+
+ // Free output sample
+ fft::free(d_output);
+ }
void
fir_filter_fff::set_taps(const std::vector<float> &taps)
@@ -50,6 +65,11 @@ namespace gr {
if(d_taps != NULL) {
fft::free(d_taps);
d_taps = NULL;
+
+ for(int i = 0; i < 4; i++) {
+ fft::free(d_aligned_taps[i]);
+ }
+ fft::free(d_aligned_taps);
}
d_ntaps = (int)taps.size();
@@ -58,6 +78,8 @@ namespace gr {
d_taps[d_ntaps-i-1] = taps[i];
}
+ // Make a set of taps at all possible arch alignments
+ d_aligned_taps = (float**)malloc(4*sizeof(float**));
for(int i = 0; i < 4; i++) {
d_aligned_taps[i] = fft::malloc_float(d_ntaps+3);
memset(d_aligned_taps[i], 0, sizeof(float)*(d_ntaps+3));
@@ -83,12 +105,9 @@ namespace gr {
float
fir_filter_fff::filter(const float input[])
{
- float output;
-
- //const float *ar = (float*)((unsigned long)input & ~15);
-
- volk_32f_x2_dot_prod_32f_u(&output, input, d_taps, d_ntaps);
- return output;
+ volk_32f_x2_dot_prod_32f_a(d_output, input,
+ d_aligned_taps[d_offset], d_ntaps+3);
+ return *d_output;
}
void
@@ -96,8 +115,16 @@ namespace gr {
const float input[],
unsigned long n)
{
- for(unsigned long i = 0; i < n; i++)
- output[i] = filter(&input[i]);
+ unsigned long ar = ((unsigned long) input);
+ int off = (ar - (ar & ~15))/4;
+
+ int j = -off;
+ d_offset = off;
+ for(unsigned long i = 0; i < n; i++) {
+ output[i] = filter(&input[j]);
+ d_offset= (d_offset+1) & 0x03;
+ j += (d_offset == 0 ? 4 : 0);
+ }
}
@@ -108,8 +135,8 @@ namespace gr {
unsigned int decimate)
{
unsigned long j = 0;
- for(unsigned long i = 0; i < n; i++){
- output[i] = filter(&input[j]);
+ for(unsigned long i = 0; i < n; i++) {
+ filterN(&output[i], &input[j], 1);
j += decimate;
}
}
diff --git a/gr-filter/lib/fir_filter_XXX_impl.cc.t b/gr-filter/lib/fir_filter_XXX_impl.cc.t
index c3637042d7..18bec38be3 100644
--- a/gr-filter/lib/fir_filter_XXX_impl.cc.t
+++ b/gr-filter/lib/fir_filter_XXX_impl.cc.t
@@ -26,6 +26,7 @@
#include "@IMPL_NAME@.h"
#include <gr_io_signature.h>
+#include <volk/volk.h>
namespace gr {
namespace filter {
@@ -47,6 +48,10 @@ namespace gr {
d_fir = new kernel::@BASE_NAME@(decimation, taps);
d_updated = false;
set_history(d_fir->ntaps());
+
+ const int alignment_multiple =
+ volk_get_alignment() / sizeof(float);
+ set_alignment(std::max(1, alignment_multiple));
}
@IMPL_NAME@::~@IMPL_NAME@()
@@ -85,7 +90,8 @@ namespace gr {
d_fir->filterN(out, in, noutput_items);
}
else {
- d_fir->filterNdec(out, in, noutput_items, decimation());
+ d_fir->filterNdec(out, in, noutput_items,
+ decimation());
}
return noutput_items;
diff --git a/gr-filter/python/qa_fir_filter.py b/gr-filter/python/qa_fir_filter.py
index 38bfd9ea51..93974bb89a 100755
--- a/gr-filter/python/qa_fir_filter.py
+++ b/gr-filter/python/qa_fir_filter.py
@@ -41,6 +41,19 @@ class test_filter(gr_unittest.TestCase):
result_data = dst.data()
self.assertFloatTuplesAlmostEqual(expected_data, result_data, 5)
+ def test_fir_filter_fff_002(self):
+ src_data = 10*[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+ expected_data = [0.0,] + 4*[3.5, 7.5, 1.5, 5.5, 4.5,] + [3.5, 7.5, 1.5, 5.5]
+ src = gr.vector_source_f(src_data)
+ op = filter.fir_filter_fff(4, [0.5, 0.5])
+ dst = gr.vector_sink_f()
+ self.tb.connect(src, op, dst)
+ self.tb.run()
+ result_data = dst.data()
+ print result_data
+ print expected_data
+ self.assertFloatTuplesAlmostEqual(expected_data, result_data, 5)
+
def test_fir_filter_ccf_001(self):
src_data = [1+1j, 2+2j, 3+3j, 4+4j]
expected_data = [0.5+0.5j, 1.5+1.5j, 2.5+2.5j, 3.5+3.5j]