diff options
author | Johannes Demel <demel@ant.uni-bremen.de> | 2020-06-29 14:50:53 +0200 |
---|---|---|
committer | Martin Braun <martin@gnuradio.org> | 2021-01-14 04:23:07 -0800 |
commit | c8b16186cda5b195e8df6db7e3aa8715127478b7 (patch) | |
tree | 0961c2d2ab73e5c6a6053cb8fd6ee74b0a9ab0fa /gr-blocks/lib | |
parent | 51d3ad973292763a6fc0cf8e971faa3cf89029e9 (diff) |
blocks: Use templates more efficiently in add_blk
As with the recent changes in the moving_average block, this uses small templatized
`volk_add` helper functions instead of templatizing whole work functions.
It should now be easier to add further templates for other block types.
Diffstat (limited to 'gr-blocks/lib')
-rw-r--r-- | gr-blocks/lib/add_blk_impl.cc | 57 |
1 files changed, 35 insertions, 22 deletions
diff --git a/gr-blocks/lib/add_blk_impl.cc b/gr-blocks/lib/add_blk_impl.cc index f0dfa2f248..49cc0d919e 100644 --- a/gr-blocks/lib/add_blk_impl.cc +++ b/gr-blocks/lib/add_blk_impl.cc @@ -20,6 +20,29 @@ namespace gr { namespace blocks { +namespace { +template <typename T> +inline void volk_add(T* out, const T* add, unsigned int num) +{ + for (unsigned int elem = 0; elem < num; elem++) { + out[elem] += add[elem]; + } +} + +template <> +inline void volk_add<float>(float* out, const float* add, unsigned int num) +{ + volk_32f_x2_add_32f(out, out, add, num); +} + +template <> +inline void volk_add<gr_complex>(gr_complex* out, const gr_complex* add, unsigned int num) +{ + volk_32fc_x2_add_32fc(out, out, add, num); +} +} // namespace + + template <class T> typename add_blk<T>::sptr add_blk<T>::make(size_t vlen) { @@ -34,25 +57,19 @@ add_blk_impl<float>::add_blk_impl(size_t vlen) io_signature::make(1, 1, sizeof(float) * vlen)), d_vlen(vlen) { - const int alignment_multiple = volk_get_alignment() / sizeof(float); - set_alignment(std::max(1, alignment_multiple)); + set_alignment(std::max(1, int(volk_get_alignment() / sizeof(float)))); } template <> -int add_blk_impl<float>::work(int noutput_items, - gr_vector_const_void_star& input_items, - gr_vector_void_star& output_items) +add_blk_impl<gr_complex>::add_blk_impl(size_t vlen) + : sync_block("add_cc", + io_signature::make(1, -1, sizeof(gr_complex) * vlen), + io_signature::make(1, 1, sizeof(gr_complex) * vlen)), + d_vlen(vlen) { - float* out = (float*)output_items[0]; - int noi = d_vlen * noutput_items; - - memcpy(out, input_items[0], noi * sizeof(float)); - for (size_t i = 1; i < input_items.size(); i++) - volk_32f_x2_add_32f(out, out, (const float*)input_items[i], noi); - return noutput_items; + set_alignment(std::max(1, int(volk_get_alignment() / sizeof(gr_complex)))); } - template <class T> add_blk_impl<T>::add_blk_impl(size_t vlen) : sync_block("add_blk", @@ -67,16 +84,12 @@ int add_blk_impl<T>::work(int noutput_items, 
gr_vector_const_void_star& input_items, gr_vector_void_star& output_items) { - T* optr = (T*)output_items[0]; - - int ninputs = input_items.size(); - - for (size_t i = 0; i < noutput_items * d_vlen; i++) { - T acc = ((T*)input_items[0])[i]; - for (int j = 1; j < ninputs; j++) - acc += ((T*)input_items[j])[i]; + T* out = (T*)output_items[0]; + int noi = d_vlen * noutput_items; - *optr++ = (T)acc; + memcpy(out, input_items[0], noi * sizeof(T)); + for (size_t i = 1; i < input_items.size(); i++) { + volk_add(out, (T*)input_items[i], noi); } return noutput_items; |