diff options
-rw-r--r-- | CMakeLists.txt | 14 | ||||
-rw-r--r-- | gnuradio-runtime/include/gnuradio/math.h | 25 | ||||
-rw-r--r-- | gnuradio-runtime/include/gnuradio/nco.h | 26 | ||||
-rw-r--r-- | gnuradio-runtime/include/gnuradio/sincos.h | 42 | ||||
-rw-r--r-- | gnuradio-runtime/lib/CMakeLists.txt | 1 | ||||
-rw-r--r-- | gnuradio-runtime/lib/math/sincos.cc | 62 | ||||
-rw-r--r-- | gr-blocks/include/gnuradio/blocks/control_loop.h | 24 | ||||
-rw-r--r-- | gr-blocks/lib/control_loop.cc | 22 | ||||
-rw-r--r-- | gr-digital/lib/costas_loop_cc_impl.cc | 148 | ||||
-rw-r--r-- | gr-digital/lib/costas_loop_cc_impl.h | 72 |
10 files changed, 203 insertions, 233 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 007166bbf6..066e973f6a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -419,6 +419,20 @@ message(STATUS " Override with -DENABLE_INTERNAL_VOLK=ON/OFF") find_package(LOG4CPP REQUIRED) ######################################################################## +# Setup Native Capabilities Flag +######################################################################## +option(ENABLE_NATIVE "Enable native build optimizations" OFF) +IF(UNIX) + IF (ENABLE_NATIVE) + MESSAGE(STATUS "Found GNU Radio native optimization flag. Setting native CPU optimization flags.") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -ftree-vectorize") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native -ftree-vectorize") + ELSE (ENABLE_NATIVE) + MESSAGE(STATUS "Not using additional GNU Radio native architecture optimizations.") + ENDIF (ENABLE_NATIVE) +ENDIF(UNIX) + +######################################################################## # Disable complex math NaN/INFO range checking for performance ######################################################################## check_cxx_compiler_flag(-fcx-limited-range HAVE_CX_LIMITED_RANGE) diff --git a/gnuradio-runtime/include/gnuradio/math.h b/gnuradio-runtime/include/gnuradio/math.h index c0e1a7dc88..f628a5875c 100644 --- a/gnuradio-runtime/include/gnuradio/math.h +++ b/gnuradio-runtime/include/gnuradio/math.h @@ -31,12 +31,30 @@ #define GR_M_LOG2E 1.4426950408889634074 /* log_2 e */ #define GR_M_PI 3.14159265358979323846 /* pi */ #define GR_M_PI_4 0.78539816339744830961566084582 /* pi/4 */ -#define GR_M_TWOPI (2 * GR_M_PI) /* 2*pi */ +#define GR_M_TWOPI 6.28318530717958647692 /* 2*pi */ #define GR_M_SQRT2 1.41421356237309504880 /* sqrt(2) */ +#define GR_M_ONE_OVER_2PI 0.15915494309189533577 /* 1 / (2*pi) */ +#define GR_M_MINUS_TWO_PI -6.28318530717958647692 /* - 2*pi */ namespace gr { +static inline void +fast_cc_multiply(gr_complex& out, const gr_complex cc1, const gr_complex cc2) +{ + // The built-in complex.h multiply has significant NaN/INF checking that + // considerably slows down performance. While on some compilers the + // -fcx-limit-range flag can be used, this fast function makes the math consistent + // in terms of performance for the Costas loop. + float o_r, o_i; + + o_r = (cc1.real() * cc2.real()) - (cc1.imag() * cc2.imag()); + o_i = (cc1.real() * cc2.imag()) + (cc1.imag() * cc2.real()); + + out.real(o_r); + out.imag(o_i); +} + static inline bool is_power_of_2(long x) { return x != 0 && (x & (x - 1)) == 0; } /*! @@ -62,10 +80,7 @@ static inline float fast_atan2f(gr_complex z) { return fast_atan2f(z.imag(), z.r /* This bounds x by +/- clip without a branch */ static inline float branchless_clip(float x, float clip) { - float x1 = fabsf(x + clip); - float x2 = fabsf(x - clip); - x1 -= x2; - return 0.5 * x1; + return 0.5 * (std::abs(x + clip) - std::abs(x - clip)); } static inline float clip(float x, float clip) diff --git a/gnuradio-runtime/include/gnuradio/nco.h b/gnuradio-runtime/include/gnuradio/nco.h index 6542fe58de..4bcdfe686b 100644 --- a/gnuradio-runtime/include/gnuradio/nco.h +++ b/gnuradio-runtime/include/gnuradio/nco.h @@ -44,27 +44,15 @@ public: void adjust_freq(double delta_angle_rate) { phase_inc += delta_angle_rate; } // increment current phase angle - void step() - { - phase += phase_inc; - if (fabs(phase) > GR_M_PI) { - while (phase > GR_M_PI) - phase -= 2 * GR_M_PI; - - while (phase < -GR_M_PI) - phase += 2 * GR_M_PI; - } - } - - void step(int n) + void step(int n = 1) { phase += phase_inc * n; if (fabs(phase) > GR_M_PI) { while (phase > GR_M_PI) - phase -= 2 * GR_M_PI; + phase -= GR_M_TWOPI; while (phase < -GR_M_PI) - phase += 2 * GR_M_PI; + phase += GR_M_TWOPI; } } @@ -73,7 +61,7 @@ public: double get_freq() const { return phase_inc; } // compute sin and cos for current phase angle - void sincos(float* sinx, float* cosx) const; + void sincos(float* sinx, float* cosx) const { gr::sincosf(phase, sinx, cosx); } // compute cos or sin for current phase angle float cos() const { return std::cos(phase); } @@ -94,12 +82,6 @@ protected: }; template <class o_type, class i_type> -void nco<o_type, i_type>::sincos(float* sinx, float* cosx) const -{ - gr::sincosf(phase, sinx, cosx); -} - -template <class o_type, class i_type> void nco<o_type, i_type>::sin(float* output, int noutput_items, double ampl) { for (int i = 0; i < noutput_items; i++) { diff --git a/gnuradio-runtime/include/gnuradio/sincos.h b/gnuradio-runtime/include/gnuradio/sincos.h index 15241195d0..cd0f6eb0dd 100644 --- a/gnuradio-runtime/include/gnuradio/sincos.h +++ b/gnuradio-runtime/include/gnuradio/sincos.h @@ -12,12 +12,48 @@ #define INCLUDED_GR_SINCOS_H #include <gnuradio/api.h> +#include <cmath> namespace gr { -// compute sine and cosine at the same time -GR_RUNTIME_API void sincos(double x, double* sin, double* cos); -GR_RUNTIME_API void sincosf(float x, float* sin, float* cos); +#if defined(HAVE_SINCOS) + +inline void sincos(double x, double* sinx, double* cosx) { ::sincos(x, sinx, cosx); } + +#else + +inline void sincos(double x, double* sinx, double* cosx) +{ + *sinx = ::sin(x); + *cosx = ::cos(x); +} + +#endif + +// ---------------------------------------------------------------- + +#if defined(HAVE_SINCOSF) + +inline void sincosf(float x, float* sinx, float* cosx) { ::sincosf(x, sinx, cosx); } + +#elif defined(HAVE_SINF) && defined(HAVE_COSF) + +inline void sincosf(float x, float* sinx, float* cosx) +{ + *sinx = ::sinf(x); + *cosx = ::cosf(x); +} + +#else + +inline void sincosf(float x, float* sinx, float* cosx) +{ + *sinx = ::sin(x); + *cosx = ::cos(x); +} + +#endif + } // namespace gr #endif /* INCLUDED_GR_SINCOS_H */ diff --git a/gnuradio-runtime/lib/CMakeLists.txt b/gnuradio-runtime/lib/CMakeLists.txt index 04cd7b72c7..a56c68cfd7 100644 --- a/gnuradio-runtime/lib/CMakeLists.txt +++ b/gnuradio-runtime/lib/CMakeLists.txt @@ -106,7 +106,6 @@ target_sources(gnuradio-runtime PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/math/fast_atan2f.cc ${CMAKE_CURRENT_SOURCE_DIR}/math/fxpt.cc ${CMAKE_CURRENT_SOURCE_DIR}/math/random.cc - ${CMAKE_CURRENT_SOURCE_DIR}/math/sincos.cc ) # Controlport diff --git a/gnuradio-runtime/lib/math/sincos.cc b/gnuradio-runtime/lib/math/sincos.cc deleted file mode 100644 index b093e09a4a..0000000000 --- a/gnuradio-runtime/lib/math/sincos.cc +++ /dev/null @@ -1,62 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright 2004,2010,2013 Free Software Foundation, Inc. - * - * This file is part of GNU Radio - * - * SPDX-License-Identifier: GPL-3.0-or-later - * - */ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#ifndef _GNU_SOURCE -#define _GNU_SOURCE // ask for GNU extensions if available -#endif - -#include <gnuradio/sincos.h> -#include <math.h> - -namespace gr { - -#if defined(HAVE_SINCOS) - -void sincos(double x, double* sinx, double* cosx) { ::sincos(x, sinx, cosx); } - -#else - -void sincos(double x, double* sinx, double* cosx) -{ - *sinx = ::sin(x); - *cosx = ::cos(x); -} - -#endif - -// ---------------------------------------------------------------- - -#if defined(HAVE_SINCOSF) - -void sincosf(float x, float* sinx, float* cosx) { ::sincosf(x, sinx, cosx); } - -#elif defined(HAVE_SINF) && defined(HAVE_COSF) - -void sincosf(float x, float* sinx, float* cosx) -{ - *sinx = ::sinf(x); - *cosx = ::cosf(x); -} - -#else - -void sincosf(float x, float* sinx, float* cosx) -{ - *sinx = ::sin(x); - *cosx = ::cos(x); -} - -#endif - -} /* namespace gr */ diff --git a/gr-blocks/include/gnuradio/blocks/control_loop.h b/gr-blocks/include/gnuradio/blocks/control_loop.h index 7aa43b531c..64d5ade620 100644 --- a/gr-blocks/include/gnuradio/blocks/control_loop.h +++ b/gr-blocks/include/gnuradio/blocks/control_loop.h @@ -12,6 +12,7 @@ #define GR_BLOCKS_CONTROL_LOOP #include <gnuradio/blocks/api.h> +#include <gnuradio/math.h> namespace gr { namespace blocks { @@ -72,7 +73,11 @@ public: /*! \brief Advance the control loop based on the current gain * settings and the inputted error signal. */ - void advance_loop(float error); + void advance_loop(float error) + { + d_freq = d_freq + d_beta * error; + d_phase = d_phase + d_freq + d_alpha * error; + } /*! \brief Keep the phase between -2pi and 2pi. * @@ -87,7 +92,13 @@ public: * method in case another way is desired as this is fairly * heavy-handed. */ - void phase_wrap(); + void phase_wrap() + { + while (d_phase > GR_M_TWOPI) + d_phase -= GR_M_TWOPI; + while (d_phase < -GR_M_TWOPI) + d_phase += GR_M_TWOPI; + } /*! \brief Keep the frequency between d_min_freq and d_max_freq. * @@ -102,7 +113,14 @@ public: * method in case another way is desired as this is fairly * heavy-handed. */ - void frequency_limit(); + void frequency_limit() + { + if (d_freq > d_max_freq) + d_freq = d_max_freq; + else if (d_freq < d_min_freq) + d_freq = d_min_freq; + } + /******************************************************************* * SET FUNCTIONS diff --git a/gr-blocks/lib/control_loop.cc b/gr-blocks/lib/control_loop.cc index 099bc20fb9..f98d0c795b 100644 --- a/gr-blocks/lib/control_loop.cc +++ b/gr-blocks/lib/control_loop.cc @@ -40,28 +40,6 @@ void control_loop::update_gains() d_beta = (4 * d_loop_bw * d_loop_bw) / denom; } -void control_loop::advance_loop(float error) -{ - d_freq = d_freq + d_beta * error; - d_phase = d_phase + d_freq + d_alpha * error; -} - -void control_loop::phase_wrap() -{ - while (d_phase > M_TWOPI) - d_phase -= M_TWOPI; - while (d_phase < -M_TWOPI) - d_phase += M_TWOPI; -} - -void control_loop::frequency_limit() -{ - if (d_freq > d_max_freq) - d_freq = d_max_freq; - else if (d_freq < d_min_freq) - d_freq = d_min_freq; -} - /******************************************************************* * SET FUNCTIONS *******************************************************************/ diff --git a/gr-digital/lib/costas_loop_cc_impl.cc b/gr-digital/lib/costas_loop_cc_impl.cc index 930183c0c0..5f2cba275e 100644 --- a/gr-digital/lib/costas_loop_cc_impl.cc +++ b/gr-digital/lib/costas_loop_cc_impl.cc @@ -37,7 +37,8 @@ costas_loop_cc_impl::costas_loop_cc_impl(float loop_bw, unsigned int order, bool blocks::control_loop(loop_bw, 1.0, -1.0), d_error(0), d_noise(1.0), - d_phase_detector(choose_phase_detector(order, use_snr)) + d_use_snr(use_snr), + d_order(order) { message_port_register_in(pmt::mp("noise")); set_msg_handler(pmt::mp("noise"), @@ -46,96 +47,6 @@ costas_loop_cc_impl::costas_loop_cc_impl(float loop_bw, unsigned int order, bool costas_loop_cc_impl::~costas_loop_cc_impl() {} -costas_loop_cc_impl::d_phase_detector_t -costas_loop_cc_impl::choose_phase_detector(unsigned int order, bool use_snr) -{ - switch (order) { - case 2: - if (use_snr) { - return &costas_loop_cc_impl::phase_detector_snr_2; - } - return &costas_loop_cc_impl::phase_detector_2; - - case 4: - if (use_snr) { - return &costas_loop_cc_impl::phase_detector_snr_4; - } - return &costas_loop_cc_impl::phase_detector_4; - - case 8: - if (use_snr) { - return &costas_loop_cc_impl::phase_detector_snr_8; - } - return &costas_loop_cc_impl::phase_detector_8; - } - throw std::invalid_argument("order must be 2, 4, or 8"); -} - -float costas_loop_cc_impl::phase_detector_8(gr_complex sample) const -{ - /* This technique splits the 8PSK constellation into 2 squashed - QPSK constellations, one when I is larger than Q and one - where Q is larger than I. The error is then calculated - proportionally to these squashed constellations by the const - K = sqrt(2)-1. - - The signal magnitude must be > 1 or K will incorrectly bias - the error value. - - Ref: Z. Huang, Z. Yi, M. Zhang, K. Wang, "8PSK demodulation for - new generation DVB-S2", IEEE Proc. Int. Conf. Communications, - Circuits and Systems, Vol. 2, pp. 1447 - 1450, 2004. - */ - - const float K = (sqrtf(2.0) - 1); - if (fabsf(sample.real()) >= fabsf(sample.imag())) { - return ((sample.real() > 0 ? 1.0 : -1.0) * sample.imag() - - (sample.imag() > 0 ? 1.0 : -1.0) * sample.real() * K); - } else { - return ((sample.real() > 0 ? 1.0 : -1.0) * sample.imag() * K - - (sample.imag() > 0 ? 1.0 : -1.0) * sample.real()); - } -} - -float costas_loop_cc_impl::phase_detector_4(gr_complex sample) const -{ - return ((sample.real() > 0 ? 1.0 : -1.0) * sample.imag() - - (sample.imag() > 0 ? 1.0 : -1.0) * sample.real()); -} - -float costas_loop_cc_impl::phase_detector_2(gr_complex sample) const -{ - return (sample.real() * sample.imag()); -} - -float costas_loop_cc_impl::phase_detector_snr_8(gr_complex sample) const -{ - const float K = (sqrtf(2.0) - 1); - const float snr = std::norm(sample) / d_noise; - if (fabsf(sample.real()) >= fabsf(sample.imag())) { - return ((blocks::tanhf_lut(snr * sample.real()) * sample.imag()) - - (blocks::tanhf_lut(snr * sample.imag()) * sample.real() * K)); - } else { - return ((blocks::tanhf_lut(snr * sample.real()) * sample.imag() * K) - - (blocks::tanhf_lut(snr * sample.imag()) * sample.real())); - } -} - -float costas_loop_cc_impl::phase_detector_snr_4(gr_complex sample) const -{ - const float snr = std::norm(sample) / d_noise; - return ((blocks::tanhf_lut(snr * sample.real()) * sample.imag()) - - (blocks::tanhf_lut(snr * sample.imag()) * sample.real())); -} - -float costas_loop_cc_impl::phase_detector_snr_2(gr_complex sample) const -{ - const float snr = std::norm(sample) / d_noise; - return blocks::tanhf_lut(snr * sample.real()) * sample.imag(); -} - -float costas_loop_cc_impl::error() const { return d_error; } - void costas_loop_cc_impl::handle_set_noise(pmt::pmt_t msg) { if (pmt::is_real(msg)) { @@ -154,8 +65,6 @@ int costas_loop_cc_impl::work(int noutput_items, float* phase_optr = output_items.size() >= 3 ? (float*)output_items[2] : NULL; float* error_optr = output_items.size() >= 4 ? (float*)output_items[3] : NULL; - gr_complex nco_out; - std::vector<tag_t> tags; get_tags_in_range(tags, 0, @@ -163,6 +72,15 @@ int costas_loop_cc_impl::work(int noutput_items, nitems_read(0) + noutput_items, pmt::intern("phase_est")); + // Get this out of the for loop if not used: + bool has_additional_outputs = false; + if (freq_optr) + has_additional_outputs = true; + else if (phase_optr) + has_additional_outputs = true; + else if (error_optr) + has_additional_outputs = true; + for (int i = 0; i < noutput_items; i++) { if (!tags.empty()) { if (tags[0].offset - nitems_read(0) == (size_t)i) { @@ -171,22 +89,46 @@ int costas_loop_cc_impl::work(int noutput_items, } } - nco_out = gr_expj(-d_phase); - optr[i] = iptr[i] * nco_out; - - d_error = (*this.*d_phase_detector)(optr[i]); + const gr_complex nco_out = gr_expj(-d_phase); + + gr::fast_cc_multiply(optr[i], iptr[i], nco_out); + + // EXPENSIVE LINE with function pointer, switch was about 20% faster in testing. + // Left in for logic justification/reference. d_error = phase_detector_2(optr[i]); + switch (d_order) { + case 2: + if (d_use_snr) + d_error = phase_detector_snr_2(optr[i]); + else + d_error = phase_detector_2(optr[i]); + break; + case 4: + if (d_use_snr) + d_error = phase_detector_snr_4(optr[i]); + else + d_error = phase_detector_4(optr[i]); + break; + case 8: + if (d_use_snr) + d_error = phase_detector_snr_8(optr[i]); + else + d_error = phase_detector_8(optr[i]); + break; + } d_error = gr::branchless_clip(d_error, 1.0); advance_loop(d_error); phase_wrap(); frequency_limit(); - if (freq_optr != NULL) - freq_optr[i] = d_freq; - if (phase_optr != NULL) - phase_optr[i] = d_phase; - if (error_optr != NULL) - error_optr[i] = d_error; + if (has_additional_outputs) { + if (freq_optr) + freq_optr[i] = d_freq; + if (phase_optr) + phase_optr[i] = d_phase; + if (error_optr) + error_optr[i] = d_error; + } } return noutput_items; diff --git a/gr-digital/lib/costas_loop_cc_impl.h b/gr-digital/lib/costas_loop_cc_impl.h index 7d90f9fc12..fa33bbec8a 100644 --- a/gr-digital/lib/costas_loop_cc_impl.h +++ b/gr-digital/lib/costas_loop_cc_impl.h @@ -22,28 +22,60 @@ class costas_loop_cc_impl : public costas_loop_cc private: float d_error; float d_noise; + bool d_use_snr; + int d_order; /*! \brief the phase detector circuit for 8th-order PSK loops. * * \param sample complex sample * \return the phase error */ - float phase_detector_8(gr_complex sample) const; // for 8PSK + float phase_detector_8(gr_complex sample) const // for 8PSK + { + /* This technique splits the 8PSK constellation into 2 squashed + QPSK constellations, one when I is larger than Q and one + where Q is larger than I. The error is then calculated + proportionally to these squashed constellations by the const + K = sqrt(2)-1. + + The signal magnitude must be > 1 or K will incorrectly bias + the error value. + + Ref: Z. Huang, Z. Yi, M. Zhang, K. Wang, "8PSK demodulation for + new generation DVB-S2", IEEE Proc. Int. Conf. Communications, + Circuits and Systems, Vol. 2, pp. 1447 - 1450, 2004. + */ + + const float K = (sqrtf(2.0) - 1); + if (fabsf(sample.real()) >= fabsf(sample.imag())) { + return ((sample.real() > 0.0f ? 1.0f : -1.0f) * sample.imag() - + (sample.imag() > 0.0f ? 1.0f : -1.0f) * sample.real() * K); + } else { + return ((sample.real() > 0.0f ? 1.0f : -1.0f) * sample.imag() * K - + (sample.imag() > 0.0f ? 1.0f : -1.0f) * sample.real()); + } + }; /*! \brief the phase detector circuit for fourth-order loops. * * \param sample complex sample * \return the phase error */ - float phase_detector_4(gr_complex sample) const; // for QPSK + float phase_detector_4(gr_complex sample) const // for QPSK + { + return ((sample.real() > 0.0f ? 1.0f : -1.0f) * sample.imag() - + (sample.imag() > 0.0f ? 1.0f : -1.0f) * sample.real()); + }; /*! \brief the phase detector circuit for second-order loops. * * \param sample a complex sample * \return the phase error */ - float phase_detector_2(gr_complex sample) const; // for BPSK - + float phase_detector_2(gr_complex sample) const // for BPSK + { + return (sample.real() * sample.imag()); + } /*! \brief the phase detector circuit for 8th-order PSK * loops. Uses tanh instead of slicing and the noise estimate @@ -52,7 +84,18 @@ private: * \param sample complex sample * \return the phase error */ - float phase_detector_snr_8(gr_complex sample) const; // for 8PSK + float phase_detector_snr_8(gr_complex sample) const // for 8PSK + { + const float K = (sqrtf(2.0) - 1.0); + const float snr = std::norm(sample) / d_noise; + if (fabsf(sample.real()) >= fabsf(sample.imag())) { + return ((blocks::tanhf_lut(snr * sample.real()) * sample.imag()) - + (blocks::tanhf_lut(snr * sample.imag()) * sample.real() * K)); + } else { + return ((blocks::tanhf_lut(snr * sample.real()) * sample.imag() * K) - + (blocks::tanhf_lut(snr * sample.imag()) * sample.real())); + } + }; /*! \brief the phase detector circuit for fourth-order * loops. Uses tanh instead of slicing and the noise estimate @@ -61,7 +104,12 @@ private: * \param sample complex sample * \return the phase error */ - float phase_detector_snr_4(gr_complex sample) const; // for QPSK + float phase_detector_snr_4(gr_complex sample) const // for QPSK + { + const float snr = std::norm(sample) / d_noise; + return ((blocks::tanhf_lut(snr * sample.real()) * sample.imag()) - + (blocks::tanhf_lut(snr * sample.imag()) * sample.real())); + }; /*! \brief the phase detector circuit for second-order * loops. Uses tanh instead of slicing and the noise estimate @@ -70,17 +118,17 @@ private: * \param sample a complex sample * \return the phase error */ - float phase_detector_snr_2(gr_complex sample) const; // for BPSK - - typedef float (costas_loop_cc_impl::*d_phase_detector_t)(gr_complex sample) const; - static d_phase_detector_t choose_phase_detector(unsigned int order, bool use_snr); - const d_phase_detector_t d_phase_detector; + float phase_detector_snr_2(gr_complex sample) const // for BPSK + { + const float snr = std::norm(sample) / d_noise; + return blocks::tanhf_lut(snr * sample.real()) * sample.imag(); + }; public: costas_loop_cc_impl(float loop_bw, unsigned int order, bool use_snr = false); ~costas_loop_cc_impl(); - float error() const; + float error() const { return d_error; }; void handle_set_noise(pmt::pmt_t msg); |