gr-digital: Improve Performance of Costas Loop

This update focuses on improving the throughput of the Costas loop; however, as this PR evolved, some of the changes became more global performance enhancements. The updates are: an ENABLE_NATIVE option was added to CMake, which is off by default but enables native compilation (including FMA support) if desired; sincos was inlined in sincos.h and sincos.cc was removed from the corresponding CMakeLists.txt to improve sincos speed; some constants were added to math.h; functions in the Costas loop and nco.h were inlined; a switch is used instead of a function pointer (much faster); and a fast complex multiply is used on all builds to get around all the range checking in the standard complex.h complex multiply function.
author: ghostop14 <ghostop14@gmail.com> 2020-01-29 17:20:16 -0500
committer: Michael Dickens <michael.dickens@ettus.com> 2020-02-14 10:05:12 -0500
commit: aa0bd44efbf8afdfd93d627e486c1427426b76f9 (patch)
tree: dd2acb4ebed83dc5a553678ba46b76424690b90a
parent: 36680f338e0b7ae7e5bcf7d2a860527ca7b14dfe (diff)
10 files changed, 203 insertions, 233 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 007166bbf6..066e973f6a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -419,6 +419,20 @@ message(STATUS "  Override with -DENABLE_INTERNAL_VOLK=ON/OFF")
 find_package(LOG4CPP REQUIRED)
 
 ########################################################################
+# Setup Native Capabilities Flag
+########################################################################
+option(ENABLE_NATIVE "Enable native build optimizations" OFF)
+IF(UNIX)
+    IF (ENABLE_NATIVE)
+        MESSAGE(STATUS "Found GNU Radio native optimization flag.  Setting native CPU optimization flags.")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -ftree-vectorize")
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native -ftree-vectorize")
+    ELSE (ENABLE_NATIVE)
+        MESSAGE(STATUS "Not using additional GNU Radio native architecture optimizations.")
+    ENDIF (ENABLE_NATIVE)
+ENDIF(UNIX)
+
+########################################################################
 # Disable complex math NaN/INFO range checking for performance
 ########################################################################
 check_cxx_compiler_flag(-fcx-limited-range HAVE_CX_LIMITED_RANGE)
diff --git a/gnuradio-runtime/include/gnuradio/math.h b/gnuradio-runtime/include/gnuradio/math.h
index c0e1a7dc88..f628a5875c 100644
--- a/gnuradio-runtime/include/gnuradio/math.h
+++ b/gnuradio-runtime/include/gnuradio/math.h
@@ -31,12 +31,30 @@
 #define GR_M_LOG2E 1.4426950408889634074          /* log_2 e */
 #define GR_M_PI 3.14159265358979323846            /* pi */
 #define GR_M_PI_4 0.78539816339744830961566084582 /* pi/4 */
-#define GR_M_TWOPI (2 * GR_M_PI)                  /* 2*pi */
+#define GR_M_TWOPI 6.28318530717958647692         /* 2*pi */
 #define GR_M_SQRT2 1.41421356237309504880         /* sqrt(2) */
+#define GR_M_ONE_OVER_2PI 0.15915494309189533577  /* 1 / (2*pi) */
+#define GR_M_MINUS_TWO_PI -6.28318530717958647692 /* - 2*pi */
 
 
 namespace gr {
 
+static inline void
+fast_cc_multiply(gr_complex& out, const gr_complex cc1, const gr_complex cc2)
+{
+    // The built-in complex.h multiply has significant NaN/INF checking that
+    // considerably slows down performance.  While on some compilers the
+    // -fcx-limited-range flag can be used, this fast function makes the math consistent
+    // in terms of performance for the Costas loop.
+    float o_r, o_i;
+
+    o_r = (cc1.real() * cc2.real()) - (cc1.imag() * cc2.imag());
+    o_i = (cc1.real() * cc2.imag()) + (cc1.imag() * cc2.real());
+
+    out.real(o_r);
+    out.imag(o_i);
+}
+
 static inline bool is_power_of_2(long x) { return x != 0 && (x & (x - 1)) == 0; }
 
 /*!
@@ -62,10 +80,7 @@ static inline float fast_atan2f(gr_complex z) { return fast_atan2f(z.imag(), z.r
 /* This bounds x by +/- clip without a branch */
 static inline float branchless_clip(float x, float clip)
 {
-    float x1 = fabsf(x + clip);
-    float x2 = fabsf(x - clip);
-    x1 -= x2;
-    return 0.5 * x1;
+    return 0.5 * (std::abs(x + clip) - std::abs(x - clip));
 }
 
 static inline float clip(float x, float clip)
diff --git a/gnuradio-runtime/include/gnuradio/nco.h b/gnuradio-runtime/include/gnuradio/nco.h
index 6542fe58de..4bcdfe686b 100644
--- a/gnuradio-runtime/include/gnuradio/nco.h
+++ b/gnuradio-runtime/include/gnuradio/nco.h
@@ -44,27 +44,15 @@ public:
     void adjust_freq(double delta_angle_rate) { phase_inc += delta_angle_rate; }
 
     // increment current phase angle
-    void step()
-    {
-        phase += phase_inc;
-        if (fabs(phase) > GR_M_PI) {
-            while (phase > GR_M_PI)
-                phase -= 2 * GR_M_PI;
-
-            while (phase < -GR_M_PI)
-                phase += 2 * GR_M_PI;
-        }
-    }
-
-    void step(int n)
+    void step(int n = 1)
     {
         phase += phase_inc * n;
         if (fabs(phase) > GR_M_PI) {
             while (phase > GR_M_PI)
-                phase -= 2 * GR_M_PI;
+                phase -= GR_M_TWOPI;
 
             while (phase < -GR_M_PI)
-                phase += 2 * GR_M_PI;
+                phase += GR_M_TWOPI;
         }
     }
 
@@ -73,7 +61,7 @@ public:
     double get_freq() const { return phase_inc; }
 
     // compute sin and cos for current phase angle
-    void sincos(float* sinx, float* cosx) const;
+    void sincos(float* sinx, float* cosx) const { gr::sincosf(phase, sinx, cosx); }
 
     // compute cos or sin for current phase angle
     float cos() const { return std::cos(phase); }
@@ -94,12 +82,6 @@ protected:
 };
 
 template <class o_type, class i_type>
-void nco<o_type, i_type>::sincos(float* sinx, float* cosx) const
-{
-    gr::sincosf(phase, sinx, cosx);
-}
-
-template <class o_type, class i_type>
 void nco<o_type, i_type>::sin(float* output, int noutput_items, double ampl)
 {
     for (int i = 0; i < noutput_items; i++) {
diff --git a/gnuradio-runtime/include/gnuradio/sincos.h b/gnuradio-runtime/include/gnuradio/sincos.h
index 15241195d0..cd0f6eb0dd 100644
--- a/gnuradio-runtime/include/gnuradio/sincos.h
+++ b/gnuradio-runtime/include/gnuradio/sincos.h
@@ -12,12 +12,48 @@
 #define INCLUDED_GR_SINCOS_H
 
 #include <gnuradio/api.h>
+#include <cmath>
 
 namespace gr {
 
-// compute sine and cosine at the same time
-GR_RUNTIME_API void sincos(double x, double* sin, double* cos);
-GR_RUNTIME_API void sincosf(float x, float* sin, float* cos);
+#if defined(HAVE_SINCOS)
+
+inline void sincos(double x, double* sinx, double* cosx) { ::sincos(x, sinx, cosx); }
+
+#else
+
+inline void sincos(double x, double* sinx, double* cosx)
+{
+    *sinx = ::sin(x);
+    *cosx = ::cos(x);
+}
+
+#endif
+
+// ----------------------------------------------------------------
+
+#if defined(HAVE_SINCOSF)
+
+inline void sincosf(float x, float* sinx, float* cosx) { ::sincosf(x, sinx, cosx); }
+
+#elif defined(HAVE_SINF) && defined(HAVE_COSF)
+
+inline void sincosf(float x, float* sinx, float* cosx)
+{
+    *sinx = ::sinf(x);
+    *cosx = ::cosf(x);
+}
+
+#else
+
+inline void sincosf(float x, float* sinx, float* cosx)
+{
+    *sinx = ::sin(x);
+    *cosx = ::cos(x);
+}
+
+#endif
+
 } // namespace gr
 
 #endif /* INCLUDED_GR_SINCOS_H */
diff --git a/gnuradio-runtime/lib/CMakeLists.txt b/gnuradio-runtime/lib/CMakeLists.txt
index 04cd7b72c7..a56c68cfd7 100644
--- a/gnuradio-runtime/lib/CMakeLists.txt
+++ b/gnuradio-runtime/lib/CMakeLists.txt
@@ -106,7 +106,6 @@ target_sources(gnuradio-runtime PRIVATE
   ${CMAKE_CURRENT_SOURCE_DIR}/math/fast_atan2f.cc
   ${CMAKE_CURRENT_SOURCE_DIR}/math/fxpt.cc
   ${CMAKE_CURRENT_SOURCE_DIR}/math/random.cc
-  ${CMAKE_CURRENT_SOURCE_DIR}/math/sincos.cc
   )
 
 # Controlport
diff --git a/gnuradio-runtime/lib/math/sincos.cc b/gnuradio-runtime/lib/math/sincos.cc
deleted file mode 100644
index b093e09a4a..0000000000
--- a/gnuradio-runtime/lib/math/sincos.cc
+++ /dev/null
@@ -1,62 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2004,2010,2013 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * SPDX-License-Identifier: GPL-3.0-or-later
- *
- */
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE // ask for GNU extensions if available
-#endif
-
-#include <gnuradio/sincos.h>
-#include <math.h>
-
-namespace gr {
-
-#if defined(HAVE_SINCOS)
-
-void sincos(double x, double* sinx, double* cosx) { ::sincos(x, sinx, cosx); }
-
-#else
-
-void sincos(double x, double* sinx, double* cosx)
-{
-    *sinx = ::sin(x);
-    *cosx = ::cos(x);
-}
-
-#endif
-
-// ----------------------------------------------------------------
-
-#if defined(HAVE_SINCOSF)
-
-void sincosf(float x, float* sinx, float* cosx) { ::sincosf(x, sinx, cosx); }
-
-#elif defined(HAVE_SINF) && defined(HAVE_COSF)
-
-void sincosf(float x, float* sinx, float* cosx)
-{
-    *sinx = ::sinf(x);
-    *cosx = ::cosf(x);
-}
-
-#else
-
-void sincosf(float x, float* sinx, float* cosx)
-{
-    *sinx = ::sin(x);
-    *cosx = ::cos(x);
-}
-
-#endif
-
-} /* namespace gr */
diff --git a/gr-blocks/include/gnuradio/blocks/control_loop.h b/gr-blocks/include/gnuradio/blocks/control_loop.h
index 7aa43b531c..64d5ade620 100644
--- a/gr-blocks/include/gnuradio/blocks/control_loop.h
+++ b/gr-blocks/include/gnuradio/blocks/control_loop.h
@@ -12,6 +12,7 @@
 #define GR_BLOCKS_CONTROL_LOOP
 
 #include <gnuradio/blocks/api.h>
+#include <gnuradio/math.h>
 
 namespace gr {
 namespace blocks {
@@ -72,7 +73,11 @@ public:
     /*! \brief Advance the control loop based on the current gain
      *  settings and the inputted error signal.
      */
-    void advance_loop(float error);
+    void advance_loop(float error)
+    {
+        d_freq = d_freq + d_beta * error;
+        d_phase = d_phase + d_freq + d_alpha * error;
+    }
 
     /*! \brief Keep the phase between -2pi and 2pi.
      *
@@ -87,7 +92,13 @@ public:
      * method in case another way is desired as this is fairly
      * heavy-handed.
      */
-    void phase_wrap();
+    void phase_wrap()
+    {
+        while (d_phase > GR_M_TWOPI)
+            d_phase -= GR_M_TWOPI;
+        while (d_phase < -GR_M_TWOPI)
+            d_phase += GR_M_TWOPI;
+    }
 
     /*! \brief Keep the frequency between d_min_freq and d_max_freq.
      *
@@ -102,7 +113,14 @@ public:
      * method in case another way is desired as this is fairly
      * heavy-handed.
      */
-    void frequency_limit();
+    void frequency_limit()
+    {
+        if (d_freq > d_max_freq)
+            d_freq = d_max_freq;
+        else if (d_freq < d_min_freq)
+            d_freq = d_min_freq;
+    }
+
 
     /*******************************************************************
      * SET FUNCTIONS
diff --git a/gr-blocks/lib/control_loop.cc b/gr-blocks/lib/control_loop.cc
index 099bc20fb9..f98d0c795b 100644
--- a/gr-blocks/lib/control_loop.cc
+++ b/gr-blocks/lib/control_loop.cc
@@ -40,28 +40,6 @@ void control_loop::update_gains()
     d_beta = (4 * d_loop_bw * d_loop_bw) / denom;
 }
 
-void control_loop::advance_loop(float error)
-{
-    d_freq = d_freq + d_beta * error;
-    d_phase = d_phase + d_freq + d_alpha * error;
-}
-
-void control_loop::phase_wrap()
-{
-    while (d_phase > M_TWOPI)
-        d_phase -= M_TWOPI;
-    while (d_phase < -M_TWOPI)
-        d_phase += M_TWOPI;
-}
-
-void control_loop::frequency_limit()
-{
-    if (d_freq > d_max_freq)
-        d_freq = d_max_freq;
-    else if (d_freq < d_min_freq)
-        d_freq = d_min_freq;
-}
-
 /*******************************************************************
  * SET FUNCTIONS
  *******************************************************************/
diff --git a/gr-digital/lib/costas_loop_cc_impl.cc b/gr-digital/lib/costas_loop_cc_impl.cc
index 930183c0c0..5f2cba275e 100644
--- a/gr-digital/lib/costas_loop_cc_impl.cc
+++ b/gr-digital/lib/costas_loop_cc_impl.cc
@@ -37,7 +37,8 @@ costas_loop_cc_impl::costas_loop_cc_impl(float loop_bw, unsigned int order, bool
       blocks::control_loop(loop_bw, 1.0, -1.0),
       d_error(0),
       d_noise(1.0),
-      d_phase_detector(choose_phase_detector(order, use_snr))
+      d_use_snr(use_snr),
+      d_order(order)
 {
     message_port_register_in(pmt::mp("noise"));
     set_msg_handler(pmt::mp("noise"),
@@ -46,96 +47,6 @@ costas_loop_cc_impl::costas_loop_cc_impl(float loop_bw, unsigned int order, bool
 
 costas_loop_cc_impl::~costas_loop_cc_impl() {}
 
-costas_loop_cc_impl::d_phase_detector_t
-costas_loop_cc_impl::choose_phase_detector(unsigned int order, bool use_snr)
-{
-    switch (order) {
-    case 2:
-        if (use_snr) {
-            return &costas_loop_cc_impl::phase_detector_snr_2;
-        }
-        return &costas_loop_cc_impl::phase_detector_2;
-
-    case 4:
-        if (use_snr) {
-            return &costas_loop_cc_impl::phase_detector_snr_4;
-        }
-        return &costas_loop_cc_impl::phase_detector_4;
-
-    case 8:
-        if (use_snr) {
-            return &costas_loop_cc_impl::phase_detector_snr_8;
-        }
-        return &costas_loop_cc_impl::phase_detector_8;
-    }
-    throw std::invalid_argument("order must be 2, 4, or 8");
-}
-
-float costas_loop_cc_impl::phase_detector_8(gr_complex sample) const
-{
-    /* This technique splits the 8PSK constellation into 2 squashed
-       QPSK constellations, one when I is larger than Q and one
-       where Q is larger than I. The error is then calculated
-       proportionally to these squashed constellations by the const
-       K = sqrt(2)-1.
-
-       The signal magnitude must be > 1 or K will incorrectly bias
-       the error value.
-
-       Ref: Z. Huang, Z. Yi, M. Zhang, K. Wang, "8PSK demodulation for
-       new generation DVB-S2", IEEE Proc. Int. Conf. Communications,
-       Circuits and Systems, Vol. 2, pp. 1447 - 1450, 2004.
-    */
-
-    const float K = (sqrtf(2.0) - 1);
-    if (fabsf(sample.real()) >= fabsf(sample.imag())) {
-        return ((sample.real() > 0 ? 1.0 : -1.0) * sample.imag() -
-                (sample.imag() > 0 ? 1.0 : -1.0) * sample.real() * K);
-    } else {
-        return ((sample.real() > 0 ? 1.0 : -1.0) * sample.imag() * K -
-                (sample.imag() > 0 ? 1.0 : -1.0) * sample.real());
-    }
-}
-
-float costas_loop_cc_impl::phase_detector_4(gr_complex sample) const
-{
-    return ((sample.real() > 0 ? 1.0 : -1.0) * sample.imag() -
-            (sample.imag() > 0 ? 1.0 : -1.0) * sample.real());
-}
-
-float costas_loop_cc_impl::phase_detector_2(gr_complex sample) const
-{
-    return (sample.real() * sample.imag());
-}
-
-float costas_loop_cc_impl::phase_detector_snr_8(gr_complex sample) const
-{
-    const float K = (sqrtf(2.0) - 1);
-    const float snr = std::norm(sample) / d_noise;
-    if (fabsf(sample.real()) >= fabsf(sample.imag())) {
-        return ((blocks::tanhf_lut(snr * sample.real()) * sample.imag()) -
-                (blocks::tanhf_lut(snr * sample.imag()) * sample.real() * K));
-    } else {
-        return ((blocks::tanhf_lut(snr * sample.real()) * sample.imag() * K) -
-                (blocks::tanhf_lut(snr * sample.imag()) * sample.real()));
-    }
-}
-
-float costas_loop_cc_impl::phase_detector_snr_4(gr_complex sample) const
-{
-    const float snr = std::norm(sample) / d_noise;
-    return ((blocks::tanhf_lut(snr * sample.real()) * sample.imag()) -
-            (blocks::tanhf_lut(snr * sample.imag()) * sample.real()));
-}
-
-float costas_loop_cc_impl::phase_detector_snr_2(gr_complex sample) const
-{
-    const float snr = std::norm(sample) / d_noise;
-    return blocks::tanhf_lut(snr * sample.real()) * sample.imag();
-}
-
-float costas_loop_cc_impl::error() const { return d_error; }
-
 void costas_loop_cc_impl::handle_set_noise(pmt::pmt_t msg)
 {
     if (pmt::is_real(msg)) {
@@ -154,8 +65,6 @@ int costas_loop_cc_impl::work(int noutput_items,
     float* phase_optr = output_items.size() >= 3 ? (float*)output_items[2] : NULL;
     float* error_optr = output_items.size() >= 4 ? (float*)output_items[3] : NULL;
 
-    gr_complex nco_out;
-
     std::vector<tag_t> tags;
     get_tags_in_range(tags,
                       0,
@@ -163,6 +72,15 @@ int costas_loop_cc_impl::work(int noutput_items,
                       nitems_read(0) + noutput_items,
                       pmt::intern("phase_est"));
 
+    // Get this out of the for loop if not used:
+    bool has_additional_outputs = false;
+    if (freq_optr)
+        has_additional_outputs = true;
+    else if (phase_optr)
+        has_additional_outputs = true;
+    else if (error_optr)
+        has_additional_outputs = true;
+
     for (int i = 0; i < noutput_items; i++) {
         if (!tags.empty()) {
             if (tags[0].offset - nitems_read(0) == (size_t)i) {
@@ -171,22 +89,46 @@ int costas_loop_cc_impl::work(int noutput_items,
             }
         }
 
-        nco_out = gr_expj(-d_phase);
-        optr[i] = iptr[i] * nco_out;
-
-        d_error = (*this.*d_phase_detector)(optr[i]);
+        const gr_complex nco_out = gr_expj(-d_phase);
+
+        gr::fast_cc_multiply(optr[i], iptr[i], nco_out);
+
+        // EXPENSIVE LINE with function pointer, switch was about 20% faster in testing.
+        // Left in for logic justification/reference. d_error = phase_detector_2(optr[i]);
+        switch (d_order) {
+        case 2:
+            if (d_use_snr)
+                d_error = phase_detector_snr_2(optr[i]);
+            else
+                d_error = phase_detector_2(optr[i]);
+            break;
+        case 4:
+            if (d_use_snr)
+                d_error = phase_detector_snr_4(optr[i]);
+            else
+                d_error = phase_detector_4(optr[i]);
+            break;
+        case 8:
+            if (d_use_snr)
+                d_error = phase_detector_snr_8(optr[i]);
+            else
+                d_error = phase_detector_8(optr[i]);
+            break;
+        }
         d_error = gr::branchless_clip(d_error, 1.0);
 
         advance_loop(d_error);
         phase_wrap();
         frequency_limit();
 
-        if (freq_optr != NULL)
-            freq_optr[i] = d_freq;
-        if (phase_optr != NULL)
-            phase_optr[i] = d_phase;
-        if (error_optr != NULL)
-            error_optr[i] = d_error;
+        if (has_additional_outputs) {
+            if (freq_optr)
+                freq_optr[i] = d_freq;
+            if (phase_optr)
+                phase_optr[i] = d_phase;
+            if (error_optr)
+                error_optr[i] = d_error;
+        }
     }
 
     return noutput_items;
diff --git a/gr-digital/lib/costas_loop_cc_impl.h b/gr-digital/lib/costas_loop_cc_impl.h
index 7d90f9fc12..fa33bbec8a 100644
--- a/gr-digital/lib/costas_loop_cc_impl.h
+++ b/gr-digital/lib/costas_loop_cc_impl.h
@@ -22,28 +22,60 @@ class costas_loop_cc_impl : public costas_loop_cc
 private:
     float d_error;
     float d_noise;
+    bool d_use_snr;
+    int d_order;
 
     /*! \brief the phase detector circuit for 8th-order PSK loops.
      *
      *  \param sample complex sample
      *  \return the phase error
      */
-    float phase_detector_8(gr_complex sample) const; // for 8PSK
+    float phase_detector_8(gr_complex sample) const // for 8PSK
+    {
+        /* This technique splits the 8PSK constellation into 2 squashed
+   QPSK constellations, one when I is larger than Q and one
+   where Q is larger than I. The error is then calculated
+   proportionally to these squashed constellations by the const
+   K = sqrt(2)-1.
+
+   The signal magnitude must be > 1 or K will incorrectly bias
+   the error value.
+
+   Ref: Z. Huang, Z. Yi, M. Zhang, K. Wang, "8PSK demodulation for
+   new generation DVB-S2", IEEE Proc. Int. Conf. Communications,
+   Circuits and Systems, Vol. 2, pp. 1447 - 1450, 2004.
+         */
+
+        const float K = (sqrtf(2.0) - 1);
+        if (fabsf(sample.real()) >= fabsf(sample.imag())) {
+            return ((sample.real() > 0.0f ? 1.0f : -1.0f) * sample.imag() -
+                    (sample.imag() > 0.0f ? 1.0f : -1.0f) * sample.real() * K);
+        } else {
+            return ((sample.real() > 0.0f ? 1.0f : -1.0f) * sample.imag() * K -
+                    (sample.imag() > 0.0f ? 1.0f : -1.0f) * sample.real());
+        }
+    };
 
     /*! \brief the phase detector circuit for fourth-order loops.
      *
      *  \param sample complex sample
      *  \return the phase error
      */
-    float phase_detector_4(gr_complex sample) const; // for QPSK
+    float phase_detector_4(gr_complex sample) const // for QPSK
+    {
+        return ((sample.real() > 0.0f ? 1.0f : -1.0f) * sample.imag() -
+                (sample.imag() > 0.0f ? 1.0f : -1.0f) * sample.real());
+    };
 
     /*! \brief the phase detector circuit for second-order loops.
      *
      *  \param sample a complex sample
      *  \return the phase error
      */
-    float phase_detector_2(gr_complex sample) const; // for BPSK
-
+    float phase_detector_2(gr_complex sample) const // for BPSK
+    {
+        return (sample.real() * sample.imag());
+    }
 
     /*! \brief the phase detector circuit for 8th-order PSK
      *  loops. Uses tanh instead of slicing and the noise estimate
@@ -52,7 +84,18 @@ private:
      *  \param sample complex sample
      *  \return the phase error
      */
-    float phase_detector_snr_8(gr_complex sample) const; // for 8PSK
+    float phase_detector_snr_8(gr_complex sample) const // for 8PSK
+    {
+        const float K = (sqrtf(2.0) - 1.0);
+        const float snr = std::norm(sample) / d_noise;
+        if (fabsf(sample.real()) >= fabsf(sample.imag())) {
+            return ((blocks::tanhf_lut(snr * sample.real()) * sample.imag()) -
+                    (blocks::tanhf_lut(snr * sample.imag()) * sample.real() * K));
+        } else {
+            return ((blocks::tanhf_lut(snr * sample.real()) * sample.imag() * K) -
+                    (blocks::tanhf_lut(snr * sample.imag()) * sample.real()));
+        }
+    };
 
     /*! \brief the phase detector circuit for fourth-order
      *  loops. Uses tanh instead of slicing and the noise estimate
@@ -61,7 +104,12 @@ private:
      *  \param sample complex sample
      *  \return the phase error
      */
-    float phase_detector_snr_4(gr_complex sample) const; // for QPSK
+    float phase_detector_snr_4(gr_complex sample) const // for QPSK
+    {
+        const float snr = std::norm(sample) / d_noise;
+        return ((blocks::tanhf_lut(snr * sample.real()) * sample.imag()) -
+                (blocks::tanhf_lut(snr * sample.imag()) * sample.real()));
+    };
 
     /*! \brief the phase detector circuit for second-order
      *  loops. Uses tanh instead of slicing and the noise estimate
@@ -70,17 +118,17 @@ private:
      *  \param sample a complex sample
      *  \return the phase error
      */
-    float phase_detector_snr_2(gr_complex sample) const; // for BPSK
-
-    typedef float (costas_loop_cc_impl::*d_phase_detector_t)(gr_complex sample) const;
-    static d_phase_detector_t choose_phase_detector(unsigned int order, bool use_snr);
-    const d_phase_detector_t d_phase_detector;
+    float phase_detector_snr_2(gr_complex sample) const // for BPSK
+    {
+        const float snr = std::norm(sample) / d_noise;
+        return blocks::tanhf_lut(snr * sample.real()) * sample.imag();
+    };
 
 public:
     costas_loop_cc_impl(float loop_bw, unsigned int order, bool use_snr = false);
     ~costas_loop_cc_impl();
 
-    float error() const;
+    float error() const { return d_error; };
 
     void handle_set_noise(pmt::pmt_t msg);
author	ghostop14 <ghostop14@gmail.com>	2020-01-29 17:20:16 -0500
committer	Michael Dickens <michael.dickens@ettus.com>	2020-02-14 10:05:12 -0500
commit	aa0bd44efbf8afdfd93d627e486c1427426b76f9 (patch)
tree	dd2acb4ebed83dc5a553678ba46b76424690b90a
parent	36680f338e0b7ae7e5bcf7d2a860527ca7b14dfe (diff)