doc/doxygen-3.6/volk__32f__s32f__32f__fm__detect__32f__a_8h_source.html

00001 #ifndef INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H
00002 #define INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H
00003
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006
00007 #ifdef LV_HAVE_SSE
00008 #include <xmmintrin.h>
00009 /*!
00010   \brief performs the FM-detect differentiation on the input vector and stores the results in the output vector.
        Each output is the difference between an input sample and the sample before it
        (the first output uses *saveValue as the "sample before"). 2*bound is subtracted when the
        difference is greater than bound, and 2*bound is added when it is less than -bound.
        The first 4 outputs and the trailing outputs are computed with scalar code; the samples in
        between are processed 4 at a time with SSE intrinsics.
00011   \param outputVector The vector where the results will be stored. It is written with _mm_store_ps at offsets that are multiples of 4 floats, so it must be 16-byte aligned.
00012   \param inputVector The input vector containing phase data (must be on the interval (-bound,bound] ). It is read with _mm_load_ps at offsets that are multiples of 4 floats, so it must be 16-byte aligned.
00013   \param bound The interval that the input phase data is in, which is used to modulo the differentiation
00014   \param saveValue A pointer to a float which contains the phase value of the sample before the first input sample. On return it is overwritten with the last input sample (inputVector[num_points-1]).
00015   \param num_points The number of real values in the input vector. If it is 0 the function returns immediately without writing anything.
00016 */
00017 static inline void volk_32f_s32f_32f_fm_detect_32f_a_sse(float* outputVector, const float* inputVector, const float bound, float* saveValue, unsigned int num_points){
00018   if (num_points < 1) { // Nothing to do: outputVector and *saveValue are left untouched.
00019     return;
00020   }
00021   unsigned int number = 1; // SSE loop counter; starts at 1 because the first 4 outputs are produced by the scalar code below.
00022   unsigned int j = 0; // Counter for the scalar loop that produces outputs 1..3.
00023   // num_points-1 keeps Fedora 7's gcc from crashing...
00024   // num_points won't work.  :(
00025   const unsigned int quarterPoints = (num_points-1) / 4; // Group count: group 0 is done by scalar code, groups 1..quarterPoints-1 by the SSE loop.
00026
00027   float* outPtr = outputVector;
00028   const float* inPtr = inputVector;
00029   __m128 upperBound = _mm_set_ps1(bound); // bound in all 4 lanes
00030   __m128 lowerBound = _mm_set_ps1(-bound); // -bound in all 4 lanes
00031   __m128 next3old1;
00032   __m128 next4;
00033   __m128 boundAdjust;
00034   __m128 posBoundAdjust = _mm_set_ps1(-2*bound); // Subtract when we're above.
00035   __m128 negBoundAdjust = _mm_set_ps1(2*bound); // Add when we're below.
00036   // Do the first 4 by hand since we're going in from the saveValue:
00037   *outPtr = *inPtr - *saveValue; // Output 0 is differenced against the saved sample rather than inputVector[-1].
00038   if (*outPtr >  bound) *outPtr -= 2*bound;
00039   if (*outPtr < -bound) *outPtr += 2*bound;
00040   inPtr++;
00041   outPtr++;
00042   for (j = 1; j < ( (4 < num_points) ? 4 : num_points); j++) { // Outputs 1..3 (fewer when num_points < 4).
00043     *outPtr = *(inPtr) - *(inPtr-1);
00044     if (*outPtr >  bound) *outPtr -= 2*bound;
00045     if (*outPtr < -bound) *outPtr += 2*bound;
00046     inPtr++;
00047     outPtr++;
00048   }
00049
00050   for (; number < quarterPoints; number++) { // Each iteration produces 4 outputs.
00051     // Load data
00052     next3old1 = _mm_loadu_ps((float*) (inPtr-1)); // Unaligned load of the 4 samples starting one before inPtr, i.e. the "previous" sample of each lane.
00053     next4 = _mm_load_ps(inPtr); // Aligned load of the 4 current samples.
00054     inPtr += 4;
00055     // Subtract and store:
00056     next3old1 = _mm_sub_ps(next4, next3old1); // current - previous, per lane
00057     // Bound:
00058     boundAdjust = _mm_cmpgt_ps(next3old1, upperBound); // All-ones mask in lanes where the difference is greater than bound...
00059     boundAdjust = _mm_and_ps(boundAdjust, posBoundAdjust); // ...which selects -2*bound in those lanes and 0 elsewhere.
00060     next4 = _mm_cmplt_ps(next3old1, lowerBound); // next4 is reused as scratch: mask of lanes where the difference is less than -bound...
00061     next4 = _mm_and_ps(next4, negBoundAdjust); // ...which selects +2*bound in those lanes and 0 elsewhere.
00062     boundAdjust = _mm_or_ps(next4, boundAdjust); // Merge the two per-lane corrections.
00063     // Make sure we're in the bounding interval:
00064     next3old1 = _mm_add_ps(next3old1, boundAdjust);
00065     _mm_store_ps(outPtr,next3old1); // Store the results back into the output
00066     outPtr += 4;
00067   }
00068
00069   for (number = (4 > (quarterPoints*4) ? 4 : (4 * quarterPoints)); number < num_points; number++) { // Scalar tail: starts at the first output not yet produced (index max(4, 4*quarterPoints)).
00070     *outPtr = *(inPtr) - *(inPtr-1);
00071     if (*outPtr >  bound) *outPtr -= 2*bound;
00072     if (*outPtr < -bound) *outPtr += 2*bound;
00073     inPtr++;
00074     outPtr++;
00075   }
00076
00077   *saveValue = inputVector[num_points-1]; // Keep the last input sample; presumably the caller passes it back in as the "previous sample" for the next buffer.
00078 }
00079 #endif /* LV_HAVE_SSE */
00080
00081 #ifdef LV_HAVE_GENERIC
/*!
  \brief Performs the FM-detect differentiation on the input vector and stores the results in the output vector.

  Each output is the difference between an input sample and the sample that
  precedes it; the first output is differenced against *saveValue. When a
  difference is greater than bound, 2*bound is subtracted from it; when it is
  less than -bound, 2*bound is added to it.

  The previous input sample is carried in a local variable instead of being
  re-read from inputVector, so the function also gives correct results when
  outputVector and inputVector point to the same buffer (in-place use).

  \param outputVector The vector where the results will be stored (num_points floats). This portable implementation needs no special alignment.
  \param inputVector The input vector containing phase data (must be on the interval (-bound,bound] ).
  \param bound The interval that the input phase data is in, which is used to modulo the differentiation
  \param saveValue A pointer to a float which contains the phase value of the sample before the first input sample. On return it holds the last input sample.
  \param num_points The number of real values in the input vector. If it is 0 the function returns without writing anything.
*/
static inline void volk_32f_s32f_32f_fm_detect_32f_a_generic(float* outputVector, const float* inputVector, const float bound, float* saveValue, unsigned int num_points){
  if (num_points < 1) {
    return;
  }
  unsigned int number = 0;
  // Seed the history with the sample saved by the caller; this makes the
  // first output an ordinary iteration of the loop below.
  float previous = *saveValue;

  for (number = 0; number < num_points; number++) {
    // Read the input before writing the output so in-place operation is safe.
    const float current = inputVector[number];
    float delta = current - previous;
    if (delta >  bound) delta -= 2*bound;
    if (delta < -bound) delta += 2*bound;
    outputVector[number] = delta;
    previous = current;
  }

  // Hand the last *input* sample back to the caller (not the last output,
  // which is what an in-place call would otherwise read back).
  *saveValue = previous;
}
00115 #endif /* LV_HAVE_GENERIC */
00116
00117
00118
00119
00120 #endif /* INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H */