GNU Radio 3.5.3.2 C++ API
volk_32fc_x2_multiply_conjugate_32fc_a.h
Go to the documentation of this file.
00001 #ifndef INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H
00002 #define INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H
00003 
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006 #include <volk/volk_complex.h>
00007 #include <float.h>
00008 
00009 #ifdef LV_HAVE_SSE3
00010 #include <pmmintrin.h>
00011   /*!
00012     \brief Multiplies vector a by the conjugate of vector b and stores the results in the third vector
00013     \param cVector The vector where the results will be stored
00014     \param aVector First vector to be multiplied
00015     \param bVector Second vector that is conjugated before being multiplied
00016     \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
00017   */
00018 static inline void volk_32fc_x2_multiply_conjugate_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
00019   unsigned int number = 0;
00020     const unsigned int halfPoints = num_points / 2;
00021 
00022     __m128 x, y, yl, yh, z, tmp1, tmp2;
00023     lv_32fc_t* c = cVector;
00024     const lv_32fc_t* a = aVector;
00025     const lv_32fc_t* b = bVector;
00026 
00027     __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
00028 
00029     for(;number < halfPoints; number++){
00030       
00031       x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
00032       y = _mm_load_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
00033 
00034       y = _mm_xor_ps(y, conjugator); // conjugate y
00035       
00036       yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
00037       yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
00038       
00039       tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
00040       
00041       x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
00042       
00043       tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
00044       
00045       z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
00046     
00047       _mm_store_ps((float*)c,z); // Store the results back into the C container
00048 
00049       a += 2;
00050       b += 2;
00051       c += 2;
00052     }
00053 
00054     if((num_points % 2) != 0) {
00055       *c = (*a) * lv_conj(*b);
00056     }
00057 }
00058 #endif /* LV_HAVE_SSE3 */
00059 
00060 #ifdef LV_HAVE_GENERIC
00061   /*!
00062     \brief Multiplies vector a by the conjugate of vector b and stores the results in the third vector
00063     \param cVector The vector where the results will be stored
00064     \param aVector First vector to be multiplied
00065     \param bVector Second vector that is conjugated before being multiplied
00066     \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
00067   */
00068 static inline void volk_32fc_x2_multiply_conjugate_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
00069     lv_32fc_t* cPtr = cVector;
00070     const lv_32fc_t* aPtr = aVector;
00071     const lv_32fc_t* bPtr=  bVector;
00072     unsigned int number = 0;
00073 
00074     for(number = 0; number < num_points; number++){
00075       *cPtr++ = (*aPtr++) * lv_conj(*bPtr++);
00076     }
00077 }
00078 #endif /* LV_HAVE_GENERIC */
00079 
00080 
00081 #endif /* INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H */