doc/doxygen-3.6/volk__32fc__x2__multiply__conjugate__32fc__a_8h_source.html

00001 #ifndef INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H
00002 #define INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H
00003
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006 #include <volk/volk_complex.h>
00007 #include <float.h>
00008
00009 #ifdef LV_HAVE_SSE3
00010 #include <pmmintrin.h>
00011   /*!
00012     \brief Computes cVector[i] = aVector[i] * conj(bVector[i]) for every complex sample, processing two samples per SSE3 iteration
00013     \param cVector Output buffer that receives the products
00014     \param aVector Left-hand operand, used as-is
00015     \param bVector Right-hand operand, conjugated before the multiplication
00016     \param num_points Count of complex samples read from aVector and bVector and written to cVector
00017   */
00018 static inline void volk_32fc_x2_multiply_conjugate_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
00019     const unsigned int pairCount = num_points / 2;
00020     unsigned int pairIdx;
00021
00022     lv_32fc_t* outPtr = cVector;
00023     const lv_32fc_t* lhsPtr = aVector;
00024     const lv_32fc_t* rhsPtr = bVector;
00025
00026     // XOR with this mask flips the sign bit of lanes 1 and 3 (the imaginary parts)
00027     const __m128 imagSignMask = _mm_setr_ps(0, -0.f, 0, -0.f);
00028     __m128 aVal, aSwapped, bConj, bReal, bImag, prodReal, prodImag, result;
00029
00030     for(pairIdx = 0; pairIdx < pairCount; pairIdx++){
00031       aVal = _mm_load_ps((float*)lhsPtr); // Two samples of a: ar,ai,br,bi
00032       bConj = _mm_load_ps((float*)rhsPtr); // Two samples of b: cr,ci,dr,di
00033       bConj = _mm_xor_ps(bConj, imagSignMask); // Conjugated: cr,-ci,dr,-di
00034
00035       bReal = _mm_moveldup_ps(bConj); // cr,cr,dr,dr
00036       bImag = _mm_movehdup_ps(bConj); // -ci,-ci,-di,-di
00037
00038       aSwapped = _mm_shuffle_ps(aVal,aVal,0xB1); // Swap re/im within each sample: ai,ar,bi,br
00039
00040       prodReal = _mm_mul_ps(aVal,bReal); // ar*cr, ai*cr, br*dr, bi*dr
00041       prodImag = _mm_mul_ps(aSwapped,bImag); // -ai*ci, -ar*ci, -bi*di, -br*di
00042
00043       // addsub subtracts in the even lanes and adds in the odd lanes, giving
00044       // ar*cr+ai*ci, ai*cr-ar*ci, br*dr+bi*di, bi*dr-br*di
00045       result = _mm_addsub_ps(prodReal,prodImag);
00046
00047       _mm_store_ps((float*)outPtr,result); // Write both products to the output
00048
00049       lhsPtr += 2;
00050       rhsPtr += 2;
00051       outPtr += 2;
00052     }
00053
00054     if((num_points & 1u) != 0) { // Odd count: one sample is left after the paired loop
00055       *outPtr = (*lhsPtr) * lv_conj(*rhsPtr);
00056     }
00057 }
00058 #endif /* LV_HAVE_SSE3 */
00059
00060 #ifdef LV_HAVE_GENERIC
00061   /*!
00062     \brief Plain C kernel (no SIMD intrinsics) that writes aVector[i] * conj(bVector[i]) into cVector[i] for each sample
00063     \param cVector Output buffer that receives the products
00064     \param aVector Left-hand operand, used as-is
00065     \param bVector Right-hand operand, conjugated before the multiplication
00066     \param num_points Count of complex samples read from aVector and bVector and written to cVector
00067   */
00068 static inline void volk_32fc_x2_multiply_conjugate_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
00069     unsigned int idx;
00070
00071     // Indexed walk over the three buffers: one complex product
00072     // per iteration, visited in ascending index order.
00073     for(idx = 0; idx < num_points; idx++){
00074       const lv_32fc_t conjugated = lv_conj(bVector[idx]);
00075       cVector[idx] = aVector[idx] * conjugated;
00076     }
00077 }
00078 #endif /* LV_HAVE_GENERIC */
00079
00080
00081 #endif /* INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H */