doc/doxygen-3.6/volk__32fc__x2__multiply__32fc__a_8h_source.html

00001 #ifndef INCLUDED_volk_32fc_x2_multiply_32fc_a_H
00002 #define INCLUDED_volk_32fc_x2_multiply_32fc_a_H
00003
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006 #include <volk/volk_complex.h>
00007 #include <float.h>
00008
00009 #ifdef LV_HAVE_SSE3
00010 #include <pmmintrin.h>
00011   /*!
00012     \brief Multiplies the two input complex vectors element by element and stores the products in cVector
00013     \param cVector The vector where the results will be stored
00014     \param aVector One of the vectors to be multiplied
00015     \param bVector One of the vectors to be multiplied
00016     \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
00017   */
00018 static inline void volk_32fc_x2_multiply_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
00019   unsigned int number = 0;
00020     const unsigned int halfPoints = num_points / 2;
00021
00022     __m128 x, y, yl, yh, z, tmp1, tmp2;
00023     lv_32fc_t* c = cVector;
00024     const lv_32fc_t* a = aVector;
00025     const lv_32fc_t* b = bVector;
00026     for(;number < halfPoints; number++){
00027
00028       x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
00029       y = _mm_load_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
00030
00031       yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
00032       yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
00033
00034       tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
00035
00036       x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
00037
00038       tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
00039
00040       z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
00041
00042       _mm_store_ps((float*)c,z); // Store the results back into the C container
00043
00044       a += 2;
00045       b += 2;
00046       c += 2;
00047     }
00048
00049     if((num_points % 2) != 0) {
00050       *c = (*a) * (*b);
00051     }
00052 }
00053 #endif /* LV_HAVE_SSE3 */
00054
00055 #ifdef LV_HAVE_GENERIC
00056   /*!
00057     \brief Multiplies the two input complex vectors element by element and stores the products in cVector
00058     \param cVector The vector where the results will be stored
00059     \param aVector One of the vectors to be multiplied
00060     \param bVector One of the vectors to be multiplied
00061     \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
00062   */
00063 static inline void volk_32fc_x2_multiply_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
00064     lv_32fc_t* cPtr = cVector;
00065     const lv_32fc_t* aPtr = aVector;
00066     const lv_32fc_t* bPtr=  bVector;
00067     unsigned int number = 0;
00068
00069     for(number = 0; number < num_points; number++){
00070       *cPtr++ = (*aPtr++) * (*bPtr++);
00071     }
00072 }
00073 #endif /* LV_HAVE_GENERIC */
00074
00075 #ifdef LV_HAVE_ORC
00076   /*!
00077     \brief Multiplies the two input complex vectors element by element and stores the products in cVector
00078     \param cVector The vector where the results will be stored
00079     \param aVector One of the vectors to be multiplied
00080     \param bVector One of the vectors to be multiplied
00081     \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
00082   */
00083 extern void volk_32fc_x2_multiply_32fc_a_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points);
00084 static inline void volk_32fc_x2_multiply_32fc_a_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
00085     volk_32fc_x2_multiply_32fc_a_orc_impl(cVector, aVector, bVector, num_points);
00086 }
00087 #endif /* LV_HAVE_ORC */
00088
00089
00090
00091
00092
00093 #endif /* INCLUDED_volk_32fc_x2_multiply_32fc_a_H */