GNU Radio 3.5.3.2 C++ API
volk_32fc_x2_multiply_32fc_a.h
Go to the documentation of this file.
00001 #ifndef INCLUDED_volk_32fc_x2_multiply_32fc_a_H
00002 #define INCLUDED_volk_32fc_x2_multiply_32fc_a_H
00003 
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006 #include <volk/volk_complex.h>
00007 #include <float.h>
00008 
00009 #ifdef LV_HAVE_SSE3
00010 #include <pmmintrin.h>
00011   /*!
00012     \brief Multiplies the two input complex vectors and stores their results in the third vector
00013     \param cVector The vector where the results will be stored
00014     \param aVector One of the vectors to be multiplied
00015     \param bVector One of the vectors to be multiplied
00016     \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
00017   */
00018 static inline void volk_32fc_x2_multiply_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
00019   unsigned int number = 0;
00020     const unsigned int halfPoints = num_points / 2;
00021 
00022     __m128 x, y, yl, yh, z, tmp1, tmp2;
00023     lv_32fc_t* c = cVector;
00024     const lv_32fc_t* a = aVector;
00025     const lv_32fc_t* b = bVector;
00026     for(;number < halfPoints; number++){
00027       
00028       x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
00029       y = _mm_load_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
00030       
00031       yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
00032       yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
00033       
00034       tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
00035       
00036       x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
00037       
00038       tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
00039       
00040       z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
00041     
00042       _mm_store_ps((float*)c,z); // Store the results back into the C container
00043 
00044       a += 2;
00045       b += 2;
00046       c += 2;
00047     }
00048 
00049     if((num_points % 2) != 0) {
00050       *c = (*a) * (*b);
00051     }
00052 }
00053 #endif /* LV_HAVE_SSE3 */
00054 
00055 #ifdef LV_HAVE_GENERIC
00056   /*!
00057     \brief Multiplies the two input complex vectors and stores their results in the third vector
00058     \param cVector The vector where the results will be stored
00059     \param aVector One of the vectors to be multiplied
00060     \param bVector One of the vectors to be multiplied
00061     \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
00062   */
00063 static inline void volk_32fc_x2_multiply_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
00064     lv_32fc_t* cPtr = cVector;
00065     const lv_32fc_t* aPtr = aVector;
00066     const lv_32fc_t* bPtr=  bVector;
00067     unsigned int number = 0;
00068 
00069     for(number = 0; number < num_points; number++){
00070       *cPtr++ = (*aPtr++) * (*bPtr++);
00071     }
00072 }
00073 #endif /* LV_HAVE_GENERIC */
00074 
00075 #ifdef LV_HAVE_ORC
00076   /*!
00077     \brief Multiplies the two input complex vectors and stores their results in the third vector
00078     \param cVector The vector where the results will be stored
00079     \param aVector One of the vectors to be multiplied
00080     \param bVector One of the vectors to be multiplied
00081     \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
00082   */
00083 extern void volk_32fc_x2_multiply_32fc_a_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points);
00084 static inline void volk_32fc_x2_multiply_32fc_a_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
00085     volk_32fc_x2_multiply_32fc_a_orc_impl(cVector, aVector, bVector, num_points);
00086 }
00087 #endif /* LV_HAVE_ORC */
00088 
00089 
00090 
00091 
00092 
00093 #endif /* INCLUDED_volk_32fc_x2_multiply_32fc_a_H */