GNU Radio 3.5.3.2 C++ API
volk_32fc_32f_multiply_32fc_a.h
Go to the documentation of this file.
00001 #ifndef INCLUDED_volk_32fc_32f_multiply_32fc_a_H
00002 #define INCLUDED_volk_32fc_32f_multiply_32fc_a_H
00003 
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006 
00007 #ifdef LV_HAVE_SSE
00008 #include <xmmintrin.h>
00009   /*!
00010     \brief Multiplies the input complex vector with the input float vector and store their results in the third vector
00011     \param cVector The vector where the results will be stored
00012     \param aVector The complex vector to be multiplied
00013     \param bVector The vectors containing the float values to be multiplied against each complex value in aVector
00014     \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
00015   */
00016 static inline void volk_32fc_32f_multiply_32fc_a_sse(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){
00017     unsigned int number = 0;
00018     const unsigned int quarterPoints = num_points / 4;
00019 
00020     lv_32fc_t* cPtr = cVector;
00021     const lv_32fc_t* aPtr = aVector;
00022     const float* bPtr=  bVector;
00023 
00024     __m128 aVal1, aVal2, bVal, bVal1, bVal2, cVal;
00025     for(;number < quarterPoints; number++){
00026       
00027       aVal1 = _mm_load_ps((const float*)aPtr);
00028       aPtr += 2;
00029  
00030       aVal2 = _mm_load_ps((const float*)aPtr); 
00031       aPtr += 2;
00032 
00033       bVal = _mm_load_ps(bPtr);
00034       bPtr += 4;
00035 
00036       bVal1 = _mm_shuffle_ps(bVal, bVal, _MM_SHUFFLE(1,1,0,0));
00037       bVal2 = _mm_shuffle_ps(bVal, bVal, _MM_SHUFFLE(3,3,2,2));
00038 
00039       cVal = _mm_mul_ps(aVal1, bVal1); 
00040       
00041       _mm_store_ps((float*)cPtr,cVal); // Store the results back into the C container
00042       cPtr += 2;
00043 
00044       cVal = _mm_mul_ps(aVal2, bVal2); 
00045       
00046       _mm_store_ps((float*)cPtr,cVal); // Store the results back into the C container
00047 
00048       cPtr += 2;
00049     }
00050 
00051     number = quarterPoints * 4;
00052     for(;number < num_points; number++){
00053       *cPtr++ = (*aPtr++) * (*bPtr);
00054       bPtr++;
00055     }
00056 }
00057 #endif /* LV_HAVE_SSE */
00058 
00059 #ifdef LV_HAVE_GENERIC
00060   /*!
00061     \brief Multiplies the input complex vector with the input lv_32fc_t vector and store their results in the third vector
00062     \param cVector The vector where the results will be stored
00063     \param aVector The complex vector to be multiplied
00064     \param bVector The vectors containing the lv_32fc_t values to be multiplied against each complex value in aVector
00065     \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
00066   */
00067 static inline void volk_32fc_32f_multiply_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){
00068   lv_32fc_t* cPtr = cVector;
00069   const lv_32fc_t* aPtr = aVector;
00070   const float* bPtr=  bVector;
00071   unsigned int number = 0;
00072   
00073   for(number = 0; number < num_points; number++){
00074     *cPtr++ = (*aPtr++) * (*bPtr++);
00075   }
00076 }
00077 #endif /* LV_HAVE_GENERIC */
00078 
00079 #ifdef LV_HAVE_ORC
00080   /*!
00081     \brief Multiplies the input complex vector with the input lv_32fc_t vector and store their results in the third vector
00082     \param cVector The vector where the results will be stored
00083     \param aVector The complex vector to be multiplied
00084     \param bVector The vectors containing the lv_32fc_t values to be multiplied against each complex value in aVector
00085     \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
00086   */
00087 extern void volk_32fc_32f_multiply_32fc_a_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points);
00088 static inline void volk_32fc_32f_multiply_32fc_a_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){
00089     volk_32fc_32f_multiply_32fc_a_orc_impl(cVector, aVector, bVector, num_points);
00090 }
00091 #endif /* LV_HAVE_ORC */
00092 
00093 
00094 
00095 #endif /* INCLUDED_volk_32fc_32f_multiply_32fc_a_H */