doc/doxygen-3.6/volk__32fc__32f__multiply__32fc__a_8h_source.html

00001 #ifndef INCLUDED_volk_32fc_32f_multiply_32fc_a_H
00002 #define INCLUDED_volk_32fc_32f_multiply_32fc_a_H
00003
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006
00007 #ifdef LV_HAVE_SSE
00008 #include <xmmintrin.h>
00009   /*!
00010     \brief Multiplies the input complex vector with the input float vector and store their results in the third vector
00011     \param cVector The vector where the results will be stored
00012     \param aVector The complex vector to be multiplied
00013     \param bVector The vectors containing the float values to be multiplied against each complex value in aVector
00014     \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
00015   */
00016 static inline void volk_32fc_32f_multiply_32fc_a_sse(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){
00017     unsigned int number = 0;
00018     const unsigned int quarterPoints = num_points / 4;
00019
00020     lv_32fc_t* cPtr = cVector;
00021     const lv_32fc_t* aPtr = aVector;
00022     const float* bPtr=  bVector;
00023
00024     __m128 aVal1, aVal2, bVal, bVal1, bVal2, cVal;
00025     for(;number < quarterPoints; number++){
00026
00027       aVal1 = _mm_load_ps((const float*)aPtr);
00028       aPtr += 2;
00029
00030       aVal2 = _mm_load_ps((const float*)aPtr);
00031       aPtr += 2;
00032
00033       bVal = _mm_load_ps(bPtr);
00034       bPtr += 4;
00035
00036       bVal1 = _mm_shuffle_ps(bVal, bVal, _MM_SHUFFLE(1,1,0,0));
00037       bVal2 = _mm_shuffle_ps(bVal, bVal, _MM_SHUFFLE(3,3,2,2));
00038
00039       cVal = _mm_mul_ps(aVal1, bVal1);
00040
00041       _mm_store_ps((float*)cPtr,cVal); // Store the results back into the C container
00042       cPtr += 2;
00043
00044       cVal = _mm_mul_ps(aVal2, bVal2);
00045
00046       _mm_store_ps((float*)cPtr,cVal); // Store the results back into the C container
00047
00048       cPtr += 2;
00049     }
00050
00051     number = quarterPoints * 4;
00052     for(;number < num_points; number++){
00053       *cPtr++ = (*aPtr++) * (*bPtr);
00054       bPtr++;
00055     }
00056 }
00057 #endif /* LV_HAVE_SSE */
00058
00059 #ifdef LV_HAVE_GENERIC
00060   /*!
00061     \brief Multiplies the input complex vector with the input lv_32fc_t vector and store their results in the third vector
00062     \param cVector The vector where the results will be stored
00063     \param aVector The complex vector to be multiplied
00064     \param bVector The vectors containing the lv_32fc_t values to be multiplied against each complex value in aVector
00065     \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
00066   */
00067 static inline void volk_32fc_32f_multiply_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){
00068   lv_32fc_t* cPtr = cVector;
00069   const lv_32fc_t* aPtr = aVector;
00070   const float* bPtr=  bVector;
00071   unsigned int number = 0;
00072
00073   for(number = 0; number < num_points; number++){
00074     *cPtr++ = (*aPtr++) * (*bPtr++);
00075   }
00076 }
00077 #endif /* LV_HAVE_GENERIC */
00078
00079 #ifdef LV_HAVE_ORC
00080   /*!
00081     \brief Multiplies the input complex vector with the input lv_32fc_t vector and store their results in the third vector
00082     \param cVector The vector where the results will be stored
00083     \param aVector The complex vector to be multiplied
00084     \param bVector The vectors containing the lv_32fc_t values to be multiplied against each complex value in aVector
00085     \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
00086   */
00087 extern void volk_32fc_32f_multiply_32fc_a_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points);
00088 static inline void volk_32fc_32f_multiply_32fc_a_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){
00089     volk_32fc_32f_multiply_32fc_a_orc_impl(cVector, aVector, bVector, num_points);
00090 }
00091 #endif /* LV_HAVE_ORC */
00092
00093
00094
00095 #endif /* INCLUDED_volk_32fc_32f_multiply_32fc_a_H */