doc/doxygen-3.6/volk__32f__x2__multiply__32f__u_8h_source.html

00001 #ifndef INCLUDED_volk_32f_x2_multiply_32f_u_H
00002 #define INCLUDED_volk_32f_x2_multiply_32f_u_H
00003
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006
00007 #ifdef LV_HAVE_SSE
00008 #include <xmmintrin.h>
/*!
  \brief Multiplies two float vectors element by element and stores the products in a third vector (SSE, unaligned loads and stores)
  \param cVector Output vector that receives the products
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_multiply_32f_u_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    /* Largest multiple of four that does not exceed num_points. */
    const unsigned int simdPoints = (num_points / 4) * 4;
    unsigned int idx = 0;

    /* Main loop: four products per iteration using unaligned loads/stores. */
    for(; idx < simdPoints; idx += 4){
      const __m128 lhs = _mm_loadu_ps(aVector + idx);
      const __m128 rhs = _mm_loadu_ps(bVector + idx);
      const __m128 product = _mm_mul_ps(lhs, rhs);

      _mm_storeu_ps(cVector + idx, product);
    }

    /* Tail: the zero to three elements that do not fill a whole SSE register. */
    for(; idx < num_points; idx++){
      cVector[idx] = aVector[idx] * bVector[idx];
    }
}
00044 #endif /* LV_HAVE_SSE */
00045
00046 #ifdef LV_HAVE_AVX
00047 #include <immintrin.h>
/*!
  \brief Multiplies two float vectors element by element and stores the products in a third vector (AVX, unaligned loads and stores)
  \param cVector Output vector that receives the products
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_multiply_32f_u_avx(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    /* Largest multiple of eight that does not exceed num_points. */
    const unsigned int simdPoints = (num_points / 8) * 8;
    unsigned int idx = 0;

    /* Main loop: eight products per iteration using unaligned loads/stores. */
    for(; idx < simdPoints; idx += 8){
      const __m256 lhs = _mm256_loadu_ps(aVector + idx);
      const __m256 rhs = _mm256_loadu_ps(bVector + idx);
      const __m256 product = _mm256_mul_ps(lhs, rhs);

      _mm256_storeu_ps(cVector + idx, product);
    }

    /* Tail: the zero to seven elements that do not fill a whole AVX register. */
    for(; idx < num_points; idx++){
      cVector[idx] = aVector[idx] * bVector[idx];
    }
}
00083 #endif /* LV_HAVE_AVX */
00084
00085 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies two float vectors element by element and stores the products in a third vector (plain C version)
  \param cVector Output vector that receives the products
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_multiply_32f_u_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    unsigned int idx;

    /* One scalar multiply per element, in ascending index order. */
    for(idx = 0; idx < num_points; ++idx){
      cVector[idx] = aVector[idx] * bVector[idx];
    }
}
00103 #endif /* LV_HAVE_GENERIC */
00104
00105
00106 #endif /* INCLUDED_volk_32f_x2_multiply_32f_u_H */