doc/doxygen-3.5/volk__32f__s32f__multiply__32f__u_8h_source.html

00001 #ifndef INCLUDED_volk_32f_s32f_multiply_32f_u_H
00002 #define INCLUDED_volk_32f_s32f_multiply_32f_u_H
00003
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006
00007 #ifdef LV_HAVE_SSE
00008 #include <xmmintrin.h>
/*!
  \brief Multiplies every element of a float vector by a scalar (SSE, unaligned buffers)

  Elements are handled four at a time with unaligned SSE loads and stores;
  the trailing num_points % 4 elements are multiplied one by one.

  \param cVector Output buffer that receives aVector[i] * scalar for each i
  \param aVector Input buffer holding the values to be scaled
  \param scalar The factor applied to every input element
  \param num_points The number of floats read from aVector and written to cVector
*/
static inline void volk_32f_s32f_multiply_32f_u_sse(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
    const unsigned int vectorBlocks = num_points / 4;
    const __m128 scale = _mm_set_ps1(scalar); // scalar replicated into all four lanes
    const float* src = aVector;
    float* dst = cVector;
    unsigned int block;
    unsigned int idx;

    // Bulk part: one unaligned 4-float load, multiply and store per pass
    for(block = 0; block < vectorBlocks; block++){
      _mm_storeu_ps(dst, _mm_mul_ps(_mm_loadu_ps(src), scale));
      src += 4;
      dst += 4;
    }

    // Leftover part: elements past the last complete group of four
    for(idx = vectorBlocks * 4; idx < num_points; idx++){
      cVector[idx] = aVector[idx] * scalar;
    }
}
00042 #endif /* LV_HAVE_SSE */
00043
00044 #ifdef LV_HAVE_AVX
00045 #include <immintrin.h>
/*!
  \brief Multiplies every element of a float vector by a scalar (AVX, unaligned buffers)

  Elements are handled eight at a time with unaligned AVX loads and stores;
  the trailing num_points % 8 elements are multiplied one by one.

  \param cVector Output buffer that receives aVector[i] * scalar for each i
  \param aVector Input buffer holding the values to be scaled
  \param scalar The factor applied to every input element
  \param num_points The number of floats read from aVector and written to cVector
*/
static inline void volk_32f_s32f_multiply_32f_u_avx(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
    const unsigned int vectorBlocks = num_points / 8;
    const __m256 scale = _mm256_set1_ps(scalar); // scalar replicated into all eight lanes
    const float* src = aVector;
    float* dst = cVector;
    unsigned int block;
    unsigned int idx;

    // Bulk part: one unaligned 8-float load, multiply and store per pass
    for(block = 0; block < vectorBlocks; block++){
      _mm256_storeu_ps(dst, _mm256_mul_ps(_mm256_loadu_ps(src), scale));
      src += 8;
      dst += 8;
    }

    // Leftover part: elements past the last complete group of eight
    for(idx = vectorBlocks * 8; idx < num_points; idx++){
      cVector[idx] = aVector[idx] * scalar;
    }
}
00079 #endif /* LV_HAVE_AVX */
00080
00081 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies every element of a float vector by a scalar (portable scalar loop)
  \param cVector Output buffer that receives aVector[i] * scalar for each i
  \param aVector Input buffer holding the values to be scaled
  \param scalar The factor applied to every input element
  \param num_points The number of floats read from aVector and written to cVector
*/
static inline void volk_32f_s32f_multiply_32f_u_generic(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
  unsigned int idx;

  // Elements are visited in ascending index order, one multiply per element
  for(idx = 0; idx < num_points; idx++){
    cVector[idx] = aVector[idx] * scalar;
  }
}
00099 #endif /* LV_HAVE_GENERIC */
00100
00101
00102 #endif /* INCLUDED_volk_32f_s32f_multiply_32f_u_H */