GNU Radio 3.5.3.2 C++ API
volk_32f_s32f_multiply_32f_u.h
Go to the documentation of this file.
00001 #ifndef INCLUDED_volk_32f_s32f_multiply_32f_u_H
00002 #define INCLUDED_volk_32f_s32f_multiply_32f_u_H
00003 
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006 
00007 #ifdef LV_HAVE_SSE
00008 #include <xmmintrin.h>
/*!
  \brief Multiplies each element of a float vector by a scalar (SSE, unaligned loads/stores)
  \param cVector Output buffer; receives num_points products
  \param aVector Input buffer; num_points floats are read from it
  \param scalar The factor every input element is multiplied by
  \param num_points The number of values read from aVector, scaled, and stored into cVector
*/
static inline void volk_32f_s32f_multiply_32f_u_sse(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
    /* Largest multiple of 4 that does not exceed num_points. */
    const unsigned int simdEnd = (num_points / 4) * 4;
    /* The scalar replicated into all four lanes. */
    const __m128 factor = _mm_set_ps1(scalar);
    unsigned int i;

    /* Main loop: four products per iteration using the unaligned load/store intrinsics. */
    for(i = 0; i < simdEnd; i += 4){
      const __m128 input = _mm_loadu_ps(aVector + i);
      _mm_storeu_ps(cVector + i, _mm_mul_ps(input, factor));
    }

    /* Tail: the remaining (num_points % 4) elements, one at a time. */
    for(; i < num_points; i++){
      cVector[i] = aVector[i] * scalar;
    }
}
00042 #endif /* LV_HAVE_SSE */
00043 
00044 #ifdef LV_HAVE_AVX
00045 #include <immintrin.h>
/*!
  \brief Multiplies each element of a float vector by a scalar (AVX, unaligned loads/stores)
  \param cVector Output buffer; receives num_points products
  \param aVector Input buffer; num_points floats are read from it
  \param scalar The factor every input element is multiplied by
  \param num_points The number of values read from aVector, scaled, and stored into cVector
*/
static inline void volk_32f_s32f_multiply_32f_u_avx(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
    /* Largest multiple of 8 that does not exceed num_points. */
    const unsigned int simdEnd = (num_points / 8) * 8;
    /* The scalar replicated into all eight lanes. */
    const __m256 factor = _mm256_set1_ps(scalar);
    unsigned int i;

    /* Main loop: eight products per iteration using the unaligned load/store intrinsics. */
    for(i = 0; i < simdEnd; i += 8){
      const __m256 input = _mm256_loadu_ps(aVector + i);
      _mm256_storeu_ps(cVector + i, _mm256_mul_ps(input, factor));
    }

    /* Tail: the remaining (num_points % 8) elements, one at a time. */
    for(; i < num_points; i++){
      cVector[i] = aVector[i] * scalar;
    }
}
00079 #endif /* LV_HAVE_AVX */
00080 
00081 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies each element of a float vector by a scalar (portable scalar loop)
  \param cVector Output buffer; receives num_points products
  \param aVector Input buffer; num_points floats are read from it
  \param scalar The factor every input element is multiplied by
  \param num_points The number of values read from aVector, scaled, and stored into cVector
*/
static inline void volk_32f_s32f_multiply_32f_u_generic(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
  unsigned int i;

  /* One product per iteration; element i of the output depends only on element i of the input. */
  for(i = 0; i < num_points; i++){
    cVector[i] = aVector[i] * scalar;
  }
}
00099 #endif /* LV_HAVE_GENERIC */
00100 
00101 
00102 #endif /* INCLUDED_volk_32f_s32f_multiply_32f_u_H */