GNU Radio 3.5.3.2 C++ API
volk_32f_s32f_multiply_32f_a.h
Go to the documentation of this file.
00001 #ifndef INCLUDED_volk_32f_s32f_multiply_32f_a_H
00002 #define INCLUDED_volk_32f_s32f_multiply_32f_a_H
00003 
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006 
00007 #ifdef LV_HAVE_SSE
00008 #include <xmmintrin.h>
/*!
  \brief Multiplies each element of a float vector by a scalar (SSE version)

  Full groups of four floats are processed with the aligned SSE load/store
  intrinsics, so cVector and aVector must be 16-byte aligned. Any remaining
  one to three elements are multiplied one at a time.

  \param cVector The vector where the results will be stored
  \param aVector The vector whose elements are multiplied by the scalar
  \param scalar The scalar value applied to every element
  \param num_points The number of values in aVector to be multiplied by scalar and stored into cVector
*/
static inline void volk_32f_s32f_multiply_32f_a_sse(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
    // Largest multiple of 4 that does not exceed num_points
    const unsigned int vectorizedPoints = num_points & ~3u;
    // Scalar replicated into all four lanes
    const __m128 factor = _mm_set_ps1(scalar);
    unsigned int idx;

    // Vector part: load four inputs, multiply, store four outputs
    for(idx = 0; idx < vectorizedPoints; idx += 4){
      _mm_store_ps(cVector + idx, _mm_mul_ps(_mm_load_ps(aVector + idx), factor));
    }

    // Leftover elements that do not fill a whole SSE register
    for(; idx < num_points; idx++){
      cVector[idx] = aVector[idx] * scalar;
    }
}
00042 #endif /* LV_HAVE_SSE */
00043 
00044 #ifdef LV_HAVE_AVX
00045 #include <immintrin.h>
00046 /*!
00047   \brief Scalar float multiply
00048   \param cVector The vector where the results will be stored
00049   \param aVector One of the vectors to be multiplied
00050   \param scalar the scalar value
00051   \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
00052 */
00053 static inline void volk_32f_s32f_multiply_32f_a_avx(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
00054     unsigned int number = 0;
00055     const unsigned int eighthPoints = num_points / 8;
00056 
00057     float* cPtr = cVector;
00058     const float* aPtr = aVector;
00059 
00060     __m256 aVal, bVal, cVal;
00061     bVal = _mm256_set1_ps(scalar);
00062     for(;number < eighthPoints; number++){
00063       
00064       aVal = _mm256_load_ps(aPtr); 
00065       
00066       cVal = _mm256_mul_ps(aVal, bVal); 
00067       
00068       _mm256_store_ps(cPtr,cVal); // Store the results back into the C container
00069 
00070       aPtr += 8;
00071       cPtr += 8;
00072     }
00073 
00074     number = eighthPoints * 8;
00075     for(;number < num_points; number++){
00076       *cPtr++ = (*aPtr++) * scalar;
00077     }
00078 }
00079 #endif /* LV_HAVE_AVX */
00080 
00081 
00082 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies each element of a float vector by a scalar (portable C version)
  \param cVector The vector where the results will be stored
  \param aVector The vector whose elements are multiplied by the scalar
  \param scalar The scalar value applied to every element
  \param num_points The number of values in aVector to be multiplied by scalar and stored into cVector
*/
static inline void volk_32f_s32f_multiply_32f_a_generic(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
  unsigned int idx;

  // One multiply per element, in ascending index order
  for(idx = 0; idx < num_points; idx++){
    cVector[idx] = aVector[idx] * scalar;
  }
}
00100 #endif /* LV_HAVE_GENERIC */
00101 
00102 #ifdef LV_HAVE_ORC
// ORC-backed implementation; declared here, defined outside this header.
extern void volk_32f_s32f_multiply_32f_a_orc_impl(float* dst, const float* src, const float scalar, unsigned int num_points);

/*!
  \brief Scalar float multiply (ORC version)

  Thin wrapper that forwards all four arguments, unchanged and in the same
  order, to volk_32f_s32f_multiply_32f_a_orc_impl.

  \param cVector The vector where the results will be stored (passed as dst)
  \param aVector The vector to be multiplied (passed as src)
  \param scalar the scalar value
  \param num_points The number of values in aVector to be multiplied by scalar and stored into cVector
*/
static inline void volk_32f_s32f_multiply_32f_a_orc(float* cVector,
                                                    const float* aVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    volk_32f_s32f_multiply_32f_a_orc_impl(cVector, aVector, scalar, num_points);
}
00114 #endif /* LV_HAVE_ORC */
00115 
00116 
00117 
00118 
00119 #endif /* INCLUDED_volk_32f_s32f_multiply_32f_a_H */