doc/doxygen-3.6/volk__32f__s32f__multiply__32f__a_8h_source.html

00001 #ifndef INCLUDED_volk_32f_s32f_multiply_32f_a_H
00002 #define INCLUDED_volk_32f_s32f_multiply_32f_a_H
00003
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006
00007 #ifdef LV_HAVE_SSE
00008 #include <xmmintrin.h>
00009 /*!
00010   \brief Multiplies each element of a float vector by a scalar, four elements at a time with SSE
00011   \param cVector The vector where the results will be stored; written with the aligned store _mm_store_ps, which requires a 16-byte aligned address
00012   \param aVector The vector to be multiplied by the scalar; read with the aligned load _mm_load_ps, which requires a 16-byte aligned address
00013   \param scalar The scalar value every element of aVector is multiplied by
00014   \param num_points The number of values in aVector to be multiplied by scalar and stored into cVector
00015 */
00016 static inline void volk_32f_s32f_multiply_32f_a_sse(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
00017     unsigned int number = 0;
00018     const unsigned int quarterPoints = num_points / 4; // Number of complete groups of four floats
00019
00020     float* cPtr = cVector;
00021     const float* aPtr = aVector;
00022
00023     __m128 aVal, bVal, cVal;
00024     bVal = _mm_set_ps1(scalar); // Broadcast the scalar into all four lanes once, outside the loop
00025     for(;number < quarterPoints; number++){
00026
00027       aVal = _mm_load_ps(aPtr); // Load the next four input floats
00028
00029       cVal = _mm_mul_ps(aVal, bVal); // Multiply each of the four lanes by the scalar
00030
00031       _mm_store_ps(cPtr,cVal); // Store the four products into the output vector
00032
00033       aPtr += 4;
00034       cPtr += 4;
00035     }
00036
00037     number = quarterPoints * 4; // Index of the first element not handled by the vector loop
00038     for(;number < num_points; number++){
00039       *cPtr++ = (*aPtr++) * scalar; // Scalar tail for the remaining num_points % 4 elements
00040     }
00041 }
00042 #endif /* LV_HAVE_SSE */
00043
00044 #ifdef LV_HAVE_AVX
00045 #include <immintrin.h>
00046 /*!
00047   \brief Multiplies each element of a float vector by a scalar, eight elements at a time with AVX
00048   \param cVector The vector where the results will be stored; written with the aligned store _mm256_store_ps, which requires a 32-byte aligned address
00049   \param aVector The vector to be multiplied by the scalar; read with the aligned load _mm256_load_ps, which requires a 32-byte aligned address
00050   \param scalar The scalar value every element of aVector is multiplied by
00051   \param num_points The number of values in aVector to be multiplied by scalar and stored into cVector
00052 */
00053 static inline void volk_32f_s32f_multiply_32f_a_avx(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
00054     unsigned int number = 0;
00055     const unsigned int eighthPoints = num_points / 8; // Number of complete groups of eight floats
00056
00057     float* cPtr = cVector;
00058     const float* aPtr = aVector;
00059
00060     __m256 aVal, bVal, cVal;
00061     bVal = _mm256_set1_ps(scalar); // Broadcast the scalar into all eight lanes once, outside the loop
00062     for(;number < eighthPoints; number++){
00063
00064       aVal = _mm256_load_ps(aPtr); // Load the next eight input floats
00065
00066       cVal = _mm256_mul_ps(aVal, bVal); // Multiply each of the eight lanes by the scalar
00067
00068       _mm256_store_ps(cPtr,cVal); // Store the eight products into the output vector
00069
00070       aPtr += 8;
00071       cPtr += 8;
00072     }
00073
00074     number = eighthPoints * 8; // Index of the first element not handled by the vector loop
00075     for(;number < num_points; number++){
00076       *cPtr++ = (*aPtr++) * scalar; // Scalar tail for the remaining num_points % 8 elements
00077     }
00078 }
00079 #endif /* LV_HAVE_AVX */
00080
00081
00082 #ifdef LV_HAVE_GENERIC
00083 /*!
00084   \brief Multiplies each element of a float vector by a scalar using a plain C loop (no intrinsics)
00085   \param cVector The vector where the results will be stored
00086   \param aVector The vector to be multiplied by the scalar
00087   \param scalar The scalar value every element of aVector is multiplied by
00088   \param num_points The number of values in aVector to be multiplied by scalar and stored into cVector
00089 */
00090 static inline void volk_32f_s32f_multiply_32f_a_generic(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
00091   unsigned int number = 0;
00092   const float* inputPtr = aVector;
00093   float* outputPtr = cVector;
00094   for(number = 0; number < num_points; number++){
00095     *outputPtr = (*inputPtr) * scalar; // One product per iteration
00096     inputPtr++;
00097     outputPtr++;
00098   }
00099 }
00100 #endif /* LV_HAVE_GENERIC */
00101
00102 #ifdef LV_HAVE_ORC
00103 /*!
00104   \brief Scalar float multiply; forwards all arguments unchanged to volk_32f_s32f_multiply_32f_a_orc_impl
00105   \param cVector Passed as dst to the ORC implementation; presumably the vector where the results will be stored
00106   \param aVector Passed as src to the ORC implementation; presumably the vector to be multiplied by the scalar
00107   \param scalar The scalar value passed through to the ORC implementation
00108   \param num_points The number of values, passed through to the ORC implementation; presumably the count of elements of aVector to process
00109 */
00110 extern void volk_32f_s32f_multiply_32f_a_orc_impl(float* dst, const float* src, const float scalar, unsigned int num_points); // Only declared here; the definition is not in this header
00111 static inline void volk_32f_s32f_multiply_32f_a_orc(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
00112     volk_32f_s32f_multiply_32f_a_orc_impl(cVector, aVector, scalar, num_points); // Thin wrapper: no work is done in this function itself
00113 }
00114 #endif /* LV_HAVE_ORC */
00115
00116
00117
00118
00119 #endif /* INCLUDED_volk_32f_s32f_multiply_32f_a_H */