GNU Radio 3.5.3.2 C++ API
|
#ifndef INCLUDED_volk_32f_s32f_multiply_32f_u_H
#define INCLUDED_volk_32f_s32f_multiply_32f_u_H

#include <inttypes.h>
#include <stdio.h>

#ifdef LV_HAVE_SSE
#include <xmmintrin.h>
/*!
  \brief Multiplies every element of a float vector by a scalar (SSE, unaligned loads/stores)
  \param cVector The vector where the results will be stored
  \param aVector The vector whose elements are multiplied by the scalar
  \param scalar The scalar value each element is multiplied by
  \param num_points The number of values in aVector to be multiplied by scalar and stored into cVector
*/
static inline void volk_32f_s32f_multiply_32f_u_sse(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  float* cPtr = cVector;
  const float* aPtr = aVector;

  __m128 aVal, cVal;
  // Broadcast the scalar into all four lanes once, outside the loop
  const __m128 bVal = _mm_set_ps1(scalar);

  // Main loop: four floats per iteration
  for(;number < quarterPoints; number++){
    aVal = _mm_loadu_ps(aPtr);

    cVal = _mm_mul_ps(aVal, bVal);

    _mm_storeu_ps(cPtr, cVal); // Store the four products into cVector

    aPtr += 4;
    cPtr += 4;
  }

  // Tail loop: the remaining num_points % 4 values, one at a time
  number = quarterPoints * 4;
  for(;number < num_points; number++){
    *cPtr++ = (*aPtr++) * scalar;
  }
}
#endif /* LV_HAVE_SSE */

#ifdef LV_HAVE_AVX
#include <immintrin.h>
/*!
  \brief Multiplies every element of a float vector by a scalar (AVX, unaligned loads/stores)
  \param cVector The vector where the results will be stored
  \param aVector The vector whose elements are multiplied by the scalar
  \param scalar The scalar value each element is multiplied by
  \param num_points The number of values in aVector to be multiplied by scalar and stored into cVector
*/
static inline void volk_32f_s32f_multiply_32f_u_avx(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int eighthPoints = num_points / 8;

  float* cPtr = cVector;
  const float* aPtr = aVector;

  __m256 aVal, cVal;
  // Broadcast the scalar into all eight lanes once, outside the loop
  const __m256 bVal = _mm256_set1_ps(scalar);

  // Main loop: eight floats per iteration
  for(;number < eighthPoints; number++){
    aVal = _mm256_loadu_ps(aPtr);

    cVal = _mm256_mul_ps(aVal, bVal);

    _mm256_storeu_ps(cPtr, cVal); // Store the eight products into cVector

    aPtr += 8;
    cPtr += 8;
  }

  // Tail loop: the remaining num_points % 8 values, one at a time
  number = eighthPoints * 8;
  for(;number < num_points; number++){
    *cPtr++ = (*aPtr++) * scalar;
  }
}
#endif /* LV_HAVE_AVX */

#ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies every element of a float vector by a scalar (portable C implementation)
  \param cVector The vector where the results will be stored
  \param aVector The vector whose elements are multiplied by the scalar
  \param scalar The scalar value each element is multiplied by
  \param num_points The number of values in aVector to be multiplied by scalar and stored into cVector
*/
static inline void volk_32f_s32f_multiply_32f_u_generic(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
  unsigned int number;
  const float* inputPtr = aVector;
  float* outputPtr = cVector;
  for(number = 0; number < num_points; number++){
    *outputPtr = (*inputPtr) * scalar;
    inputPtr++;
    outputPtr++;
  }
}
#endif /* LV_HAVE_GENERIC */


#endif /* INCLUDED_volk_32f_s32f_multiply_32f_u_H */