/* GNU Radio 3.6.5 C++ API */
#ifndef INCLUDED_volk_32f_s32f_multiply_32f_a_H
#define INCLUDED_volk_32f_s32f_multiply_32f_a_H

#include <inttypes.h>
#include <stdio.h>

#ifdef LV_HAVE_SSE
#include <xmmintrin.h>
/*!
  \brief Scalar float multiply (SSE): cVector[i] = aVector[i] * scalar
  \param cVector The vector where the results will be stored
  \param aVector The vector to be multiplied by the scalar
  \param scalar the scalar value
  \param num_points The number of values in aVector to be multiplied by scalar and stored into cVector

  \note Uses aligned loads/stores (_mm_load_ps / _mm_store_ps), so both
        cVector and aVector must be 16-byte aligned.
*/
static inline void volk_32f_s32f_multiply_32f_a_sse(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  float* cPtr = cVector;
  const float* aPtr = aVector;

  __m128 aVal, bVal, cVal;
  bVal = _mm_set_ps1(scalar); // Broadcast the scalar into all four lanes
  for(;number < quarterPoints; number++){

    aVal = _mm_load_ps(aPtr);

    cVal = _mm_mul_ps(aVal, bVal);

    _mm_store_ps(cPtr,cVal); // Store the results back into the C container

    aPtr += 4;
    cPtr += 4;
  }

  // Handle the (num_points % 4) values that do not fill a whole SSE register
  number = quarterPoints * 4;
  for(;number < num_points; number++){
    *cPtr++ = (*aPtr++) * scalar;
  }
}
#endif /* LV_HAVE_SSE */

#ifdef LV_HAVE_AVX
#include <immintrin.h>
/*!
  \brief Scalar float multiply (AVX): cVector[i] = aVector[i] * scalar
  \param cVector The vector where the results will be stored
  \param aVector The vector to be multiplied by the scalar
  \param scalar the scalar value
  \param num_points The number of values in aVector to be multiplied by scalar and stored into cVector

  \note Uses aligned loads/stores (_mm256_load_ps / _mm256_store_ps), so both
        cVector and aVector must be 32-byte aligned.
*/
static inline void volk_32f_s32f_multiply_32f_a_avx(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int eighthPoints = num_points / 8;

  float* cPtr = cVector;
  const float* aPtr = aVector;

  __m256 aVal, bVal, cVal;
  bVal = _mm256_set1_ps(scalar); // Broadcast the scalar into all eight lanes
  for(;number < eighthPoints; number++){

    aVal = _mm256_load_ps(aPtr);

    cVal = _mm256_mul_ps(aVal, bVal);

    _mm256_store_ps(cPtr,cVal); // Store the results back into the C container

    aPtr += 8;
    cPtr += 8;
  }

  // Handle the (num_points % 8) values that do not fill a whole AVX register
  number = eighthPoints * 8;
  for(;number < num_points; number++){
    *cPtr++ = (*aPtr++) * scalar;
  }
}
#endif /* LV_HAVE_AVX */


#ifdef LV_HAVE_GENERIC
/*!
  \brief Scalar float multiply (portable C): cVector[i] = aVector[i] * scalar
  \param cVector The vector where the results will be stored
  \param aVector The vector to be multiplied by the scalar
  \param scalar the scalar value
  \param num_points The number of values in aVector to be multiplied by scalar and stored into cVector
*/
static inline void volk_32f_s32f_multiply_32f_a_generic(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;
  const float* inputPtr = aVector;
  float* outputPtr = cVector;
  for(number = 0; number < num_points; number++){
    *outputPtr = (*inputPtr) * scalar;
    inputPtr++;
    outputPtr++;
  }
}
#endif /* LV_HAVE_GENERIC */

#ifdef LV_HAVE_ORC
/*!
  \brief Scalar float multiply (ORC): forwards to volk_32f_s32f_multiply_32f_a_orc_impl
  \param cVector The vector where the results will be stored
  \param aVector The vector to be multiplied by the scalar
  \param scalar the scalar value
  \param num_points The number of values in aVector to be multiplied by scalar and stored into cVector

  \note The _orc_impl routine is only declared here; its definition lives
        outside this header (presumably generated by ORC - confirm against
        the build).
*/
extern void volk_32f_s32f_multiply_32f_a_orc_impl(float* dst, const float* src, const float scalar, unsigned int num_points);
static inline void volk_32f_s32f_multiply_32f_a_orc(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
  volk_32f_s32f_multiply_32f_a_orc_impl(cVector, aVector, scalar, num_points);
}
#endif /* LV_HAVE_ORC */




#endif /* INCLUDED_volk_32f_s32f_multiply_32f_a_H */