1 #ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_a_H
2 #define INCLUDED_volk_16ic_s32f_magnitude_32f_a_H
10 #include <pmmintrin.h>
18 static inline void volk_16ic_s32f_magnitude_32f_a_sse3(
float* magnitudeVector,
const lv_16sc_t* complexVector,
const float scalar,
unsigned int num_points){
19 unsigned int number = 0;
20 const unsigned int quarterPoints = num_points / 4;
23 float* magnitudeVectorPtr = magnitudeVector;
25 __m128 invScalar = _mm_set_ps1(1.0/scalar);
27 __m128 cplxValue1, cplxValue2, result;
31 for(;number < quarterPoints; number++){
33 inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
34 inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
35 inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
36 inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
38 inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
39 inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
40 inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
41 inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
43 cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
44 cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
46 complexVectorPtr += 8;
48 cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
49 cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
51 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
52 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);
54 result = _mm_hadd_ps(cplxValue1, cplxValue2);
56 result = _mm_sqrt_ps(result);
58 _mm_store_ps(magnitudeVectorPtr, result);
60 magnitudeVectorPtr += 4;
63 number = quarterPoints * 4;
64 magnitudeVectorPtr = &magnitudeVector[number];
65 complexVectorPtr = (
const int16_t*)&complexVector[number];
66 for(; number < num_points; number++){
67 float val1Real = (float)(*complexVectorPtr++) / scalar;
68 float val1Imag = (float)(*complexVectorPtr++) / scalar;
69 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
75 #include <xmmintrin.h>
83 static inline void volk_16ic_s32f_magnitude_32f_a_sse(
float* magnitudeVector,
const lv_16sc_t* complexVector,
const float scalar,
unsigned int num_points){
84 unsigned int number = 0;
85 const unsigned int quarterPoints = num_points / 4;
88 float* magnitudeVectorPtr = magnitudeVector;
90 const float iScalar = 1.0 / scalar;
91 __m128 invScalar = _mm_set_ps1(iScalar);
93 __m128 cplxValue1, cplxValue2, result, re, im;
97 for(;number < quarterPoints; number++){
98 inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
99 inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
100 inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
101 inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
103 inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
104 inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
105 inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
106 inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
108 cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
109 cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
111 re = _mm_shuffle_ps(cplxValue1, cplxValue2, 0x88);
112 im = _mm_shuffle_ps(cplxValue1, cplxValue2, 0xdd);
114 complexVectorPtr += 8;
116 cplxValue1 = _mm_mul_ps(re, invScalar);
117 cplxValue2 = _mm_mul_ps(im, invScalar);
119 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
120 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);
122 result = _mm_add_ps(cplxValue1, cplxValue2);
124 result = _mm_sqrt_ps(result);
126 _mm_store_ps(magnitudeVectorPtr, result);
128 magnitudeVectorPtr += 4;
131 number = quarterPoints * 4;
132 magnitudeVectorPtr = &magnitudeVector[number];
133 complexVectorPtr = (
const int16_t*)&complexVector[number];
134 for(; number < num_points; number++){
135 float val1Real = (float)(*complexVectorPtr++) * iScalar;
136 float val1Imag = (float)(*complexVectorPtr++) * iScalar;
137 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
#ifdef LV_HAVE_GENERIC
/*!
 * \brief Scales 16-bit complex samples by 1/scalar and stores their magnitudes
 *        (portable C implementation).
 *
 * Each input point is a pair of int16_t values read in (real, imaginary)
 * order. Both components are converted to float, multiplied by 1/scalar, and
 * sqrtf(real^2 + imag^2) is written to the output.
 *
 * \param magnitudeVector Output buffer receiving num_points floats.
 * \param complexVector   Input buffer holding num_points complex samples.
 * \param scalar          Divisor applied to both components before the
 *                        magnitude is taken.
 * \param num_points      Number of complex samples to process.
 */
static inline void volk_16ic_s32f_magnitude_32f_generic(float* magnitudeVector,
                                                        const lv_16sc_t* complexVector,
                                                        const float scalar,
                                                        unsigned int num_points)
{
    /* View the complex input as a flat run of interleaved int16_t values. */
    const int16_t* complexVectorPtr = (const int16_t*)complexVector;
    float* magnitudeVectorPtr = magnitudeVector;
    const float invScalar = 1.0f / scalar;
    unsigned int number;

    for (number = 0; number < num_points; number++) {
        const float real = ((float)(*complexVectorPtr++)) * invScalar;
        const float imag = ((float)(*complexVectorPtr++)) * invScalar;
        *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
    }
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC_DISABLED
/*!
 * \brief Externally defined kernel that the wrapper below forwards to.
 *
 * Only the prototype is given here; the definition lives outside this header.
 */
extern void volk_16ic_s32f_magnitude_32f_a_orc_impl(float* magnitudeVector,
                                                    const lv_16sc_t* complexVector,
                                                    const float scalar,
                                                    unsigned int num_points);

/*!
 * \brief Thin wrapper that passes all arguments, unchanged and in order, to
 *        volk_16ic_s32f_magnitude_32f_a_orc_impl().
 *
 * NOTE(review): the wrapper is named "_u_orc" while the implementation it
 * calls is named "_a_orc_impl" -- confirm the naming is intentional.
 *
 * \param magnitudeVector Output buffer, forwarded as-is.
 * \param complexVector   Input buffer of complex samples, forwarded as-is.
 * \param scalar          Scaling divisor, forwarded as-is.
 * \param num_points      Number of complex samples, forwarded as-is.
 */
static inline void volk_16ic_s32f_magnitude_32f_u_orc(float* magnitudeVector,
                                                      const lv_16sc_t* complexVector,
                                                      const float scalar,
                                                      unsigned int num_points)
{
    volk_16ic_s32f_magnitude_32f_a_orc_impl(magnitudeVector, complexVector, scalar, num_points);
}
#endif /* LV_HAVE_ORC_DISABLED */
/*
 * Cross-references carried over from the documentation listing:
 *   lv_16sc_t               - short complex   (definition: volk_complex.h:53)
 *   int16_t                 - signed short    (definition: stdint.h:76)
 *   __VOLK_ATTR_ALIGNED(x)  - macro           (definition: volk_common.h:27)
 */