1 #ifndef INCLUDED_volk_16ic_magnitude_16i_a_H
2 #define INCLUDED_volk_16ic_magnitude_16i_a_H
10 #include <pmmintrin.h>
17 static inline void volk_16ic_magnitude_16i_a_sse3(
int16_t* magnitudeVector,
const lv_16sc_t* complexVector,
unsigned int num_points){
18 unsigned int number = 0;
19 const unsigned int quarterPoints = num_points / 4;
22 int16_t* magnitudeVectorPtr = magnitudeVector;
24 __m128 vScalar = _mm_set_ps1(32768.0);
25 __m128 invScalar = _mm_set_ps1(1.0/32768.0);
27 __m128 cplxValue1, cplxValue2, result;
32 for(;number < quarterPoints; number++){
34 inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
35 inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
36 inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
37 inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
39 inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
40 inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
41 inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
42 inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
44 cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
45 cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
47 complexVectorPtr += 8;
49 cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
50 cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
52 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
53 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);
55 result = _mm_hadd_ps(cplxValue1, cplxValue2);
57 result = _mm_sqrt_ps(result);
59 result = _mm_mul_ps(result, vScalar);
61 _mm_store_ps(outputFloatBuffer, result);
62 *magnitudeVectorPtr++ = (
int16_t)(outputFloatBuffer[0]);
63 *magnitudeVectorPtr++ = (
int16_t)(outputFloatBuffer[1]);
64 *magnitudeVectorPtr++ = (
int16_t)(outputFloatBuffer[2]);
65 *magnitudeVectorPtr++ = (
int16_t)(outputFloatBuffer[3]);
68 number = quarterPoints * 4;
69 magnitudeVectorPtr = &magnitudeVector[number];
70 complexVectorPtr = (
const int16_t*)&complexVector[number];
71 for(; number < num_points; number++){
72 const float val1Real = (float)(*complexVectorPtr++) / 32768.0;
73 const float val1Imag = (float)(*complexVectorPtr++) / 32768.0;
74 const float val1Result = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag)) * 32768.0;
75 *magnitudeVectorPtr++ = (
int16_t)(val1Result);
81 #include <xmmintrin.h>
88 static inline void volk_16ic_magnitude_16i_a_sse(
int16_t* magnitudeVector,
const lv_16sc_t* complexVector,
unsigned int num_points){
89 unsigned int number = 0;
90 const unsigned int quarterPoints = num_points / 4;
93 int16_t* magnitudeVectorPtr = magnitudeVector;
95 __m128 vScalar = _mm_set_ps1(32768.0);
96 __m128 invScalar = _mm_set_ps1(1.0/32768.0);
98 __m128 cplxValue1, cplxValue2, iValue, qValue, result;
103 for(;number < quarterPoints; number++){
105 inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
106 inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
107 inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
108 inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
110 cplxValue1 = _mm_load_ps(inputFloatBuffer);
111 complexVectorPtr += 4;
113 inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
114 inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
115 inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
116 inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
118 cplxValue2 = _mm_load_ps(inputFloatBuffer);
119 complexVectorPtr += 4;
121 cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
122 cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
125 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
127 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
129 iValue = _mm_mul_ps(iValue, iValue);
130 qValue = _mm_mul_ps(qValue, qValue);
132 result = _mm_add_ps(iValue, qValue);
134 result = _mm_sqrt_ps(result);
136 result = _mm_mul_ps(result, vScalar);
138 _mm_store_ps(outputFloatBuffer, result);
139 *magnitudeVectorPtr++ = (
int16_t)(outputFloatBuffer[0]);
140 *magnitudeVectorPtr++ = (
int16_t)(outputFloatBuffer[1]);
141 *magnitudeVectorPtr++ = (
int16_t)(outputFloatBuffer[2]);
142 *magnitudeVectorPtr++ = (
int16_t)(outputFloatBuffer[3]);
145 number = quarterPoints * 4;
146 magnitudeVectorPtr = &magnitudeVector[number];
147 complexVectorPtr = (
const int16_t*)&complexVector[number];
148 for(; number < num_points; number++){
149 const float val1Real = (float)(*complexVectorPtr++) / 32768.0;
150 const float val1Imag = (float)(*complexVectorPtr++) / 32768.0;
151 const float val1Result = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag)) * 32768.0;
152 *magnitudeVectorPtr++ = (
int16_t)(val1Result);
#ifdef LV_HAVE_GENERIC
/*!
  \brief Computes the magnitude of each 16-bit complex sample and stores it as a 16-bit integer (portable C path)

  The input is read as interleaved int16_t pairs (real, imaginary). Both
  components are converted to float and divided by 32768, the magnitude
  sqrt(re^2 + im^2) is computed, multiplied by 32768 and truncated to int16_t.

  \param magnitudeVector Output buffer; receives num_points int16_t magnitudes
  \param complexVector   Input buffer; num_points complex samples are read
  \param num_points      Number of complex samples to process

  NOTE(review): a magnitude above 32767 (possible when both components are
  close to full scale) is not representable in int16_t and the final cast does
  not saturate. Confirm callers keep the input within range.
*/
static inline void volk_16ic_magnitude_16i_generic(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){
  /* Walk the complex input as a flat array of int16_t components. */
  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
  int16_t* magnitudeVectorPtr = magnitudeVector;
  unsigned int number = 0;
  const float scalar = 32768.0;
  for(number = 0; number < num_points; number++){
    float real = ((float)(*complexVectorPtr++)) / scalar;
    float imag = ((float)(*complexVectorPtr++)) / scalar;
    *magnitudeVectorPtr++ = (int16_t)(sqrtf((real*real) + (imag*imag)) * scalar);
  }
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC_DISABLED
/*!
  \brief Externally defined kernel that this wrapper forwards to.

  Only the prototype is visible here; its behaviour is defined elsewhere.
  Compared with the wrapper it takes one extra argument, \p scalar.
*/
extern void volk_16ic_magnitude_16i_a_orc_impl(int16_t* magnitudeVector, const lv_16sc_t* complexVector, float scalar, unsigned int num_points);

/*!
  \brief Forwards to volk_16ic_magnitude_16i_a_orc_impl with a fixed scalar of 32768.0

  \param magnitudeVector Output buffer, passed through unchanged
  \param complexVector   Input buffer, passed through unchanged
  \param num_points      Number of complex samples, passed through unchanged

  NOTE(review): this wrapper is named "_u_" while the implementation it calls
  is named "_a_" - presumably intentional; confirm against the dispatcher.
*/
static inline void volk_16ic_magnitude_16i_u_orc(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){
  volk_16ic_magnitude_16i_a_orc_impl(magnitudeVector, complexVector, 32768.0, num_points);
}
#endif /* LV_HAVE_ORC_DISABLED */
/*
 * Definitions referenced by this listing:
 *   lv_16sc_t               short complex    (volk_complex.h:53)
 *   int16_t                 signed short     (stdint.h:76)
 *   __VOLK_ATTR_ALIGNED(x)  macro            (volk_common.h:27)
 */