1 #ifndef INCLUDED_volk_32fc_magnitude_32f_u_H
2 #define INCLUDED_volk_32fc_magnitude_32f_u_H
16 static inline void volk_32fc_magnitude_32f_u_sse3(
float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points){
17 unsigned int number = 0;
18 const unsigned int quarterPoints = num_points / 4;
20 const float* complexVectorPtr = (
float*)complexVector;
21 float* magnitudeVectorPtr = magnitudeVector;
23 __m128 cplxValue1, cplxValue2, result;
24 for(;number < quarterPoints; number++){
25 cplxValue1 = _mm_loadu_ps(complexVectorPtr);
26 complexVectorPtr += 4;
28 cplxValue2 = _mm_loadu_ps(complexVectorPtr);
29 complexVectorPtr += 4;
31 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
32 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);
34 result = _mm_hadd_ps(cplxValue1, cplxValue2);
36 result = _mm_sqrt_ps(result);
38 _mm_storeu_ps(magnitudeVectorPtr, result);
39 magnitudeVectorPtr += 4;
42 number = quarterPoints * 4;
43 for(; number < num_points; number++){
44 float val1Real = *complexVectorPtr++;
45 float val1Imag = *complexVectorPtr++;
46 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
52 #include <xmmintrin.h>
59 static inline void volk_32fc_magnitude_32f_u_sse(
float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points){
60 unsigned int number = 0;
61 const unsigned int quarterPoints = num_points / 4;
63 const float* complexVectorPtr = (
float*)complexVector;
64 float* magnitudeVectorPtr = magnitudeVector;
66 __m128 cplxValue1, cplxValue2, iValue, qValue, result;
67 for(;number < quarterPoints; number++){
68 cplxValue1 = _mm_loadu_ps(complexVectorPtr);
69 complexVectorPtr += 4;
71 cplxValue2 = _mm_loadu_ps(complexVectorPtr);
72 complexVectorPtr += 4;
75 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
77 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
79 iValue = _mm_mul_ps(iValue, iValue);
80 qValue = _mm_mul_ps(qValue, qValue);
82 result = _mm_add_ps(iValue, qValue);
84 result = _mm_sqrt_ps(result);
86 _mm_storeu_ps(magnitudeVectorPtr, result);
87 magnitudeVectorPtr += 4;
90 number = quarterPoints * 4;
91 for(; number < num_points; number++){
92 float val1Real = *complexVectorPtr++;
93 float val1Imag = *complexVectorPtr++;
94 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
99 #ifdef LV_HAVE_GENERIC
106 static inline void volk_32fc_magnitude_32f_generic(
float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points){
107 const float* complexVectorPtr = (
float*)complexVector;
108 float* magnitudeVectorPtr = magnitudeVector;
109 unsigned int number = 0;
110 for(number = 0; number < num_points; number++){
111 const float real = *complexVectorPtr++;
112 const float imag = *complexVectorPtr++;
113 *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
119 #ifndef INCLUDED_volk_32fc_magnitude_32f_a_H
120 #define INCLUDED_volk_32fc_magnitude_32f_a_H
127 #include <pmmintrin.h>
134 static inline void volk_32fc_magnitude_32f_a_sse3(
float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points){
135 unsigned int number = 0;
136 const unsigned int quarterPoints = num_points / 4;
138 const float* complexVectorPtr = (
float*)complexVector;
139 float* magnitudeVectorPtr = magnitudeVector;
141 __m128 cplxValue1, cplxValue2, result;
142 for(;number < quarterPoints; number++){
143 cplxValue1 = _mm_load_ps(complexVectorPtr);
144 complexVectorPtr += 4;
146 cplxValue2 = _mm_load_ps(complexVectorPtr);
147 complexVectorPtr += 4;
149 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
150 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);
152 result = _mm_hadd_ps(cplxValue1, cplxValue2);
154 result = _mm_sqrt_ps(result);
156 _mm_store_ps(magnitudeVectorPtr, result);
157 magnitudeVectorPtr += 4;
160 number = quarterPoints * 4;
161 for(; number < num_points; number++){
162 float val1Real = *complexVectorPtr++;
163 float val1Imag = *complexVectorPtr++;
164 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
170 #include <xmmintrin.h>
177 static inline void volk_32fc_magnitude_32f_a_sse(
float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points){
178 unsigned int number = 0;
179 const unsigned int quarterPoints = num_points / 4;
181 const float* complexVectorPtr = (
float*)complexVector;
182 float* magnitudeVectorPtr = magnitudeVector;
184 __m128 cplxValue1, cplxValue2, iValue, qValue, result;
185 for(;number < quarterPoints; number++){
186 cplxValue1 = _mm_load_ps(complexVectorPtr);
187 complexVectorPtr += 4;
189 cplxValue2 = _mm_load_ps(complexVectorPtr);
190 complexVectorPtr += 4;
193 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
195 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
197 iValue = _mm_mul_ps(iValue, iValue);
198 qValue = _mm_mul_ps(qValue, qValue);
200 result = _mm_add_ps(iValue, qValue);
202 result = _mm_sqrt_ps(result);
204 _mm_store_ps(magnitudeVectorPtr, result);
205 magnitudeVectorPtr += 4;
208 number = quarterPoints * 4;
209 for(; number < num_points; number++){
210 float val1Real = *complexVectorPtr++;
211 float val1Imag = *complexVectorPtr++;
212 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
217 #ifdef LV_HAVE_GENERIC
224 static inline void volk_32fc_magnitude_32f_a_generic(
float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points){
225 const float* complexVectorPtr = (
float*)complexVector;
226 float* magnitudeVectorPtr = magnitudeVector;
227 unsigned int number = 0;
228 for(number = 0; number < num_points; number++){
229 const float real = *complexVectorPtr++;
230 const float imag = *complexVectorPtr++;
231 *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
237 #include <arm_neon.h>
/**
 * Computes the magnitude of every complex sample using NEON.
 *
 * The vector part forms re^2 + im^2 and then approximates the square root
 * as recip_estimate(rsqrt_estimate(x)) using the NEON estimate
 * instructions, so those outputs are approximations. The scalar tail (the
 * last num_points % 4 samples) uses sqrtf.
 *
 * @param magnitudeVector  Output buffer; receives num_points floats.
 * @param complexVector    Input buffer; read as interleaved (real, imag)
 *                         float pairs, num_points pairs in total.
 * @param num_points       Number of complex samples to process.
 */
static inline void volk_32fc_magnitude_32f_neon(float* magnitudeVector,
                                                const lv_32fc_t* complexVector,
                                                unsigned int num_points)
{
    unsigned int number; /* loop counter shared by the vector and tail loops */
    const unsigned int quarter_points = num_points / 4;
    const float* complexVectorPtr = (const float*)complexVector;
    float* magnitudeVectorPtr = magnitudeVector;

    float32x4x2_t complex_vec;
    float32x4_t magnitude_vec;

    for (number = 0; number < quarter_points; number++) {
        /* De-interleaving load: val[0] = four reals, val[1] = four imags. */
        complex_vec = vld2q_f32(complexVectorPtr);

        /* re^2, then accumulate im * im on top of it. */
        complex_vec.val[0] = vmulq_f32(complex_vec.val[0], complex_vec.val[0]);
        magnitude_vec =
            vmlaq_f32(complex_vec.val[0], complex_vec.val[1], complex_vec.val[1]);

        /* sqrt(x) ~= 1 / (1 / sqrt(x)), both steps being estimates. */
        magnitude_vec = vrsqrteq_f32(magnitude_vec);
        magnitude_vec = vrecpeq_f32(magnitude_vec);

        vst1q_f32(magnitudeVectorPtr, magnitude_vec);

        complexVectorPtr += 8;
        magnitudeVectorPtr += 4;
    }

    /* Scalar tail: the up-to-three samples left over after the vector part. */
    for (number = quarter_points * 4; number < num_points; number++) {
        const float real = *complexVectorPtr++;
        const float imag = *complexVectorPtr++;
        *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
    }
}
/**
 * Approximates the magnitude of every complex sample using NEON without any
 * square root in the vector part.
 *
 * For each sample, with mx = max(|re|, |im|) and mn = min(|re|, |im|), the
 * vector part computes a * mx + b * mn, where the coefficient pair depends
 * on how mn compares with threshold * mx:
 *   - mn >  threshold * mx : a = 0.84, b = 0.561
 *   - mn <= threshold * mx : a = 0.99, b = 0.197
 * The scalar tail (the last num_points % 4 samples) uses sqrtf, so those
 * outputs are not produced by the approximation.
 *
 * @param magnitudeVector  Output buffer; receives num_points floats.
 * @param complexVector    Input buffer; read as interleaved (real, imag)
 *                         float pairs, num_points pairs in total.
 * @param num_points       Number of complex samples to process.
 */
static inline void volk_32fc_magnitude_32f_neon_fancy_sweet(float* magnitudeVector,
                                                            const lv_32fc_t* complexVector,
                                                            unsigned int num_points)
{
    unsigned int number; /* loop counter shared by the vector and tail loops */
    const unsigned int quarter_points = num_points / 4;
    const float* complexVectorPtr = (const float*)complexVector;
    float* magnitudeVectorPtr = magnitudeVector;

    const float threshold = 0.4142135;

    float32x4_t a_vec, b_vec, a_high, a_low, b_high, b_low;
    a_high = vdupq_n_f32(0.84);
    b_high = vdupq_n_f32(0.561);
    a_low = vdupq_n_f32(0.99);
    b_low = vdupq_n_f32(0.197);

    /* Bit patterns of the coefficients, used for the mask-based selection
     * below. vreinterpretq_* replaces C-style casts between vector types. */
    const uint32x4_t a_high_bits = vreinterpretq_u32_f32(a_high);
    const uint32x4_t b_high_bits = vreinterpretq_u32_f32(b_high);
    const uint32x4_t a_low_bits = vreinterpretq_u32_f32(a_low);
    const uint32x4_t b_low_bits = vreinterpretq_u32_f32(b_low);

    uint32x4_t comp0, comp1;

    float32x4x2_t complex_vec;
    float32x4_t min_vec, max_vec, scaled_max, magnitude_vec;
    float32x4_t real_abs, imag_abs;

    for (number = 0; number < quarter_points; number++) {
        /* De-interleaving load: val[0] = four reals, val[1] = four imags. */
        complex_vec = vld2q_f32(complexVectorPtr);

        real_abs = vabsq_f32(complex_vec.val[0]);
        imag_abs = vabsq_f32(complex_vec.val[1]);

        min_vec = vminq_f32(real_abs, imag_abs);
        max_vec = vmaxq_f32(real_abs, imag_abs);

        /* Per-lane masks (all ones / all zeros) for the two coefficient
         * regions; the scaled maximum is computed once and shared. */
        scaled_max = vmulq_n_f32(max_vec, threshold);
        comp0 = vcgtq_f32(min_vec, scaled_max);
        comp1 = vcleq_f32(min_vec, scaled_max);

        /* At most one of comp0/comp1 is set in any lane, so OR-ing the two
         * masked bit patterns gives the same bits as adding them. */
        a_vec = vreinterpretq_f32_u32(
            vorrq_u32(vandq_u32(comp0, a_high_bits), vandq_u32(comp1, a_low_bits)));
        b_vec = vreinterpretq_f32_u32(
            vorrq_u32(vandq_u32(comp0, b_high_bits), vandq_u32(comp1, b_low_bits)));

        min_vec = vmulq_f32(min_vec, b_vec);
        max_vec = vmulq_f32(max_vec, a_vec);

        magnitude_vec = vaddq_f32(min_vec, max_vec);
        vst1q_f32(magnitudeVectorPtr, magnitude_vec);

        complexVectorPtr += 8;
        magnitudeVectorPtr += 4;
    }

    /* Scalar tail: the up-to-three samples left over after the vector part. */
    for (number = quarter_points * 4; number < num_points; number++) {
        const float real = *complexVectorPtr++;
        const float imag = *complexVectorPtr++;
        *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
    }
}
351 extern void volk_32fc_magnitude_32f_a_orc_impl(
float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points);
352 static inline void volk_32fc_magnitude_32f_u_orc(
float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points){
353 volk_32fc_magnitude_32f_a_orc_impl(magnitudeVector, complexVector, num_points);
/* lv_32fc_t is a typedef for `float complex`; definition: volk_complex.h:56. */