1 #ifndef INCLUDED_volk_32fc_magnitude_squared_32f_u_H
2 #define INCLUDED_volk_32fc_magnitude_squared_32f_u_H
16 static inline void volk_32fc_magnitude_squared_32f_u_sse3(
float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points){
17 unsigned int number = 0;
18 const unsigned int quarterPoints = num_points / 4;
20 const float* complexVectorPtr = (
float*)complexVector;
21 float* magnitudeVectorPtr = magnitudeVector;
23 __m128 cplxValue1, cplxValue2, result;
24 for(;number < quarterPoints; number++){
25 cplxValue1 = _mm_loadu_ps(complexVectorPtr);
26 complexVectorPtr += 4;
28 cplxValue2 = _mm_loadu_ps(complexVectorPtr);
29 complexVectorPtr += 4;
31 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
32 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);
34 result = _mm_hadd_ps(cplxValue1, cplxValue2);
36 _mm_storeu_ps(magnitudeVectorPtr, result);
37 magnitudeVectorPtr += 4;
40 number = quarterPoints * 4;
41 for(; number < num_points; number++){
42 float val1Real = *complexVectorPtr++;
43 float val1Imag = *complexVectorPtr++;
44 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
50 #include <xmmintrin.h>
57 static inline void volk_32fc_magnitude_squared_32f_u_sse(
float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points){
58 unsigned int number = 0;
59 const unsigned int quarterPoints = num_points / 4;
61 const float* complexVectorPtr = (
float*)complexVector;
62 float* magnitudeVectorPtr = magnitudeVector;
64 __m128 cplxValue1, cplxValue2, iValue, qValue, result;
65 for(;number < quarterPoints; number++){
66 cplxValue1 = _mm_loadu_ps(complexVectorPtr);
67 complexVectorPtr += 4;
69 cplxValue2 = _mm_loadu_ps(complexVectorPtr);
70 complexVectorPtr += 4;
73 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
75 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
77 iValue = _mm_mul_ps(iValue, iValue);
78 qValue = _mm_mul_ps(qValue, qValue);
80 result = _mm_add_ps(iValue, qValue);
82 _mm_storeu_ps(magnitudeVectorPtr, result);
83 magnitudeVectorPtr += 4;
86 number = quarterPoints * 4;
87 for(; number < num_points; number++){
88 float val1Real = *complexVectorPtr++;
89 float val1Imag = *complexVectorPtr++;
90 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
95 #ifdef LV_HAVE_GENERIC
102 static inline void volk_32fc_magnitude_squared_32f_generic(
float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points){
103 const float* complexVectorPtr = (
float*)complexVector;
104 float* magnitudeVectorPtr = magnitudeVector;
105 unsigned int number = 0;
106 for(number = 0; number < num_points; number++){
107 const float real = *complexVectorPtr++;
108 const float imag = *complexVectorPtr++;
109 *magnitudeVectorPtr++ = (real*real) + (imag*imag);
115 #ifndef INCLUDED_volk_32fc_magnitude_squared_32f_a_H
116 #define INCLUDED_volk_32fc_magnitude_squared_32f_a_H
123 #include <pmmintrin.h>
130 static inline void volk_32fc_magnitude_squared_32f_a_sse3(
float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points){
131 unsigned int number = 0;
132 const unsigned int quarterPoints = num_points / 4;
134 const float* complexVectorPtr = (
float*)complexVector;
135 float* magnitudeVectorPtr = magnitudeVector;
137 __m128 cplxValue1, cplxValue2, result;
138 for(;number < quarterPoints; number++){
139 cplxValue1 = _mm_load_ps(complexVectorPtr);
140 complexVectorPtr += 4;
142 cplxValue2 = _mm_load_ps(complexVectorPtr);
143 complexVectorPtr += 4;
145 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
146 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);
148 result = _mm_hadd_ps(cplxValue1, cplxValue2);
150 _mm_store_ps(magnitudeVectorPtr, result);
151 magnitudeVectorPtr += 4;
154 number = quarterPoints * 4;
155 for(; number < num_points; number++){
156 float val1Real = *complexVectorPtr++;
157 float val1Imag = *complexVectorPtr++;
158 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
164 #include <xmmintrin.h>
171 static inline void volk_32fc_magnitude_squared_32f_a_sse(
float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points){
172 unsigned int number = 0;
173 const unsigned int quarterPoints = num_points / 4;
175 const float* complexVectorPtr = (
float*)complexVector;
176 float* magnitudeVectorPtr = magnitudeVector;
178 __m128 cplxValue1, cplxValue2, iValue, qValue, result;
179 for(;number < quarterPoints; number++){
180 cplxValue1 = _mm_load_ps(complexVectorPtr);
181 complexVectorPtr += 4;
183 cplxValue2 = _mm_load_ps(complexVectorPtr);
184 complexVectorPtr += 4;
187 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
189 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
191 iValue = _mm_mul_ps(iValue, iValue);
192 qValue = _mm_mul_ps(qValue, qValue);
194 result = _mm_add_ps(iValue, qValue);
196 _mm_store_ps(magnitudeVectorPtr, result);
197 magnitudeVectorPtr += 4;
200 number = quarterPoints * 4;
201 for(; number < num_points; number++){
202 float val1Real = *complexVectorPtr++;
203 float val1Imag = *complexVectorPtr++;
204 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
210 #include <arm_neon.h>
/*!
  \brief Computes the squared magnitude (real^2 + imag^2) of every complex input
         sample using NEON intrinsics.
  \param magnitudeVector Output buffer; receives one float per complex input sample
  \param complexVector Input buffer, read as consecutive (real, imag) float pairs
  \param num_points Number of complex samples to process
*/
static inline void volk_32fc_magnitude_squared_32f_neon(float* magnitudeVector,
                                                        const lv_32fc_t* complexVector,
                                                        unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    const float* complexVectorPtr = (const float*)complexVector;
    float* magnitudeVectorPtr = magnitudeVector;

    float32x4x2_t cmplx_val;
    float32x4_t result; /* four squared magnitudes produced per vector pass */

    /* Vector path: four complex samples (eight floats) per pass. */
    for (; number < quarterPoints; number++) {
        /* De-interleaving load: val[0] gets the even floats (reals),
           val[1] gets the odd floats (imaginaries). */
        cmplx_val = vld2q_f32(complexVectorPtr);
        complexVectorPtr += 8;

        /* Square reals and imaginaries separately, then sum lane-wise. */
        cmplx_val.val[0] = vmulq_f32(cmplx_val.val[0], cmplx_val.val[0]);
        cmplx_val.val[1] = vmulq_f32(cmplx_val.val[1], cmplx_val.val[1]);

        result = vaddq_f32(cmplx_val.val[0], cmplx_val.val[1]);

        vst1q_f32(magnitudeVectorPtr, result);
        magnitudeVectorPtr += 4;
    }

    /* Scalar path for the samples left over after the last full group of four. */
    for (number = quarterPoints * 4; number < num_points; number++) {
        const float val1Real = *complexVectorPtr++;
        const float val1Imag = *complexVectorPtr++;
        *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
    }
}
251 #ifdef LV_HAVE_GENERIC
258 static inline void volk_32fc_magnitude_squared_32f_a_generic(
float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points){
259 const float* complexVectorPtr = (
float*)complexVector;
260 float* magnitudeVectorPtr = magnitudeVector;
261 unsigned int number = 0;
262 for(number = 0; number < num_points; number++){
263 const float real = *complexVectorPtr++;
264 const float imag = *complexVectorPtr++;
265 *magnitudeVectorPtr++ = (real*real) + (imag*imag);
float complex lv_32fc_t
Definition: volk_complex.h:56