1 #ifndef INCLUDED_volk_32fc_magnitude_squared_32f_u_H
2 #define INCLUDED_volk_32fc_magnitude_squared_32f_u_H
16 static inline void volk_32fc_magnitude_squared_32f_u_sse3(
float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points){
17 unsigned int number = 0;
18 const unsigned int quarterPoints = num_points / 4;
20 const float* complexVectorPtr = (
float*)complexVector;
21 float* magnitudeVectorPtr = magnitudeVector;
23 __m128 cplxValue1, cplxValue2, result;
24 for(;number < quarterPoints; number++){
25 cplxValue1 = _mm_loadu_ps(complexVectorPtr);
26 complexVectorPtr += 4;
28 cplxValue2 = _mm_loadu_ps(complexVectorPtr);
29 complexVectorPtr += 4;
31 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
32 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);
34 result = _mm_hadd_ps(cplxValue1, cplxValue2);
36 _mm_storeu_ps(magnitudeVectorPtr, result);
37 magnitudeVectorPtr += 4;
40 number = quarterPoints * 4;
41 for(; number < num_points; number++){
42 float val1Real = *complexVectorPtr++;
43 float val1Imag = *complexVectorPtr++;
44 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
50 #include <xmmintrin.h>
57 static inline void volk_32fc_magnitude_squared_32f_u_sse(
float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points){
58 unsigned int number = 0;
59 const unsigned int quarterPoints = num_points / 4;
61 const float* complexVectorPtr = (
float*)complexVector;
62 float* magnitudeVectorPtr = magnitudeVector;
64 __m128 cplxValue1, cplxValue2, iValue, qValue, result;
65 for(;number < quarterPoints; number++){
66 cplxValue1 = _mm_loadu_ps(complexVectorPtr);
67 complexVectorPtr += 4;
69 cplxValue2 = _mm_loadu_ps(complexVectorPtr);
70 complexVectorPtr += 4;
73 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
75 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
77 iValue = _mm_mul_ps(iValue, iValue);
78 qValue = _mm_mul_ps(qValue, qValue);
80 result = _mm_add_ps(iValue, qValue);
82 _mm_storeu_ps(magnitudeVectorPtr, result);
83 magnitudeVectorPtr += 4;
86 number = quarterPoints * 4;
87 for(; number < num_points; number++){
88 float val1Real = *complexVectorPtr++;
89 float val1Imag = *complexVectorPtr++;
90 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
95 #ifdef LV_HAVE_GENERIC
102 static inline void volk_32fc_magnitude_squared_32f_generic(
float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points){
103 const float* complexVectorPtr = (
float*)complexVector;
104 float* magnitudeVectorPtr = magnitudeVector;
105 unsigned int number = 0;
106 for(number = 0; number < num_points; number++){
107 const float real = *complexVectorPtr++;
108 const float imag = *complexVectorPtr++;
109 *magnitudeVectorPtr++ = (real*real) + (imag*imag);
115 #ifndef INCLUDED_volk_32fc_magnitude_squared_32f_a_H
116 #define INCLUDED_volk_32fc_magnitude_squared_32f_a_H
123 #include <pmmintrin.h>
130 static inline void volk_32fc_magnitude_squared_32f_a_sse3(
float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points){
131 unsigned int number = 0;
132 const unsigned int quarterPoints = num_points / 4;
134 const float* complexVectorPtr = (
float*)complexVector;
135 float* magnitudeVectorPtr = magnitudeVector;
137 __m128 cplxValue1, cplxValue2, result;
138 for(;number < quarterPoints; number++){
139 cplxValue1 = _mm_load_ps(complexVectorPtr);
140 complexVectorPtr += 4;
142 cplxValue2 = _mm_load_ps(complexVectorPtr);
143 complexVectorPtr += 4;
145 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
146 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);
148 result = _mm_hadd_ps(cplxValue1, cplxValue2);
150 _mm_store_ps(magnitudeVectorPtr, result);
151 magnitudeVectorPtr += 4;
154 number = quarterPoints * 4;
155 for(; number < num_points; number++){
156 float val1Real = *complexVectorPtr++;
157 float val1Imag = *complexVectorPtr++;
158 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
164 #include <xmmintrin.h>
171 static inline void volk_32fc_magnitude_squared_32f_a_sse(
float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points){
172 unsigned int number = 0;
173 const unsigned int quarterPoints = num_points / 4;
175 const float* complexVectorPtr = (
float*)complexVector;
176 float* magnitudeVectorPtr = magnitudeVector;
178 __m128 cplxValue1, cplxValue2, iValue, qValue, result;
179 for(;number < quarterPoints; number++){
180 cplxValue1 = _mm_load_ps(complexVectorPtr);
181 complexVectorPtr += 4;
183 cplxValue2 = _mm_load_ps(complexVectorPtr);
184 complexVectorPtr += 4;
187 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
189 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
191 iValue = _mm_mul_ps(iValue, iValue);
192 qValue = _mm_mul_ps(qValue, qValue);
194 result = _mm_add_ps(iValue, qValue);
196 _mm_store_ps(magnitudeVectorPtr, result);
197 magnitudeVectorPtr += 4;
200 number = quarterPoints * 4;
201 for(; number < num_points; number++){
202 float val1Real = *complexVectorPtr++;
203 float val1Imag = *complexVectorPtr++;
204 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
209 #ifdef LV_HAVE_GENERIC
216 static inline void volk_32fc_magnitude_squared_32f_a_generic(
float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points){
217 const float* complexVectorPtr = (
float*)complexVector;
218 float* magnitudeVectorPtr = magnitudeVector;
219 unsigned int number = 0;
220 for(number = 0; number < num_points; number++){
221 const float real = *complexVectorPtr++;
222 const float imag = *complexVectorPtr++;
223 *magnitudeVectorPtr++ = (real*real) + (imag*imag);
float complex lv_32fc_t
Definition: volk_complex.h:56