1 #ifndef INCLUDED_volk_32fc_magnitude_32f_u_H
2 #define INCLUDED_volk_32fc_magnitude_32f_u_H
16 static inline void volk_32fc_magnitude_32f_u_sse3(
float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points){
17 unsigned int number = 0;
18 const unsigned int quarterPoints = num_points / 4;
20 const float* complexVectorPtr = (
float*)complexVector;
21 float* magnitudeVectorPtr = magnitudeVector;
23 __m128 cplxValue1, cplxValue2, result;
24 for(;number < quarterPoints; number++){
25 cplxValue1 = _mm_loadu_ps(complexVectorPtr);
26 complexVectorPtr += 4;
28 cplxValue2 = _mm_loadu_ps(complexVectorPtr);
29 complexVectorPtr += 4;
31 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
32 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);
34 result = _mm_hadd_ps(cplxValue1, cplxValue2);
36 result = _mm_sqrt_ps(result);
38 _mm_storeu_ps(magnitudeVectorPtr, result);
39 magnitudeVectorPtr += 4;
42 number = quarterPoints * 4;
43 for(; number < num_points; number++){
44 float val1Real = *complexVectorPtr++;
45 float val1Imag = *complexVectorPtr++;
46 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
52 #include <xmmintrin.h>
59 static inline void volk_32fc_magnitude_32f_u_sse(
float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points){
60 unsigned int number = 0;
61 const unsigned int quarterPoints = num_points / 4;
63 const float* complexVectorPtr = (
float*)complexVector;
64 float* magnitudeVectorPtr = magnitudeVector;
66 __m128 cplxValue1, cplxValue2, iValue, qValue, result;
67 for(;number < quarterPoints; number++){
68 cplxValue1 = _mm_loadu_ps(complexVectorPtr);
69 complexVectorPtr += 4;
71 cplxValue2 = _mm_loadu_ps(complexVectorPtr);
72 complexVectorPtr += 4;
75 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
77 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
79 iValue = _mm_mul_ps(iValue, iValue);
80 qValue = _mm_mul_ps(qValue, qValue);
82 result = _mm_add_ps(iValue, qValue);
84 result = _mm_sqrt_ps(result);
86 _mm_storeu_ps(magnitudeVectorPtr, result);
87 magnitudeVectorPtr += 4;
90 number = quarterPoints * 4;
91 for(; number < num_points; number++){
92 float val1Real = *complexVectorPtr++;
93 float val1Imag = *complexVectorPtr++;
94 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
99 #ifdef LV_HAVE_GENERIC
106 static inline void volk_32fc_magnitude_32f_generic(
float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points){
107 const float* complexVectorPtr = (
float*)complexVector;
108 float* magnitudeVectorPtr = magnitudeVector;
109 unsigned int number = 0;
110 for(number = 0; number < num_points; number++){
111 const float real = *complexVectorPtr++;
112 const float imag = *complexVectorPtr++;
113 *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
119 #ifndef INCLUDED_volk_32fc_magnitude_32f_a_H
120 #define INCLUDED_volk_32fc_magnitude_32f_a_H
127 #include <pmmintrin.h>
134 static inline void volk_32fc_magnitude_32f_a_sse3(
float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points){
135 unsigned int number = 0;
136 const unsigned int quarterPoints = num_points / 4;
138 const float* complexVectorPtr = (
float*)complexVector;
139 float* magnitudeVectorPtr = magnitudeVector;
141 __m128 cplxValue1, cplxValue2, result;
142 for(;number < quarterPoints; number++){
143 cplxValue1 = _mm_load_ps(complexVectorPtr);
144 complexVectorPtr += 4;
146 cplxValue2 = _mm_load_ps(complexVectorPtr);
147 complexVectorPtr += 4;
149 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
150 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);
152 result = _mm_hadd_ps(cplxValue1, cplxValue2);
154 result = _mm_sqrt_ps(result);
156 _mm_store_ps(magnitudeVectorPtr, result);
157 magnitudeVectorPtr += 4;
160 number = quarterPoints * 4;
161 for(; number < num_points; number++){
162 float val1Real = *complexVectorPtr++;
163 float val1Imag = *complexVectorPtr++;
164 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
170 #include <xmmintrin.h>
177 static inline void volk_32fc_magnitude_32f_a_sse(
float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points){
178 unsigned int number = 0;
179 const unsigned int quarterPoints = num_points / 4;
181 const float* complexVectorPtr = (
float*)complexVector;
182 float* magnitudeVectorPtr = magnitudeVector;
184 __m128 cplxValue1, cplxValue2, iValue, qValue, result;
185 for(;number < quarterPoints; number++){
186 cplxValue1 = _mm_load_ps(complexVectorPtr);
187 complexVectorPtr += 4;
189 cplxValue2 = _mm_load_ps(complexVectorPtr);
190 complexVectorPtr += 4;
193 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
195 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
197 iValue = _mm_mul_ps(iValue, iValue);
198 qValue = _mm_mul_ps(qValue, qValue);
200 result = _mm_add_ps(iValue, qValue);
202 result = _mm_sqrt_ps(result);
204 _mm_store_ps(magnitudeVectorPtr, result);
205 magnitudeVectorPtr += 4;
208 number = quarterPoints * 4;
209 for(; number < num_points; number++){
210 float val1Real = *complexVectorPtr++;
211 float val1Imag = *complexVectorPtr++;
212 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
217 #ifdef LV_HAVE_GENERIC
224 static inline void volk_32fc_magnitude_32f_a_generic(
float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points){
225 const float* complexVectorPtr = (
float*)complexVector;
226 float* magnitudeVectorPtr = magnitudeVector;
227 unsigned int number = 0;
228 for(number = 0; number < num_points; number++){
229 const float real = *complexVectorPtr++;
230 const float imag = *complexVectorPtr++;
231 *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
243 extern void volk_32fc_magnitude_32f_a_orc_impl(
float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points);
244 static inline void volk_32fc_magnitude_32f_u_orc(
float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points){
245 volk_32fc_magnitude_32f_a_orc_impl(magnitudeVector, complexVector, num_points);
/* lv_32fc_t: float complex (definition: volk_complex.h:56) */