1 #ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_a_H
2 #define INCLUDED_volk_32fc_s32f_magnitude_16i_a_H
10 #include <pmmintrin.h>
18 static inline void volk_32fc_s32f_magnitude_16i_a_sse3(
int16_t* magnitudeVector,
const lv_32fc_t* complexVector,
const float scalar,
unsigned int num_points){
19 unsigned int number = 0;
20 const unsigned int quarterPoints = num_points / 4;
22 const float* complexVectorPtr = (
const float*)complexVector;
23 int16_t* magnitudeVectorPtr = magnitudeVector;
25 __m128 vScalar = _mm_set_ps1(scalar);
27 __m128 cplxValue1, cplxValue2, result;
31 for(;number < quarterPoints; number++){
32 cplxValue1 = _mm_load_ps(complexVectorPtr);
33 complexVectorPtr += 4;
35 cplxValue2 = _mm_load_ps(complexVectorPtr);
36 complexVectorPtr += 4;
38 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
39 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);
41 result = _mm_hadd_ps(cplxValue1, cplxValue2);
43 result = _mm_sqrt_ps(result);
45 result = _mm_mul_ps(result, vScalar);
47 _mm_store_ps(floatBuffer, result);
48 *magnitudeVectorPtr++ = (
int16_t)(floatBuffer[0]);
49 *magnitudeVectorPtr++ = (
int16_t)(floatBuffer[1]);
50 *magnitudeVectorPtr++ = (
int16_t)(floatBuffer[2]);
51 *magnitudeVectorPtr++ = (
int16_t)(floatBuffer[3]);
54 number = quarterPoints * 4;
55 magnitudeVectorPtr = &magnitudeVector[number];
56 for(; number < num_points; number++){
57 float val1Real = *complexVectorPtr++;
58 float val1Imag = *complexVectorPtr++;
59 *magnitudeVectorPtr++ = (
int16_t)(sqrtf((val1Real * val1Real) + (val1Imag * val1Imag)) * scalar);
65 #include <xmmintrin.h>
73 static inline void volk_32fc_s32f_magnitude_16i_a_sse(
int16_t* magnitudeVector,
const lv_32fc_t* complexVector,
const float scalar,
unsigned int num_points){
74 unsigned int number = 0;
75 const unsigned int quarterPoints = num_points / 4;
77 const float* complexVectorPtr = (
const float*)complexVector;
78 int16_t* magnitudeVectorPtr = magnitudeVector;
80 __m128 vScalar = _mm_set_ps1(scalar);
82 __m128 cplxValue1, cplxValue2, iValue, qValue, result;
86 for(;number < quarterPoints; number++){
87 cplxValue1 = _mm_load_ps(complexVectorPtr);
88 complexVectorPtr += 4;
90 cplxValue2 = _mm_load_ps(complexVectorPtr);
91 complexVectorPtr += 4;
94 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
96 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
98 iValue = _mm_mul_ps(iValue, iValue);
99 qValue = _mm_mul_ps(qValue, qValue);
101 result = _mm_add_ps(iValue, qValue);
103 result = _mm_sqrt_ps(result);
105 result = _mm_mul_ps(result, vScalar);
107 _mm_store_ps(floatBuffer, result);
108 *magnitudeVectorPtr++ = (
int16_t)(floatBuffer[0]);
109 *magnitudeVectorPtr++ = (
int16_t)(floatBuffer[1]);
110 *magnitudeVectorPtr++ = (
int16_t)(floatBuffer[2]);
111 *magnitudeVectorPtr++ = (
int16_t)(floatBuffer[3]);
114 number = quarterPoints * 4;
115 magnitudeVectorPtr = &magnitudeVector[number];
116 for(; number < num_points; number++){
117 float val1Real = *complexVectorPtr++;
118 float val1Imag = *complexVectorPtr++;
119 *magnitudeVectorPtr++ = (
int16_t)(sqrtf((val1Real * val1Real) + (val1Imag * val1Imag)) * scalar);
#ifdef LV_HAVE_GENERIC
/*!
  \brief Computes the magnitude of every complex sample, multiplies it by
         scalar and stores the result as a 16-bit integer (portable C version).
  \param magnitudeVector Output buffer; receives num_points int16_t values.
  \param complexVector   Input buffer of num_points complex samples, read as
                         interleaved (real, imag) floats.
  \param scalar          Factor applied to each magnitude before conversion.
  \param num_points      Number of complex samples to process.
  \note The float -> int16_t conversion is a plain cast: the fractional part
        is discarded and no saturation is performed, so the caller must make
        sure the scaled magnitudes fit in an int16_t.
*/
static inline void volk_32fc_s32f_magnitude_16i_generic(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){
  /* Keep the const qualifier when viewing the complex input as raw floats. */
  const float* complexVectorPtr = (const float*)complexVector;
  int16_t* magnitudeVectorPtr = magnitudeVector;
  unsigned int number = 0;
  for(number = 0; number < num_points; number++){
    const float real = *complexVectorPtr++;
    const float imag = *complexVectorPtr++;
    *magnitudeVectorPtr++ = (int16_t)(sqrtf((real*real) + (imag*imag)) * scalar);
  }
}
#endif /* LV_HAVE_GENERIC */
152 extern void volk_32fc_s32f_magnitude_16i_a_orc_impl(
int16_t* magnitudeVector,
const lv_32fc_t* complexVector,
const float scalar,
unsigned int num_points);
153 static inline void volk_32fc_s32f_magnitude_16i_u_orc(
int16_t* magnitudeVector,
const lv_32fc_t* complexVector,
const float scalar,
unsigned int num_points){
154 volk_32fc_s32f_magnitude_16i_a_orc_impl(magnitudeVector, complexVector, scalar, num_points);
/*
 * Definitions referenced by this file:
 *   int16_t                 - signed short            (stdint.h:76)
 *   __VOLK_ATTR_ALIGNED(x)  - alignment macro         (volk_common.h:27)
 *   lv_32fc_t               - float complex           (volk_complex.h:56)
 */