1 #ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H
2 #define INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H
18 static inline void volk_32f_x2_s32f_interleave_16ic_a_sse2(
lv_16sc_t* complexVector,
const float* iBuffer,
const float* qBuffer,
const float scalar,
unsigned int num_points){
19 unsigned int number = 0;
20 const float* iBufferPtr = iBuffer;
21 const float* qBufferPtr = qBuffer;
23 __m128 vScalar = _mm_set_ps1(scalar);
25 const unsigned int quarterPoints = num_points / 4;
27 __m128 iValue, qValue, cplxValue1, cplxValue2;
28 __m128i intValue1, intValue2;
32 for(;number < quarterPoints; number++){
33 iValue = _mm_load_ps(iBufferPtr);
34 qValue = _mm_load_ps(qBufferPtr);
37 cplxValue1 = _mm_unpacklo_ps(iValue, qValue);
38 cplxValue1 = _mm_mul_ps(cplxValue1, vScalar);
41 cplxValue2 = _mm_unpackhi_ps(iValue, qValue);
42 cplxValue2 = _mm_mul_ps(cplxValue2, vScalar);
44 intValue1 = _mm_cvtps_epi32(cplxValue1);
45 intValue2 = _mm_cvtps_epi32(cplxValue2);
47 intValue1 = _mm_packs_epi32(intValue1, intValue2);
49 _mm_store_si128((__m128i*)complexVectorPtr, intValue1);
50 complexVectorPtr += 8;
56 number = quarterPoints * 4;
57 complexVectorPtr = (
int16_t*)(&complexVector[number]);
58 for(; number < num_points; number++){
59 *complexVectorPtr++ = (
int16_t)(*iBufferPtr++ * scalar);
60 *complexVectorPtr++ = (
int16_t)(*qBufferPtr++ * scalar);
67 #include <xmmintrin.h>
76 static inline void volk_32f_x2_s32f_interleave_16ic_a_sse(
lv_16sc_t* complexVector,
const float* iBuffer,
const float* qBuffer,
const float scalar,
unsigned int num_points){
77 unsigned int number = 0;
78 const float* iBufferPtr = iBuffer;
79 const float* qBufferPtr = qBuffer;
81 __m128 vScalar = _mm_set_ps1(scalar);
83 const unsigned int quarterPoints = num_points / 4;
85 __m128 iValue, qValue, cplxValue;
91 for(;number < quarterPoints; number++){
92 iValue = _mm_load_ps(iBufferPtr);
93 qValue = _mm_load_ps(qBufferPtr);
96 cplxValue = _mm_unpacklo_ps(iValue, qValue);
97 cplxValue = _mm_mul_ps(cplxValue, vScalar);
99 _mm_store_ps(floatBuffer, cplxValue);
101 *complexVectorPtr++ = (
int16_t)(floatBuffer[0]);
102 *complexVectorPtr++ = (
int16_t)(floatBuffer[1]);
103 *complexVectorPtr++ = (
int16_t)(floatBuffer[2]);
104 *complexVectorPtr++ = (
int16_t)(floatBuffer[3]);
107 cplxValue = _mm_unpackhi_ps(iValue, qValue);
108 cplxValue = _mm_mul_ps(cplxValue, vScalar);
110 _mm_store_ps(floatBuffer, cplxValue);
112 *complexVectorPtr++ = (
int16_t)(floatBuffer[0]);
113 *complexVectorPtr++ = (
int16_t)(floatBuffer[1]);
114 *complexVectorPtr++ = (
int16_t)(floatBuffer[2]);
115 *complexVectorPtr++ = (
int16_t)(floatBuffer[3]);
121 number = quarterPoints * 4;
122 complexVectorPtr = (
int16_t*)(&complexVector[number]);
123 for(; number < num_points; number++){
124 *complexVectorPtr++ = (
int16_t)(*iBufferPtr++ * scalar);
125 *complexVectorPtr++ = (
int16_t)(*qBufferPtr++ * scalar);
#ifdef LV_HAVE_GENERIC
/*!
  \brief Interleaves two float vectors (I and Q) into one vector of 16-bit complex samples, scaling every value by \p scalar first (portable C version).
  \param complexVector Output vector; written as consecutive int16_t pairs (I then Q) for each point
  \param iBuffer Input vector of in-phase samples
  \param qBuffer Input vector of quadrature samples
  \param scalar Factor every input sample is multiplied by before conversion to int16_t
  \param num_points Number of samples to read from each of iBuffer and qBuffer (and complex points to write)

  \note The float-to-int16_t conversion is a plain C cast: it truncates toward zero and performs
        no saturation, so scaled values are expected to fit the int16_t range.
*/
static inline void volk_32f_x2_s32f_interleave_16ic_generic(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){
  /* View the complex output as a flat stream of int16_t values: I0, Q0, I1, Q1, ... */
  int16_t* complexVectorPtr = (int16_t*)complexVector;
  const float* iBufferPtr = iBuffer;
  const float* qBufferPtr = qBuffer;
  unsigned int number = 0;

  for(number = 0; number < num_points; number++){
    *complexVectorPtr++ = (int16_t)(*iBufferPtr++ * scalar);
    *complexVectorPtr++ = (int16_t)(*qBufferPtr++ * scalar);
  }
}
#endif /* LV_HAVE_GENERIC */
/*
 * Definitions referenced above:
 *   lv_16sc_t               - short complex            (volk_complex.h:53)
 *   int16_t                 - signed short             (stdint.h:76)
 *   __VOLK_ATTR_ALIGNED(x)  - alignment attribute macro (volk_common.h:27)
 */