1 #ifndef INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_u_H
2 #define INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_u_H
10 #include <pmmintrin.h>
18 static inline void volk_32fc_x2_multiply_conjugate_32fc_u_sse3(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t* bVector,
unsigned int num_points){
19 unsigned int number = 0;
20 const unsigned int halfPoints = num_points / 2;
22 __m128 x, y, yl, yh, z, tmp1, tmp2;
27 __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
29 for(;number < halfPoints; number++){
31 x = _mm_loadu_ps((
float*)a);
32 y = _mm_loadu_ps((
float*)b);
34 y = _mm_xor_ps(y, conjugator);
36 yl = _mm_moveldup_ps(y);
37 yh = _mm_movehdup_ps(y);
39 tmp1 = _mm_mul_ps(x,yl);
41 x = _mm_shuffle_ps(x,x,0xB1);
43 tmp2 = _mm_mul_ps(x,yh);
45 z = _mm_addsub_ps(tmp1,tmp2);
47 _mm_storeu_ps((
float*)c,z);
54 if((num_points % 2) != 0) {
#ifdef LV_HAVE_GENERIC
/*!
 * \brief Multiplies each element of aVector by the complex conjugate of the
 *        matching element of bVector and stores the product in cVector.
 *        Portable scalar implementation.
 *
 * \param cVector    Output buffer; receives num_points complex products.
 * \param aVector    First input buffer.
 * \param bVector    Second input buffer; each element is conjugated before the multiply.
 * \param num_points Number of complex elements to process.
 */
static inline void volk_32fc_x2_multiply_conjugate_32fc_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
  lv_32fc_t* cPtr = cVector;
  const lv_32fc_t* aPtr = aVector;
  const lv_32fc_t* bPtr = bVector;
  unsigned int number = 0;

  /* One product per iteration; all three cursors advance together. */
  for(number = 0; number < num_points; number++){
    *cPtr++ = (*aPtr++) * lv_conj(*bPtr++);
  }
}
#endif /* LV_HAVE_GENERIC */
82 #ifndef INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H
83 #define INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H
91 #include <pmmintrin.h>
99 static inline void volk_32fc_x2_multiply_conjugate_32fc_a_sse3(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t* bVector,
unsigned int num_points){
100 unsigned int number = 0;
101 const unsigned int halfPoints = num_points / 2;
103 __m128 x, y, yl, yh, z, tmp1, tmp2;
108 __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
110 for(;number < halfPoints; number++){
112 x = _mm_load_ps((
float*)a);
113 y = _mm_load_ps((
float*)b);
115 y = _mm_xor_ps(y, conjugator);
117 yl = _mm_moveldup_ps(y);
118 yh = _mm_movehdup_ps(y);
120 tmp1 = _mm_mul_ps(x,yl);
122 x = _mm_shuffle_ps(x,x,0xB1);
124 tmp2 = _mm_mul_ps(x,yh);
126 z = _mm_addsub_ps(tmp1,tmp2);
128 _mm_store_ps((
float*)c,z);
135 if((num_points % 2) != 0) {
#ifdef LV_HAVE_GENERIC
/*!
 * \brief Multiplies each element of aVector by the complex conjugate of the
 *        matching element of bVector and stores the product in cVector.
 *        Portable scalar implementation exposed under the "_a" kernel name.
 *
 * \param cVector    Output buffer; receives num_points complex products.
 * \param aVector    First input buffer.
 * \param bVector    Second input buffer; each element is conjugated before the multiply.
 * \param num_points Number of complex elements to process.
 */
static inline void volk_32fc_x2_multiply_conjugate_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
  lv_32fc_t* cPtr = cVector;
  const lv_32fc_t* aPtr = aVector;
  const lv_32fc_t* bPtr = bVector;
  unsigned int number = 0;

  /* One product per iteration; all three cursors advance together. */
  for(number = 0; number < num_points; number++){
    *cPtr++ = (*aPtr++) * lv_conj(*bPtr++);
  }
}
#endif /* LV_HAVE_GENERIC */
/*
 * Cross-references:
 *   lv_conj(x) - macro, defined at volk_complex.h:80
 *   lv_32fc_t  - typedef for float complex, defined at volk_complex.h:56
 */