1 #ifndef INCLUDED_volk_32fc_x2_multiply_32fc_u_H
2 #define INCLUDED_volk_32fc_x2_multiply_32fc_u_H
10 #include <pmmintrin.h>
/*
 * SSE3 kernel, unaligned variant: multiplies complex values read through
 * `a` and `b` and stores the products through `c`, two complex values
 * (four floats, one __m128) per loop iteration.
 *
 * cVector    - destination vector
 * aVector    - first input vector
 * bVector    - second input vector
 * num_points - number of complex elements to process
 *
 * NOTE(review): the declarations of `a`, `b` and `c`, their per-iteration
 * advancement, the body of the odd-length tail branch and the closing braces
 * are not present in this listing. Presumably a/b/c are cursors over
 * aVector/bVector/cVector -- confirm against the full header.
 */
18 static inline void volk_32fc_x2_multiply_32fc_u_sse3(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t* bVector,
unsigned int num_points){
19 unsigned int number = 0;
// Each 128-bit register holds two complex floats, so the vector loop runs
// num_points / 2 times.
20 const unsigned int halfPoints = num_points / 2;
22 __m128 x, y, yl, yh, z, tmp1, tmp2;
27 for(;number < halfPoints; number++){
// Unaligned loads: x = ar,ai,br,bi   y = cr,ci,dr,di
29 x = _mm_loadu_ps((
float*)a);
30 y = _mm_loadu_ps((
float*)b);
// Duplicate the even lanes (real parts) of y: yl = cr,cr,dr,dr
32 yl = _mm_moveldup_ps(y);
// Duplicate the odd lanes (imaginary parts) of y: yh = ci,ci,di,di
33 yh = _mm_movehdup_ps(y);
// tmp1 = ar*cr, ai*cr, br*dr, bi*dr
35 tmp1 = _mm_mul_ps(x,yl);
// 0xB1 selects lanes 1,0,3,2: swaps real/imag in each pair -> x = ai,ar,bi,br
37 x = _mm_shuffle_ps(x,x,0xB1);
// tmp2 = ai*ci, ar*ci, bi*di, br*di
39 tmp2 = _mm_mul_ps(x,yh);
// addsub subtracts in even lanes and adds in odd lanes:
// z = ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
41 z = _mm_addsub_ps(tmp1,tmp2);
// Unaligned store of the two complex products.
43 _mm_storeu_ps((
float*)c,z);
// Odd num_points leaves one element that the two-at-a-time loop did not
// cover; the handling code for it is not visible in this listing.
50 if((num_points % 2) != 0) {
56 #ifdef LV_HAVE_GENERIC
/*
 * Portable scalar kernel: for each of num_points iterations, multiplies the
 * values at aPtr and bPtr, stores the product at cPtr, and advances all
 * three pointers by one element.
 *
 * cVector    - destination vector
 * aVector    - first input vector
 * bVector    - second input vector
 * num_points - number of elements to process
 *
 * NOTE(review): the declarations of cPtr/aPtr/bPtr and the closing braces
 * are not present in this listing; presumably they alias
 * cVector/aVector/bVector -- confirm against the full header.
 */
64 static inline void volk_32fc_x2_multiply_32fc_generic(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t* bVector,
unsigned int num_points){
68 unsigned int number = 0;
70 for(number = 0; number < num_points; number++){
// One product per iteration; the post-increments step every pointer.
71 *cPtr++ = (*aPtr++) * (*bPtr++);
78 #ifndef INCLUDED_volk_32fc_x2_multiply_32fc_a_H
79 #define INCLUDED_volk_32fc_x2_multiply_32fc_a_H
87 #include <pmmintrin.h>
/*
 * SSE3 kernel, aligned variant: same arithmetic as the unaligned SSE3
 * kernel, but it uses _mm_load_ps/_mm_store_ps, which require the accessed
 * addresses to be 16-byte aligned.
 *
 * cVector    - destination vector
 * aVector    - first input vector
 * bVector    - second input vector
 * num_points - number of complex elements to process
 *
 * NOTE(review): the declarations of `a`, `b` and `c`, their per-iteration
 * advancement, the body of the odd-length tail branch and the closing braces
 * are not present in this listing. Presumably a/b/c are cursors over
 * aVector/bVector/cVector -- confirm against the full header.
 */
95 static inline void volk_32fc_x2_multiply_32fc_a_sse3(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t* bVector,
unsigned int num_points){
96 unsigned int number = 0;
// Two complex floats fit in one 128-bit register, hence num_points / 2
// vector iterations.
97 const unsigned int halfPoints = num_points / 2;
99 __m128 x, y, yl, yh, z, tmp1, tmp2;
103 for(;number < halfPoints; number++){
// Aligned loads: x = ar,ai,br,bi   y = cr,ci,dr,di
105 x = _mm_load_ps((
float*)a);
106 y = _mm_load_ps((
float*)b);
// Even lanes (real parts) of y duplicated: yl = cr,cr,dr,dr
108 yl = _mm_moveldup_ps(y);
// Odd lanes (imaginary parts) of y duplicated: yh = ci,ci,di,di
109 yh = _mm_movehdup_ps(y);
// tmp1 = ar*cr, ai*cr, br*dr, bi*dr
111 tmp1 = _mm_mul_ps(x,yl);
// 0xB1 selects lanes 1,0,3,2: swaps real/imag in each pair -> x = ai,ar,bi,br
113 x = _mm_shuffle_ps(x,x,0xB1);
// tmp2 = ai*ci, ar*ci, bi*di, br*di
115 tmp2 = _mm_mul_ps(x,yh);
// Subtract in even lanes, add in odd lanes:
// z = ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
117 z = _mm_addsub_ps(tmp1,tmp2);
// Aligned store of the two complex products.
119 _mm_store_ps((
float*)c,z);
// Odd num_points leaves one element that the two-at-a-time loop did not
// cover; the handling code for it is not visible in this listing.
126 if((num_points % 2) != 0) {
132 #ifdef LV_HAVE_GENERIC
/*
 * Portable scalar kernel exposed under the aligned (_a_) name: for each of
 * num_points iterations, multiplies the values at aPtr and bPtr, stores the
 * product at cPtr, and advances all three pointers by one element.
 *
 * cVector    - destination vector
 * aVector    - first input vector
 * bVector    - second input vector
 * num_points - number of elements to process
 *
 * NOTE(review): the declarations of cPtr/aPtr/bPtr and the closing braces
 * are not present in this listing; presumably they alias
 * cVector/aVector/bVector -- confirm against the full header.
 */
140 static inline void volk_32fc_x2_multiply_32fc_a_generic(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t* bVector,
unsigned int num_points){
144 unsigned int number = 0;
146 for(number = 0; number < num_points; number++){
// One product per iteration; the post-increments step every pointer.
147 *cPtr++ = (*aPtr++) * (*bPtr++);
/*
 * Externally defined implementation; only its prototype is visible here.
 * It takes the same four arguments as the other kernels in this header.
 */
160 extern void volk_32fc_x2_multiply_32fc_a_orc_impl(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t* bVector,
unsigned int num_points);
/*
 * Thin wrapper that forwards its four arguments, unchanged and in the same
 * order, to volk_32fc_x2_multiply_32fc_a_orc_impl.
 *
 * cVector    - destination vector
 * aVector    - first input vector
 * bVector    - second input vector
 * num_points - number of elements to process
 *
 * NOTE(review): the wrapper carries the _u_ prefix while the implementation
 * it calls carries _a_. Whether the implementation tolerates unaligned
 * buffers cannot be determined from this listing -- confirm.
 * NOTE(review): the closing brace of this function is not present in this
 * listing.
 */
161 static inline void volk_32fc_x2_multiply_32fc_u_orc(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t* bVector,
unsigned int num_points){
162 volk_32fc_x2_multiply_32fc_a_orc_impl(cVector, aVector, bVector, num_points);
float complex lv_32fc_t
Definition: volk_complex.h:56