1 #ifndef INCLUDED_volk_32fc_32f_multiply_32fc_a_H
2 #define INCLUDED_volk_32fc_32f_multiply_32fc_a_H
16 static inline void volk_32fc_32f_multiply_32fc_a_sse(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const float* bVector,
unsigned int num_points){
17 unsigned int number = 0;
18 const unsigned int quarterPoints = num_points / 4;
22 const float* bPtr= bVector;
24 __m128 aVal1, aVal2, bVal, bVal1, bVal2, cVal;
25 for(;number < quarterPoints; number++){
27 aVal1 = _mm_load_ps((
const float*)aPtr);
30 aVal2 = _mm_load_ps((
const float*)aPtr);
33 bVal = _mm_load_ps(bPtr);
36 bVal1 = _mm_shuffle_ps(bVal, bVal, _MM_SHUFFLE(1,1,0,0));
37 bVal2 = _mm_shuffle_ps(bVal, bVal, _MM_SHUFFLE(3,3,2,2));
39 cVal = _mm_mul_ps(aVal1, bVal1);
41 _mm_store_ps((
float*)cPtr,cVal);
44 cVal = _mm_mul_ps(aVal2, bVal2);
46 _mm_store_ps((
float*)cPtr,cVal);
51 number = quarterPoints * 4;
52 for(;number < num_points; number++){
53 *cPtr++ = (*aPtr++) * (*bPtr);
59 #ifdef LV_HAVE_GENERIC
67 static inline void volk_32fc_32f_multiply_32fc_generic(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const float* bVector,
unsigned int num_points){
70 const float* bPtr= bVector;
71 unsigned int number = 0;
73 for(number = 0; number < num_points; number++){
74 *cPtr++ = (*aPtr++) * (*bPtr++);
87 extern void volk_32fc_32f_multiply_32fc_a_orc_impl(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const float* bVector,
unsigned int num_points);
88 static inline void volk_32fc_32f_multiply_32fc_u_orc(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const float* bVector,
unsigned int num_points){
89 volk_32fc_32f_multiply_32fc_a_orc_impl(cVector, aVector, bVector, num_points);
/* lv_32fc_t is defined as `float complex`; see volk_complex.h, line 56. */