GNU Radio 3.6.5 C++ API
|
00001 #ifndef INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_u_H 00002 #define INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_u_H 00003 00004 #include <inttypes.h> 00005 #include <stdio.h> 00006 #include <volk/volk_complex.h> 00007 #include <float.h> 00008 00009 #ifdef LV_HAVE_SSE3 00010 #include <pmmintrin.h> 00011 /*! 00012 \brief Multiplies vector a by the conjugate of vector b and stores the results in the third vector 00013 \param cVector The vector where the results will be stored 00014 \param aVector First vector to be multiplied 00015 \param bVector Second vector that is conjugated before being multiplied 00016 \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector 00017 */ 00018 static inline void volk_32fc_x2_multiply_conjugate_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ 00019 unsigned int number = 0; 00020 const unsigned int halfPoints = num_points / 2; 00021 00022 __m128 x, y, yl, yh, z, tmp1, tmp2; 00023 lv_32fc_t* c = cVector; 00024 const lv_32fc_t* a = aVector; 00025 const lv_32fc_t* b = bVector; 00026 00027 __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f); 00028 00029 for(;number < halfPoints; number++){ 00030 00031 x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi 00032 y = _mm_loadu_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di 00033 00034 y = _mm_xor_ps(y, conjugator); // conjugate y 00035 00036 yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr 00037 yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di 00038 00039 tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr 00040 00041 x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br 00042 00043 tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di 00044 00045 z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di 00046 00047 _mm_storeu_ps((float*)c,z); // Store the results back into the C container 00048 00049 a += 2; 00050 b += 2; 00051 c += 2; 00052 } 00053 00054 if((num_points % 2) != 0) { 00055 *c = (*a) * lv_conj(*b); 00056 } 00057 } 00058 #endif /* LV_HAVE_SSE */ 00059 00060 #ifdef LV_HAVE_GENERIC 00061 /*! 00062 \brief Multiplies vector a by the conjugate of vector b and stores the results in the third vector 00063 \param cVector The vector where the results will be stored 00064 \param aVector First vector to be multiplied 00065 \param bVector Second vector that is conjugated before being multiplied 00066 \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector 00067 */ 00068 static inline void volk_32fc_x2_multiply_conjugate_32fc_u_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ 00069 lv_32fc_t* cPtr = cVector; 00070 const lv_32fc_t* aPtr = aVector; 00071 const lv_32fc_t* bPtr= bVector; 00072 unsigned int number = 0; 00073 00074 for(number = 0; number < num_points; number++){ 00075 *cPtr++ = (*aPtr++) * lv_conj(*bPtr++); 00076 } 00077 } 00078 #endif /* LV_HAVE_GENERIC */ 00079 00080 00081 #endif /* INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_u_H */