GNU Radio 3.5.3.2 C++ API
|
00001 #ifndef INCLUDED_volk_32fc_x2_multiply_32fc_u_H 00002 #define INCLUDED_volk_32fc_x2_multiply_32fc_u_H 00003 00004 #include <inttypes.h> 00005 #include <stdio.h> 00006 #include <volk/volk_complex.h> 00007 #include <float.h> 00008 00009 #ifdef LV_HAVE_SSE3 00010 #include <pmmintrin.h> 00011 /*! 00012 \brief Multiplies the two input complex vectors and stores their results in the third vector 00013 \param cVector The vector where the results will be stored 00014 \param aVector One of the vectors to be multiplied 00015 \param bVector One of the vectors to be multiplied 00016 \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector 00017 */ 00018 static inline void volk_32fc_x2_multiply_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ 00019 unsigned int number = 0; 00020 const unsigned int halfPoints = num_points / 2; 00021 00022 __m128 x, y, yl, yh, z, tmp1, tmp2; 00023 lv_32fc_t* c = cVector; 00024 const lv_32fc_t* a = aVector; 00025 const lv_32fc_t* b = bVector; 00026 00027 for(;number < halfPoints; number++){ 00028 00029 x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi 00030 y = _mm_loadu_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di 00031 00032 yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr 00033 yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di 00034 00035 tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr 00036 00037 x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br 00038 00039 tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di 00040 00041 z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di 00042 00043 _mm_storeu_ps((float*)c,z); // Store the results back into the C container 00044 00045 a += 2; 00046 b += 2; 00047 c += 2; 00048 } 00049 00050 if((num_points % 2) != 0) { 00051 *c = (*a) * (*b); 00052 } 00053 } 00054 #endif /* LV_HAVE_SSE */ 00055 00056 #ifdef LV_HAVE_GENERIC 00057 /*! 00058 \brief Multiplies the two input complex vectors and stores their results in the third vector 00059 \param cVector The vector where the results will be stored 00060 \param aVector One of the vectors to be multiplied 00061 \param bVector One of the vectors to be multiplied 00062 \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector 00063 */ 00064 static inline void volk_32fc_x2_multiply_32fc_u_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ 00065 lv_32fc_t* cPtr = cVector; 00066 const lv_32fc_t* aPtr = aVector; 00067 const lv_32fc_t* bPtr= bVector; 00068 unsigned int number = 0; 00069 00070 for(number = 0; number < num_points; number++){ 00071 *cPtr++ = (*aPtr++) * (*bPtr++); 00072 } 00073 } 00074 #endif /* LV_HAVE_GENERIC */ 00075 00076 00077 #endif /* INCLUDED_volk_32fc_x2_multiply_32fc_u_H */