GNU Radio 3.6.5 C++ API
|
00001 #ifndef INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H 00002 #define INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H 00003 00004 #include <inttypes.h> 00005 #include <stdio.h> 00006 #include <volk/volk_complex.h> 00007 00008 #ifdef LV_HAVE_SSE4_1 00009 #include <smmintrin.h> 00010 /*! 00011 \brief Multiplys the one complex vector with the complex conjugate of the second complex vector and stores their results in the third vector 00012 \param cVector The complex vector where the results will be stored 00013 \param aVector One of the complex vectors to be multiplied 00014 \param bVector The complex vector which will be converted to complex conjugate and multiplied 00015 \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector 00016 */ 00017 static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a_sse4_1(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){ 00018 unsigned int number = 0; 00019 const unsigned int quarterPoints = num_points / 4; 00020 00021 __m128i x, y, realz, imagz; 00022 __m128 ret; 00023 lv_32fc_t* c = cVector; 00024 const lv_8sc_t* a = aVector; 00025 const lv_8sc_t* b = bVector; 00026 __m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1); 00027 00028 __m128 invScalar = _mm_set_ps1(1.0/scalar); 00029 00030 for(;number < quarterPoints; number++){ 00031 // Convert into 8 bit values into 16 bit values 00032 x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a)); 00033 y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b)); 00034 00035 // Calculate the ar*cr - ai*(-ci) portions 00036 realz = _mm_madd_epi16(x,y); 00037 00038 // Calculate the complex conjugate of the cr + ci j values 00039 y = _mm_sign_epi16(y, conjugateSign); 00040 00041 // Shift the order of the cr and ci values 00042 y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, _MM_SHUFFLE(2,3,0,1) ), _MM_SHUFFLE(2,3,0,1)); 00043 00044 // Calculate the ar*(-ci) + cr*(ai) 00045 imagz = _mm_madd_epi16(x,y); 00046 00047 // Interleave real and imaginary and then convert to float values 00048 ret = _mm_cvtepi32_ps(_mm_unpacklo_epi32(realz, imagz)); 00049 00050 // Normalize the floating point values 00051 ret = _mm_mul_ps(ret, invScalar); 00052 00053 // Store the floating point values 00054 _mm_store_ps((float*)c, ret); 00055 c += 2; 00056 00057 // Interleave real and imaginary and then convert to float values 00058 ret = _mm_cvtepi32_ps(_mm_unpackhi_epi32(realz, imagz)); 00059 00060 // Normalize the floating point values 00061 ret = _mm_mul_ps(ret, invScalar); 00062 00063 // Store the floating point values 00064 _mm_store_ps((float*)c, ret); 00065 c += 2; 00066 00067 a += 4; 00068 b += 4; 00069 } 00070 00071 number = quarterPoints * 4; 00072 float* cFloatPtr = (float*)&cVector[number]; 00073 int8_t* a8Ptr = (int8_t*)&aVector[number]; 00074 int8_t* b8Ptr = (int8_t*)&bVector[number]; 00075 for(; number < num_points; number++){ 00076 float aReal = (float)*a8Ptr++; 00077 float aImag = (float)*a8Ptr++; 00078 lv_32fc_t aVal = lv_cmake(aReal, aImag ); 00079 float bReal = (float)*b8Ptr++; 00080 float bImag = (float)*b8Ptr++; 00081 lv_32fc_t bVal = lv_cmake( bReal, -bImag ); 00082 lv_32fc_t temp = aVal * bVal; 00083 00084 *cFloatPtr++ = lv_creal(temp) / scalar; 00085 *cFloatPtr++ = lv_cimag(temp) / scalar; 00086 } 00087 } 00088 #endif /* LV_HAVE_SSE4_1 */ 00089 00090 #ifdef LV_HAVE_GENERIC 00091 /*! 00092 \brief Multiplys the one complex vector with the complex conjugate of the second complex vector and stores their results in the third vector 00093 \param cVector The complex vector where the results will be stored 00094 \param aVector One of the complex vectors to be multiplied 00095 \param bVector The complex vector which will be converted to complex conjugate and multiplied 00096 \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector 00097 */ 00098 static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a_generic(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){ 00099 unsigned int number = 0; 00100 float* cPtr = (float*)cVector; 00101 const float invScalar = 1.0 / scalar; 00102 int8_t* a8Ptr = (int8_t*)aVector; 00103 int8_t* b8Ptr = (int8_t*)bVector; 00104 for(number = 0; number < num_points; number++){ 00105 float aReal = (float)*a8Ptr++; 00106 float aImag = (float)*a8Ptr++; 00107 lv_32fc_t aVal = lv_cmake(aReal, aImag ); 00108 float bReal = (float)*b8Ptr++; 00109 float bImag = (float)*b8Ptr++; 00110 lv_32fc_t bVal = lv_cmake( bReal, -bImag ); 00111 lv_32fc_t temp = aVal * bVal; 00112 00113 *cPtr++ = (lv_creal(temp) * invScalar); 00114 *cPtr++ = (lv_cimag(temp) * invScalar); 00115 } 00116 } 00117 #endif /* LV_HAVE_GENERIC */ 00118 00119 00120 00121 00122 #endif /* INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H */