GNU Radio 3.5.3.2 C++ API
|
00001 #ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H 00002 #define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H 00003 00004 #include <inttypes.h> 00005 #include <stdio.h> 00006 #include <volk/volk_complex.h> 00007 00008 #ifdef LV_HAVE_SSE4_1 00009 #include <smmintrin.h> 00010 /*! 00011 \brief Multiplys the one complex vector with the complex conjugate of the second complex vector and stores their results in the third vector 00012 \param cVector The complex vector where the results will be stored 00013 \param aVector One of the complex vectors to be multiplied 00014 \param bVector The complex vector which will be converted to complex conjugate and multiplied 00015 \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector 00016 */ 00017 static inline void volk_8ic_x2_multiply_conjugate_16ic_a_sse4_1(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){ 00018 unsigned int number = 0; 00019 const unsigned int quarterPoints = num_points / 4; 00020 00021 __m128i x, y, realz, imagz; 00022 lv_16sc_t* c = cVector; 00023 const lv_8sc_t* a = aVector; 00024 const lv_8sc_t* b = bVector; 00025 __m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1); 00026 00027 for(;number < quarterPoints; number++){ 00028 // Convert into 8 bit values into 16 bit values 00029 x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a)); 00030 y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b)); 00031 00032 // Calculate the ar*cr - ai*(-ci) portions 00033 realz = _mm_madd_epi16(x,y); 00034 00035 // Calculate the complex conjugate of the cr + ci j values 00036 y = _mm_sign_epi16(y, conjugateSign); 00037 00038 // Shift the order of the cr and ci values 00039 y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, _MM_SHUFFLE(2,3,0,1) ), _MM_SHUFFLE(2,3,0,1)); 00040 00041 // Calculate the ar*(-ci) + cr*(ai) 00042 imagz = _mm_madd_epi16(x,y); 00043 00044 _mm_store_si128((__m128i*)c, _mm_packs_epi32(_mm_unpacklo_epi32(realz, imagz), _mm_unpackhi_epi32(realz, imagz))); 00045 00046 a += 4; 00047 b += 4; 00048 c += 4; 00049 } 00050 00051 number = quarterPoints * 4; 00052 int16_t* c16Ptr = (int16_t*)&cVector[number]; 00053 int8_t* a8Ptr = (int8_t*)&aVector[number]; 00054 int8_t* b8Ptr = (int8_t*)&bVector[number]; 00055 for(; number < num_points; number++){ 00056 float aReal = (float)*a8Ptr++; 00057 float aImag = (float)*a8Ptr++; 00058 lv_32fc_t aVal = lv_cmake(aReal, aImag ); 00059 float bReal = (float)*b8Ptr++; 00060 float bImag = (float)*b8Ptr++; 00061 lv_32fc_t bVal = lv_cmake( bReal, -bImag ); 00062 lv_32fc_t temp = aVal * bVal; 00063 00064 *c16Ptr++ = (int16_t)lv_creal(temp); 00065 *c16Ptr++ = (int16_t)lv_cimag(temp); 00066 } 00067 } 00068 #endif /* LV_HAVE_SSE4_1 */ 00069 00070 #ifdef LV_HAVE_GENERIC 00071 /*! 00072 \brief Multiplys the one complex vector with the complex conjugate of the second complex vector and stores their results in the third vector 00073 \param cVector The complex vector where the results will be stored 00074 \param aVector One of the complex vectors to be multiplied 00075 \param bVector The complex vector which will be converted to complex conjugate and multiplied 00076 \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector 00077 */ 00078 static inline void volk_8ic_x2_multiply_conjugate_16ic_a_generic(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){ 00079 unsigned int number = 0; 00080 int16_t* c16Ptr = (int16_t*)cVector; 00081 int8_t* a8Ptr = (int8_t*)aVector; 00082 int8_t* b8Ptr = (int8_t*)bVector; 00083 for(number =0; number < num_points; number++){ 00084 float aReal = (float)*a8Ptr++; 00085 float aImag = (float)*a8Ptr++; 00086 lv_32fc_t aVal = lv_cmake(aReal, aImag ); 00087 float bReal = (float)*b8Ptr++; 00088 float bImag = (float)*b8Ptr++; 00089 lv_32fc_t bVal = lv_cmake( bReal, -bImag ); 00090 lv_32fc_t temp = aVal * bVal; 00091 00092 *c16Ptr++ = (int16_t)lv_creal(temp); 00093 *c16Ptr++ = (int16_t)lv_cimag(temp); 00094 } 00095 } 00096 #endif /* LV_HAVE_GENERIC */ 00097 00098 00099 00100 00101 #endif /* INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H */