doc/doxygen-3.6/volk__8ic__x2__multiply__conjugate__16ic__a_8h_source.html

00001 #ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H
00002 #define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H
00003
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006 #include <volk/volk_complex.h>
00007
00008 #ifdef LV_HAVE_SSE4_1
00009 #include <smmintrin.h>
00010 /*!
00011   \brief Multiplys the one complex vector with the complex conjugate of the second complex vector and stores their results in the third vector
00012   \param cVector The complex vector where the results will be stored
00013   \param aVector One of the complex vectors to be multiplied
00014   \param bVector The complex vector which will be converted to complex conjugate and multiplied
00015   \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
00016 */
00017 static inline void volk_8ic_x2_multiply_conjugate_16ic_a_sse4_1(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
00018   unsigned int number = 0;
00019   const unsigned int quarterPoints = num_points / 4;
00020
00021   __m128i x, y, realz, imagz;
00022   lv_16sc_t* c = cVector;
00023   const lv_8sc_t* a = aVector;
00024   const lv_8sc_t* b = bVector;
00025   __m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1);
00026
00027   for(;number < quarterPoints; number++){
00028     // Convert into 8 bit values into 16 bit values
00029     x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a));
00030     y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b));
00031
00032     // Calculate the ar*cr - ai*(-ci) portions
00033     realz = _mm_madd_epi16(x,y);
00034
00035     // Calculate the complex conjugate of the cr + ci j values
00036     y = _mm_sign_epi16(y, conjugateSign);
00037
00038     // Shift the order of the cr and ci values
00039     y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, _MM_SHUFFLE(2,3,0,1) ), _MM_SHUFFLE(2,3,0,1));
00040
00041     // Calculate the ar*(-ci) + cr*(ai)
00042     imagz = _mm_madd_epi16(x,y);
00043
00044     _mm_store_si128((__m128i*)c, _mm_packs_epi32(_mm_unpacklo_epi32(realz, imagz), _mm_unpackhi_epi32(realz, imagz)));
00045
00046     a += 4;
00047     b += 4;
00048     c += 4;
00049   }
00050
00051   number = quarterPoints * 4;
00052   int16_t* c16Ptr = (int16_t*)&cVector[number];
00053   int8_t* a8Ptr = (int8_t*)&aVector[number];
00054   int8_t* b8Ptr = (int8_t*)&bVector[number];
00055   for(; number < num_points; number++){
00056     float aReal =  (float)*a8Ptr++;
00057     float aImag =  (float)*a8Ptr++;
00058     lv_32fc_t aVal = lv_cmake(aReal, aImag );
00059     float bReal = (float)*b8Ptr++;
00060     float bImag = (float)*b8Ptr++;
00061     lv_32fc_t bVal = lv_cmake( bReal, -bImag );
00062     lv_32fc_t temp = aVal * bVal;
00063
00064     *c16Ptr++ = (int16_t)lv_creal(temp);
00065     *c16Ptr++ = (int16_t)lv_cimag(temp);
00066   }
00067 }
00068 #endif /* LV_HAVE_SSE4_1 */
00069
00070 #ifdef LV_HAVE_GENERIC
00071 /*!
00072   \brief Multiplys the one complex vector with the complex conjugate of the second complex vector and stores their results in the third vector
00073   \param cVector The complex vector where the results will be stored
00074   \param aVector One of the complex vectors to be multiplied
00075   \param bVector The complex vector which will be converted to complex conjugate and multiplied
00076   \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
00077 */
00078 static inline void volk_8ic_x2_multiply_conjugate_16ic_a_generic(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
00079   unsigned int number = 0;
00080   int16_t* c16Ptr = (int16_t*)cVector;
00081   int8_t* a8Ptr = (int8_t*)aVector;
00082   int8_t* b8Ptr = (int8_t*)bVector;
00083   for(number =0; number < num_points; number++){
00084     float aReal =  (float)*a8Ptr++;
00085     float aImag =  (float)*a8Ptr++;
00086     lv_32fc_t aVal = lv_cmake(aReal, aImag );
00087     float bReal = (float)*b8Ptr++;
00088     float bImag = (float)*b8Ptr++;
00089     lv_32fc_t bVal = lv_cmake( bReal, -bImag );
00090     lv_32fc_t temp = aVal * bVal;
00091
00092     *c16Ptr++ = (int16_t)lv_creal(temp);
00093     *c16Ptr++ = (int16_t)lv_cimag(temp);
00094   }
00095 }
00096 #endif /* LV_HAVE_GENERIC */
00097
00098
00099
00100
00101 #endif /* INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H */