doc/doxygen-3.6/volk__8ic__x2__s32f__multiply__conjugate__32fc__a_8h_source.html

00001 #ifndef INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H
00002 #define INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H
00003
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006 #include <volk/volk_complex.h>
00007
00008 #ifdef LV_HAVE_SSE4_1
00009 #include <smmintrin.h>
00010 /*!
00011   \brief Multiplys the one complex vector with the complex conjugate of the second complex vector and stores their results in the third vector
00012   \param cVector The complex vector where the results will be stored
00013   \param aVector One of the complex vectors to be multiplied
00014   \param bVector The complex vector which will be converted to complex conjugate and multiplied
00015   \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
00016 */
00017 static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a_sse4_1(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){
00018   unsigned int number = 0;
00019   const unsigned int quarterPoints = num_points / 4;
00020
00021   __m128i x, y, realz, imagz;
00022   __m128 ret;
00023   lv_32fc_t* c = cVector;
00024   const lv_8sc_t* a = aVector;
00025   const lv_8sc_t* b = bVector;
00026   __m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1);
00027
00028   __m128 invScalar = _mm_set_ps1(1.0/scalar);
00029
00030   for(;number < quarterPoints; number++){
00031     // Convert into 8 bit values into 16 bit values
00032     x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a));
00033     y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b));
00034
00035     // Calculate the ar*cr - ai*(-ci) portions
00036     realz = _mm_madd_epi16(x,y);
00037
00038     // Calculate the complex conjugate of the cr + ci j values
00039     y = _mm_sign_epi16(y, conjugateSign);
00040
00041     // Shift the order of the cr and ci values
00042     y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, _MM_SHUFFLE(2,3,0,1) ), _MM_SHUFFLE(2,3,0,1));
00043
00044     // Calculate the ar*(-ci) + cr*(ai)
00045     imagz = _mm_madd_epi16(x,y);
00046
00047     // Interleave real and imaginary and then convert to float values
00048     ret = _mm_cvtepi32_ps(_mm_unpacklo_epi32(realz, imagz));
00049
00050     // Normalize the floating point values
00051     ret = _mm_mul_ps(ret, invScalar);
00052
00053     // Store the floating point values
00054     _mm_store_ps((float*)c, ret);
00055     c += 2;
00056
00057     // Interleave real and imaginary and then convert to float values
00058     ret = _mm_cvtepi32_ps(_mm_unpackhi_epi32(realz, imagz));
00059
00060     // Normalize the floating point values
00061     ret = _mm_mul_ps(ret, invScalar);
00062
00063     // Store the floating point values
00064     _mm_store_ps((float*)c, ret);
00065     c += 2;
00066
00067     a += 4;
00068     b += 4;
00069   }
00070
00071   number = quarterPoints * 4;
00072   float* cFloatPtr = (float*)&cVector[number];
00073   int8_t* a8Ptr = (int8_t*)&aVector[number];
00074   int8_t* b8Ptr = (int8_t*)&bVector[number];
00075   for(; number < num_points; number++){
00076     float aReal =  (float)*a8Ptr++;
00077     float aImag =  (float)*a8Ptr++;
00078     lv_32fc_t aVal = lv_cmake(aReal, aImag );
00079     float bReal = (float)*b8Ptr++;
00080     float bImag = (float)*b8Ptr++;
00081     lv_32fc_t bVal = lv_cmake( bReal, -bImag );
00082     lv_32fc_t temp = aVal * bVal;
00083
00084     *cFloatPtr++ = lv_creal(temp) / scalar;
00085     *cFloatPtr++ = lv_cimag(temp) / scalar;
00086   }
00087 }
00088 #endif /* LV_HAVE_SSE4_1 */
00089
00090 #ifdef LV_HAVE_GENERIC
00091 /*!
00092   \brief Multiplys the one complex vector with the complex conjugate of the second complex vector and stores their results in the third vector
00093   \param cVector The complex vector where the results will be stored
00094   \param aVector One of the complex vectors to be multiplied
00095   \param bVector The complex vector which will be converted to complex conjugate and multiplied
00096   \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
00097 */
00098 static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a_generic(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){
00099   unsigned int number = 0;
00100   float* cPtr = (float*)cVector;
00101   const float invScalar = 1.0 / scalar;
00102   int8_t* a8Ptr = (int8_t*)aVector;
00103   int8_t* b8Ptr = (int8_t*)bVector;
00104   for(number = 0; number < num_points; number++){
00105     float aReal =  (float)*a8Ptr++;
00106     float aImag =  (float)*a8Ptr++;
00107     lv_32fc_t aVal = lv_cmake(aReal, aImag );
00108     float bReal = (float)*b8Ptr++;
00109     float bImag = (float)*b8Ptr++;
00110     lv_32fc_t bVal = lv_cmake( bReal, -bImag );
00111     lv_32fc_t temp = aVal * bVal;
00112
00113     *cPtr++ = (lv_creal(temp) * invScalar);
00114     *cPtr++ = (lv_cimag(temp) * invScalar);
00115   }
00116 }
00117 #endif /* LV_HAVE_GENERIC */
00118
00119
00120
00121
00122 #endif /* INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H */