GNU Radio 3.6.5 C++ API

volk_32fc_s32fc_x2_rotator_32fc_a.h

Go to the documentation of this file.
00001 #ifndef INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H
00002 #define INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H
00003 
00004 
#include <volk/volk_complex.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
00008 #define ROTATOR_RELOAD 512
00009 
00010 
00011 #ifdef LV_HAVE_GENERIC
00012 
00013 /*!
00014   \brief rotate input vector at fixed rate per sample from initial phase offset
00015   \param outVector The vector where the results will be stored
00016   \param inVector Vector to be rotated
00017   \param phase_inc rotational velocity
00018   \param phase initial phase offset
00019   \param num_points The number of values in inVector to be rotated and stored into cVector
00020 */
00021 
00022 
00023 static inline void volk_32fc_s32fc_x2_rotator_32fc_a_generic(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){    
00024     unsigned int i = 0; 
00025     int j = 0;    
00026     for(i = 0; i < (unsigned int)(num_points/ROTATOR_RELOAD); ++i) {
00027         for(j = 0; j < ROTATOR_RELOAD; ++j) {
00028             *outVector++ = *inVector++ * (*phase);
00029             (*phase) *= phase_inc;
00030         }
00031         (*phase) /= abs((*phase));
00032     }
00033     for(i = 0; i < num_points%ROTATOR_RELOAD; ++i) {
00034         *outVector++ = *inVector++ * (*phase);
00035         (*phase) *= phase_inc;
00036     }
00037     
00038 }
00039 #endif /* LV_HAVE_GENERIC */
00040 
00041 
00042 #ifdef LV_HAVE_SSE4_1
00043 #include <smmintrin.h>
00044 
00045 static inline void volk_32fc_s32fc_x2_rotator_32fc_a_sse4_1(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){
00046     lv_32fc_t* cPtr = outVector;
00047     const lv_32fc_t* aPtr = inVector;
00048     lv_32fc_t incr = 1;
00049     lv_32fc_t phase_Ptr[2] = {(*phase), (*phase)};
00050     
00051     unsigned int i, j = 0;
00052 
00053     for(i = 0; i < 2; ++i) {
00054         phase_Ptr[i] *= incr;
00055         incr *= (phase_inc);
00056     }
00057 
00058     /*printf("%f, %f\n", lv_creal(phase_Ptr[0]), lv_cimag(phase_Ptr[0]));
00059     printf("%f, %f\n", lv_creal(phase_Ptr[1]), lv_cimag(phase_Ptr[1]));
00060     printf("%f, %f\n", lv_creal(phase_Ptr[2]), lv_cimag(phase_Ptr[2]));
00061     printf("%f, %f\n", lv_creal(phase_Ptr[3]), lv_cimag(phase_Ptr[3]));
00062     printf("incr: %f, %f\n", lv_creal(incr), lv_cimag(incr));*/
00063     __m128 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p;
00064     
00065     phase_Val = _mm_loadu_ps((float*)phase_Ptr);
00066     inc_Val = _mm_set_ps(lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr));
00067     
00068     const unsigned int halfPoints = num_points / 2;
00069 
00070     
00071     for(i = 0; i < (unsigned int)(halfPoints/ROTATOR_RELOAD); i++) {
00072         for(j = 0; j < ROTATOR_RELOAD; ++j) {
00073             
00074             aVal = _mm_load_ps((float*)aPtr);
00075             
00076             yl = _mm_moveldup_ps(phase_Val);
00077             yh = _mm_movehdup_ps(phase_Val);
00078             ylp = _mm_moveldup_ps(inc_Val);
00079             yhp = _mm_movehdup_ps(inc_Val);
00080             
00081             tmp1 = _mm_mul_ps(aVal, yl);
00082             tmp1p = _mm_mul_ps(phase_Val, ylp);
00083             
00084             aVal = _mm_shuffle_ps(aVal, aVal, 0xB1);
00085             phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1);
00086             tmp2 = _mm_mul_ps(aVal, yh);
00087             tmp2p = _mm_mul_ps(phase_Val, yhp);
00088             
00089             z = _mm_addsub_ps(tmp1, tmp2);
00090             phase_Val = _mm_addsub_ps(tmp1p, tmp2p);
00091             
00092             _mm_store_ps((float*)cPtr, z);
00093             
00094             aPtr += 2;
00095             cPtr += 2;
00096         }
00097         tmp1 = _mm_mul_ps(phase_Val, phase_Val);
00098         tmp2 = _mm_hadd_ps(tmp1, tmp1);
00099         tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
00100         phase_Val = _mm_div_ps(phase_Val, tmp1);
00101     }
00102     for(i = 0; i < halfPoints%ROTATOR_RELOAD; ++i) {
00103         aVal = _mm_load_ps((float*)aPtr);
00104         
00105         yl = _mm_moveldup_ps(phase_Val);
00106         yh = _mm_movehdup_ps(phase_Val);
00107         ylp = _mm_moveldup_ps(inc_Val);
00108         yhp = _mm_movehdup_ps(inc_Val);
00109         
00110         tmp1 = _mm_mul_ps(aVal, yl);
00111 
00112         tmp1p = _mm_mul_ps(phase_Val, ylp);
00113         
00114         aVal = _mm_shuffle_ps(aVal, aVal, 0xB1);
00115         phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1);
00116         tmp2 = _mm_mul_ps(aVal, yh);
00117         tmp2p = _mm_mul_ps(phase_Val, yhp);
00118         
00119         z = _mm_addsub_ps(tmp1, tmp2);
00120         phase_Val = _mm_addsub_ps(tmp1p, tmp2p);
00121         
00122         _mm_store_ps((float*)cPtr, z);
00123         
00124         aPtr += 2;
00125         cPtr += 2;
00126     }
00127 
00128     _mm_storeu_ps((float*)phase_Ptr, phase_Val);
00129     for(i = 0; i < num_points%2; ++i) {
00130         *cPtr++ = *aPtr++ * phase_Ptr[0];
00131         phase_Ptr[0] *= (phase_inc);
00132     }
00133      
00134     (*phase) = phase_Ptr[0];
00135 
00136 }
00137     
00138 #endif /* LV_HAVE_SSE4_1 */
00139 
00140 
00141 #ifdef LV_HAVE_AVX
00142 #include <immintrin.h>
00143 
00144 /*!
00145   \brief rotate input vector at fixed rate per sample from initial phase offset
00146   \param outVector The vector where the results will be stored
00147   \param inVector Vector to be rotated
00148   \param phase_inc rotational velocity
00149   \param phase initial phase offset
00150   \param num_points The number of values in inVector to be rotated and stored into cVector
00151 */
00152 
00153 
00154 
00155 
00156 static inline void volk_32fc_s32fc_x2_rotator_32fc_a_avx(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){
00157     lv_32fc_t* cPtr = outVector;
00158     const lv_32fc_t* aPtr = inVector;
00159     lv_32fc_t incr = 1;
00160     lv_32fc_t phase_Ptr[4] = {(*phase), (*phase), (*phase), (*phase)};
00161     
00162     unsigned int i, j = 0;
00163 
00164     for(i = 0; i < 4; ++i) {
00165         phase_Ptr[i] *= incr;
00166         incr *= (phase_inc);
00167     }
00168 
00169     /*printf("%f, %f\n", lv_creal(phase_Ptr[0]), lv_cimag(phase_Ptr[0]));
00170     printf("%f, %f\n", lv_creal(phase_Ptr[1]), lv_cimag(phase_Ptr[1]));
00171     printf("%f, %f\n", lv_creal(phase_Ptr[2]), lv_cimag(phase_Ptr[2]));
00172     printf("%f, %f\n", lv_creal(phase_Ptr[3]), lv_cimag(phase_Ptr[3]));
00173     printf("incr: %f, %f\n", lv_creal(incr), lv_cimag(incr));*/
00174     __m256 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p;
00175     
00176     phase_Val = _mm256_loadu_ps((float*)phase_Ptr);
00177     inc_Val = _mm256_set_ps(lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr));
00178     const unsigned int fourthPoints = num_points / 4;
00179 
00180     
00181     for(i = 0; i < (unsigned int)(fourthPoints/ROTATOR_RELOAD); i++) {
00182         for(j = 0; j < ROTATOR_RELOAD; ++j) {
00183             
00184             aVal = _mm256_load_ps((float*)aPtr);
00185             
00186             yl = _mm256_moveldup_ps(phase_Val);
00187             yh = _mm256_movehdup_ps(phase_Val);
00188             ylp = _mm256_moveldup_ps(inc_Val);
00189             yhp = _mm256_movehdup_ps(inc_Val);
00190             
00191             tmp1 = _mm256_mul_ps(aVal, yl);
00192             tmp1p = _mm256_mul_ps(phase_Val, ylp);
00193             
00194             aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1);
00195             phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1);
00196             tmp2 = _mm256_mul_ps(aVal, yh);
00197             tmp2p = _mm256_mul_ps(phase_Val, yhp);
00198             
00199             z = _mm256_addsub_ps(tmp1, tmp2);
00200             phase_Val = _mm256_addsub_ps(tmp1p, tmp2p);
00201             
00202             _mm256_store_ps((float*)cPtr, z);
00203             
00204             aPtr += 4;
00205             cPtr += 4;
00206         }
00207         tmp1 = _mm256_mul_ps(phase_Val, phase_Val);
00208         tmp2 = _mm256_hadd_ps(tmp1, tmp1);
00209         tmp1 = _mm256_shuffle_ps(tmp2, tmp2, 0xD8);
00210         phase_Val = _mm256_div_ps(phase_Val, tmp1);
00211     }
00212     for(i = 0; i < fourthPoints%ROTATOR_RELOAD; ++i) {
00213         aVal = _mm256_load_ps((float*)aPtr);
00214         
00215         yl = _mm256_moveldup_ps(phase_Val);
00216         yh = _mm256_movehdup_ps(phase_Val);
00217         ylp = _mm256_moveldup_ps(inc_Val);
00218         yhp = _mm256_movehdup_ps(inc_Val);
00219         
00220         tmp1 = _mm256_mul_ps(aVal, yl);
00221 
00222         tmp1p = _mm256_mul_ps(phase_Val, ylp);
00223         
00224         aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1);
00225         phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1);
00226         tmp2 = _mm256_mul_ps(aVal, yh);
00227         tmp2p = _mm256_mul_ps(phase_Val, yhp);
00228         
00229         z = _mm256_addsub_ps(tmp1, tmp2);
00230         phase_Val = _mm256_addsub_ps(tmp1p, tmp2p);
00231         
00232         _mm256_store_ps((float*)cPtr, z);
00233         
00234         aPtr += 4;
00235         cPtr += 4;
00236     }
00237 
00238     _mm256_storeu_ps((float*)phase_Ptr, phase_Val);
00239     for(i = 0; i < num_points%4; ++i) {
00240         *cPtr++ = *aPtr++ * phase_Ptr[0];
00241         phase_Ptr[0] *= (phase_inc);
00242     }
00243      
00244     (*phase) = phase_Ptr[0];
00245 
00246 }
00247     
00248 #endif /* LV_HAVE_AVX */
00249 
00250 
00251 
00252 
00253 
00254 
00255 
00256 
00257 #endif /* INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H */