GNU Radio 3.6.5 C++ API
|
00001 #ifndef INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H 00002 #define INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H 00003 00004 00005 #include <volk/volk_complex.h> 00006 #include <stdio.h> 00007 #include <stdlib.h> 00008 #define ROTATOR_RELOAD 512 00009 00010 00011 #ifdef LV_HAVE_GENERIC 00012 00013 /*! 00014 \brief rotate input vector at fixed rate per sample from initial phase offset 00015 \param outVector The vector where the results will be stored 00016 \param inVector Vector to be rotated 00017 \param phase_inc rotational velocity 00018 \param phase initial phase offset 00019 \param num_points The number of values in inVector to be rotated and stored into cVector 00020 */ 00021 00022 00023 static inline void volk_32fc_s32fc_x2_rotator_32fc_a_generic(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){ 00024 unsigned int i = 0; 00025 int j = 0; 00026 for(i = 0; i < (unsigned int)(num_points/ROTATOR_RELOAD); ++i) { 00027 for(j = 0; j < ROTATOR_RELOAD; ++j) { 00028 *outVector++ = *inVector++ * (*phase); 00029 (*phase) *= phase_inc; 00030 } 00031 (*phase) /= abs((*phase)); 00032 } 00033 for(i = 0; i < num_points%ROTATOR_RELOAD; ++i) { 00034 *outVector++ = *inVector++ * (*phase); 00035 (*phase) *= phase_inc; 00036 } 00037 00038 } 00039 #endif /* LV_HAVE_GENERIC */ 00040 00041 00042 #ifdef LV_HAVE_SSE4_1 00043 #include <smmintrin.h> 00044 00045 static inline void volk_32fc_s32fc_x2_rotator_32fc_a_sse4_1(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){ 00046 lv_32fc_t* cPtr = outVector; 00047 const lv_32fc_t* aPtr = inVector; 00048 lv_32fc_t incr = 1; 00049 lv_32fc_t phase_Ptr[2] = {(*phase), (*phase)}; 00050 00051 unsigned int i, j = 0; 00052 00053 for(i = 0; i < 2; ++i) { 00054 phase_Ptr[i] *= incr; 00055 incr *= (phase_inc); 00056 } 00057 00058 /*printf("%f, %f\n", lv_creal(phase_Ptr[0]), lv_cimag(phase_Ptr[0])); 00059 printf("%f, %f\n", lv_creal(phase_Ptr[1]), lv_cimag(phase_Ptr[1])); 00060 printf("%f, %f\n", lv_creal(phase_Ptr[2]), lv_cimag(phase_Ptr[2])); 00061 printf("%f, %f\n", lv_creal(phase_Ptr[3]), lv_cimag(phase_Ptr[3])); 00062 printf("incr: %f, %f\n", lv_creal(incr), lv_cimag(incr));*/ 00063 __m128 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p; 00064 00065 phase_Val = _mm_loadu_ps((float*)phase_Ptr); 00066 inc_Val = _mm_set_ps(lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr)); 00067 00068 const unsigned int halfPoints = num_points / 2; 00069 00070 00071 for(i = 0; i < (unsigned int)(halfPoints/ROTATOR_RELOAD); i++) { 00072 for(j = 0; j < ROTATOR_RELOAD; ++j) { 00073 00074 aVal = _mm_load_ps((float*)aPtr); 00075 00076 yl = _mm_moveldup_ps(phase_Val); 00077 yh = _mm_movehdup_ps(phase_Val); 00078 ylp = _mm_moveldup_ps(inc_Val); 00079 yhp = _mm_movehdup_ps(inc_Val); 00080 00081 tmp1 = _mm_mul_ps(aVal, yl); 00082 tmp1p = _mm_mul_ps(phase_Val, ylp); 00083 00084 aVal = _mm_shuffle_ps(aVal, aVal, 0xB1); 00085 phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1); 00086 tmp2 = _mm_mul_ps(aVal, yh); 00087 tmp2p = _mm_mul_ps(phase_Val, yhp); 00088 00089 z = _mm_addsub_ps(tmp1, tmp2); 00090 phase_Val = _mm_addsub_ps(tmp1p, tmp2p); 00091 00092 _mm_store_ps((float*)cPtr, z); 00093 00094 aPtr += 2; 00095 cPtr += 2; 00096 } 00097 tmp1 = _mm_mul_ps(phase_Val, phase_Val); 00098 tmp2 = _mm_hadd_ps(tmp1, tmp1); 00099 tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8); 00100 phase_Val = _mm_div_ps(phase_Val, tmp1); 00101 } 00102 for(i = 0; i < halfPoints%ROTATOR_RELOAD; ++i) { 00103 aVal = _mm_load_ps((float*)aPtr); 00104 00105 yl = _mm_moveldup_ps(phase_Val); 00106 yh = _mm_movehdup_ps(phase_Val); 00107 ylp = _mm_moveldup_ps(inc_Val); 00108 yhp = _mm_movehdup_ps(inc_Val); 00109 00110 tmp1 = _mm_mul_ps(aVal, yl); 00111 00112 tmp1p = _mm_mul_ps(phase_Val, ylp); 00113 00114 aVal = _mm_shuffle_ps(aVal, aVal, 0xB1); 00115 phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1); 00116 tmp2 = _mm_mul_ps(aVal, yh); 00117 tmp2p = _mm_mul_ps(phase_Val, yhp); 00118 00119 z = _mm_addsub_ps(tmp1, tmp2); 00120 phase_Val = _mm_addsub_ps(tmp1p, tmp2p); 00121 00122 _mm_store_ps((float*)cPtr, z); 00123 00124 aPtr += 2; 00125 cPtr += 2; 00126 } 00127 00128 _mm_storeu_ps((float*)phase_Ptr, phase_Val); 00129 for(i = 0; i < num_points%2; ++i) { 00130 *cPtr++ = *aPtr++ * phase_Ptr[0]; 00131 phase_Ptr[0] *= (phase_inc); 00132 } 00133 00134 (*phase) = phase_Ptr[0]; 00135 00136 } 00137 00138 #endif /* LV_HAVE_SSE4_1 */ 00139 00140 00141 #ifdef LV_HAVE_AVX 00142 #include <immintrin.h> 00143 00144 /*! 00145 \brief rotate input vector at fixed rate per sample from initial phase offset 00146 \param outVector The vector where the results will be stored 00147 \param inVector Vector to be rotated 00148 \param phase_inc rotational velocity 00149 \param phase initial phase offset 00150 \param num_points The number of values in inVector to be rotated and stored into cVector 00151 */ 00152 00153 00154 00155 00156 static inline void volk_32fc_s32fc_x2_rotator_32fc_a_avx(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){ 00157 lv_32fc_t* cPtr = outVector; 00158 const lv_32fc_t* aPtr = inVector; 00159 lv_32fc_t incr = 1; 00160 lv_32fc_t phase_Ptr[4] = {(*phase), (*phase), (*phase), (*phase)}; 00161 00162 unsigned int i, j = 0; 00163 00164 for(i = 0; i < 4; ++i) { 00165 phase_Ptr[i] *= incr; 00166 incr *= (phase_inc); 00167 } 00168 00169 /*printf("%f, %f\n", lv_creal(phase_Ptr[0]), lv_cimag(phase_Ptr[0])); 00170 printf("%f, %f\n", lv_creal(phase_Ptr[1]), lv_cimag(phase_Ptr[1])); 00171 printf("%f, %f\n", lv_creal(phase_Ptr[2]), lv_cimag(phase_Ptr[2])); 00172 printf("%f, %f\n", lv_creal(phase_Ptr[3]), lv_cimag(phase_Ptr[3])); 00173 printf("incr: %f, %f\n", lv_creal(incr), lv_cimag(incr));*/ 00174 __m256 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p; 00175 00176 phase_Val = _mm256_loadu_ps((float*)phase_Ptr); 00177 inc_Val = _mm256_set_ps(lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr)); 00178 const unsigned int fourthPoints = num_points / 4; 00179 00180 00181 for(i = 0; i < (unsigned int)(fourthPoints/ROTATOR_RELOAD); i++) { 00182 for(j = 0; j < ROTATOR_RELOAD; ++j) { 00183 00184 aVal = _mm256_load_ps((float*)aPtr); 00185 00186 yl = _mm256_moveldup_ps(phase_Val); 00187 yh = _mm256_movehdup_ps(phase_Val); 00188 ylp = _mm256_moveldup_ps(inc_Val); 00189 yhp = _mm256_movehdup_ps(inc_Val); 00190 00191 tmp1 = _mm256_mul_ps(aVal, yl); 00192 tmp1p = _mm256_mul_ps(phase_Val, ylp); 00193 00194 aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1); 00195 phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1); 00196 tmp2 = _mm256_mul_ps(aVal, yh); 00197 tmp2p = _mm256_mul_ps(phase_Val, yhp); 00198 00199 z = _mm256_addsub_ps(tmp1, tmp2); 00200 phase_Val = _mm256_addsub_ps(tmp1p, tmp2p); 00201 00202 _mm256_store_ps((float*)cPtr, z); 00203 00204 aPtr += 4; 00205 cPtr += 4; 00206 } 00207 tmp1 = _mm256_mul_ps(phase_Val, phase_Val); 00208 tmp2 = _mm256_hadd_ps(tmp1, tmp1); 00209 tmp1 = _mm256_shuffle_ps(tmp2, tmp2, 0xD8); 00210 phase_Val = _mm256_div_ps(phase_Val, tmp1); 00211 } 00212 for(i = 0; i < fourthPoints%ROTATOR_RELOAD; ++i) { 00213 aVal = _mm256_load_ps((float*)aPtr); 00214 00215 yl = _mm256_moveldup_ps(phase_Val); 00216 yh = _mm256_movehdup_ps(phase_Val); 00217 ylp = _mm256_moveldup_ps(inc_Val); 00218 yhp = _mm256_movehdup_ps(inc_Val); 00219 00220 tmp1 = _mm256_mul_ps(aVal, yl); 00221 00222 tmp1p = _mm256_mul_ps(phase_Val, ylp); 00223 00224 aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1); 00225 phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1); 00226 tmp2 = _mm256_mul_ps(aVal, yh); 00227 tmp2p = _mm256_mul_ps(phase_Val, yhp); 00228 00229 z = _mm256_addsub_ps(tmp1, tmp2); 00230 phase_Val = _mm256_addsub_ps(tmp1p, tmp2p); 00231 00232 _mm256_store_ps((float*)cPtr, z); 00233 00234 aPtr += 4; 00235 cPtr += 4; 00236 } 00237 00238 _mm256_storeu_ps((float*)phase_Ptr, phase_Val); 00239 for(i = 0; i < num_points%4; ++i) { 00240 *cPtr++ = *aPtr++ * phase_Ptr[0]; 00241 phase_Ptr[0] *= (phase_inc); 00242 } 00243 00244 (*phase) = phase_Ptr[0]; 00245 00246 } 00247 00248 #endif /* LV_HAVE_AVX */ 00249 00250 00251 00252 00253 00254 00255 00256 00257 #endif /* INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H */