GNU Radio 3.7.3 C++ API
volk_32fc_s32fc_x2_rotator_32fc.h
Go to the documentation of this file.
1 #ifndef INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H
2 #define INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H
3 
4 
5 #include <volk/volk_complex.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #define ROTATOR_RELOAD 512
9 
10 
11 #ifdef LV_HAVE_GENERIC
12 
13 /*!
14  \brief rotate input vector at fixed rate per sample from initial phase offset
15  \param outVector The vector where the results will be stored
16  \param inVector Vector to be rotated
17  \param phase_inc rotational velocity
18  \param phase initial phase offset
19  \param num_points The number of values in inVector to be rotated and stored into cVector
20 */
21 static inline void volk_32fc_s32fc_x2_rotator_32fc_generic(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){
22  unsigned int i = 0;
23  int j = 0;
24  for(i = 0; i < (unsigned int)(num_points/ROTATOR_RELOAD); ++i) {
25  for(j = 0; j < ROTATOR_RELOAD; ++j) {
26  *outVector++ = *inVector++ * (*phase);
27  (*phase) *= phase_inc;
28  }
29 #ifdef __cplusplus
30  (*phase) /= std::abs((*phase));
31 #else
32  (*phase) /= cabsf((*phase));
33 #endif
34  }
35  for(i = 0; i < num_points%ROTATOR_RELOAD; ++i) {
36  *outVector++ = *inVector++ * (*phase);
37  (*phase) *= phase_inc;
38  }
39 
40 }
41 
42 #endif /* LV_HAVE_GENERIC */
43 
44 
45 #ifdef LV_HAVE_SSE4_1
46 #include <smmintrin.h>
47 
/*!
 \brief rotate input vector at fixed rate per sample from initial phase offset (aligned SSE4.1 version)
 \param outVector The vector where the results will be stored (16-byte aligned)
 \param inVector Vector to be rotated (16-byte aligned)
 \param phase_inc rotational velocity (per-sample phase increment as a complex value)
 \param phase initial phase offset; updated on return to the phase following the last sample
 \param num_points The number of values in inVector to be rotated and stored into outVector
*/
static inline void volk_32fc_s32fc_x2_rotator_32fc_a_sse4_1(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){
    lv_32fc_t* cPtr = outVector;
    const lv_32fc_t* aPtr = inVector;
    lv_32fc_t incr = 1;
    /* Two consecutive phase values are packed in one SSE register. */
    lv_32fc_t phase_Ptr[2] = {(*phase), (*phase)};

    unsigned int i, j = 0;

    /* After this loop phase_Ptr = { phase, phase*phase_inc } and
       incr = phase_inc^2, the per-vector-iteration phase advance. */
    for(i = 0; i < 2; ++i) {
        phase_Ptr[i] *= incr;
        incr *= (phase_inc);
    }

    /*printf("%f, %f\n", lv_creal(phase_Ptr[0]), lv_cimag(phase_Ptr[0]));
    printf("%f, %f\n", lv_creal(phase_Ptr[1]), lv_cimag(phase_Ptr[1]));
    printf("incr: %f, %f\n", lv_creal(incr), lv_cimag(incr));*/
    __m128 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p;

    phase_Val = _mm_loadu_ps((float*)phase_Ptr);
    /* Broadcast (re, im) of the squared increment into both complex lanes. */
    inc_Val = _mm_set_ps(lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr));

    /* Two complex floats are processed per SSE iteration. */
    const unsigned int halfPoints = num_points / 2;


    for(i = 0; i < (unsigned int)(halfPoints/ROTATOR_RELOAD); i++) {
        for(j = 0; j < ROTATOR_RELOAD; ++j) {

            aVal = _mm_load_ps((float*)aPtr);

            /* Complex multiply via moveldup/movehdup + addsub:
               yl/yh duplicate the real/imag parts of the phase register,
               ylp/yhp do the same for the increment register. */
            yl = _mm_moveldup_ps(phase_Val);
            yh = _mm_movehdup_ps(phase_Val);
            ylp = _mm_moveldup_ps(inc_Val);
            yhp = _mm_movehdup_ps(inc_Val);

            tmp1 = _mm_mul_ps(aVal, yl);
            tmp1p = _mm_mul_ps(phase_Val, ylp);

            /* 0xB1 swaps the real/imag halves within each complex lane. */
            aVal = _mm_shuffle_ps(aVal, aVal, 0xB1);
            phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1);
            tmp2 = _mm_mul_ps(aVal, yh);
            tmp2p = _mm_mul_ps(phase_Val, yhp);

            /* z = input * phase; phase_Val = phase * phase_inc^2. */
            z = _mm_addsub_ps(tmp1, tmp2);
            phase_Val = _mm_addsub_ps(tmp1p, tmp2p);

            _mm_store_ps((float*)cPtr, z);

            aPtr += 2;
            cPtr += 2;
        }
        /* Renormalize the packed phases to unit magnitude to bound float
           drift: square, pairwise-sum (re^2+im^2), arrange, sqrt, divide. */
        tmp1 = _mm_mul_ps(phase_Val, phase_Val);
        tmp2 = _mm_hadd_ps(tmp1, tmp1);
        tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
        tmp2 = _mm_sqrt_ps(tmp1);
        phase_Val = _mm_div_ps(phase_Val, tmp2);
    }
    /* Remaining full SSE iterations (fewer than one reload period). */
    for(i = 0; i < halfPoints%ROTATOR_RELOAD; ++i) {
        aVal = _mm_load_ps((float*)aPtr);

        yl = _mm_moveldup_ps(phase_Val);
        yh = _mm_movehdup_ps(phase_Val);
        ylp = _mm_moveldup_ps(inc_Val);
        yhp = _mm_movehdup_ps(inc_Val);

        tmp1 = _mm_mul_ps(aVal, yl);

        tmp1p = _mm_mul_ps(phase_Val, ylp);

        aVal = _mm_shuffle_ps(aVal, aVal, 0xB1);
        phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1);
        tmp2 = _mm_mul_ps(aVal, yh);
        tmp2p = _mm_mul_ps(phase_Val, yhp);

        z = _mm_addsub_ps(tmp1, tmp2);
        phase_Val = _mm_addsub_ps(tmp1p, tmp2p);

        _mm_store_ps((float*)cPtr, z);

        aPtr += 2;
        cPtr += 2;
    }

    /* Spill the packed phases; handle a possible odd trailing sample scalar-wise. */
    _mm_storeu_ps((float*)phase_Ptr, phase_Val);
    for(i = 0; i < num_points%2; ++i) {
        *cPtr++ = *aPtr++ * phase_Ptr[0];
        phase_Ptr[0] *= (phase_inc);
    }

    /* Return the phase following the last processed sample. */
    (*phase) = phase_Ptr[0];

}
139 
140 #endif /* LV_HAVE_SSE4_1 for aligned */
141 
142 
143 #ifdef LV_HAVE_SSE4_1
144 #include <smmintrin.h>
145 
146 /*!
147  \brief rotate input vector at fixed rate per sample from initial phase offset
148  \param outVector The vector where the results will be stored
149  \param inVector Vector to be rotated
150  \param phase_inc rotational velocity
151  \param phase initial phase offset
152  \param num_points The number of values in inVector to be rotated and stored into cVector
153 */
static inline void volk_32fc_s32fc_x2_rotator_32fc_u_sse4_1(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){
    /* Unaligned SSE4.1 variant: identical to the aligned kernel except that
       input/output vectors are accessed with _mm_loadu_ps/_mm_storeu_ps. */
    lv_32fc_t* cPtr = outVector;
    const lv_32fc_t* aPtr = inVector;
    lv_32fc_t incr = 1;
    /* Two consecutive phase values are packed in one SSE register. */
    lv_32fc_t phase_Ptr[2] = {(*phase), (*phase)};

    unsigned int i, j = 0;

    /* After this loop phase_Ptr = { phase, phase*phase_inc } and
       incr = phase_inc^2, the per-vector-iteration phase advance. */
    for(i = 0; i < 2; ++i) {
        phase_Ptr[i] *= incr;
        incr *= (phase_inc);
    }

    /*printf("%f, %f\n", lv_creal(phase_Ptr[0]), lv_cimag(phase_Ptr[0]));
    printf("%f, %f\n", lv_creal(phase_Ptr[1]), lv_cimag(phase_Ptr[1]));
    printf("incr: %f, %f\n", lv_creal(incr), lv_cimag(incr));*/
    __m128 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p;

    phase_Val = _mm_loadu_ps((float*)phase_Ptr);
    /* Broadcast (re, im) of the squared increment into both complex lanes. */
    inc_Val = _mm_set_ps(lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr));

    /* Two complex floats are processed per SSE iteration. */
    const unsigned int halfPoints = num_points / 2;


    for(i = 0; i < (unsigned int)(halfPoints/ROTATOR_RELOAD); i++) {
        for(j = 0; j < ROTATOR_RELOAD; ++j) {

            aVal = _mm_loadu_ps((float*)aPtr);

            /* Complex multiply via moveldup/movehdup + addsub:
               yl/yh duplicate the real/imag parts of the phase register,
               ylp/yhp do the same for the increment register. */
            yl = _mm_moveldup_ps(phase_Val);
            yh = _mm_movehdup_ps(phase_Val);
            ylp = _mm_moveldup_ps(inc_Val);
            yhp = _mm_movehdup_ps(inc_Val);

            tmp1 = _mm_mul_ps(aVal, yl);
            tmp1p = _mm_mul_ps(phase_Val, ylp);

            /* 0xB1 swaps the real/imag halves within each complex lane. */
            aVal = _mm_shuffle_ps(aVal, aVal, 0xB1);
            phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1);
            tmp2 = _mm_mul_ps(aVal, yh);
            tmp2p = _mm_mul_ps(phase_Val, yhp);

            /* z = input * phase; phase_Val = phase * phase_inc^2. */
            z = _mm_addsub_ps(tmp1, tmp2);
            phase_Val = _mm_addsub_ps(tmp1p, tmp2p);

            _mm_storeu_ps((float*)cPtr, z);

            aPtr += 2;
            cPtr += 2;
        }
        /* Renormalize the packed phases to unit magnitude to bound float
           drift: square, pairwise-sum (re^2+im^2), arrange, sqrt, divide. */
        tmp1 = _mm_mul_ps(phase_Val, phase_Val);
        tmp2 = _mm_hadd_ps(tmp1, tmp1);
        tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
        tmp2 = _mm_sqrt_ps(tmp1);
        phase_Val = _mm_div_ps(phase_Val, tmp2);
    }
    /* Remaining full SSE iterations (fewer than one reload period). */
    for(i = 0; i < halfPoints%ROTATOR_RELOAD; ++i) {
        aVal = _mm_loadu_ps((float*)aPtr);

        yl = _mm_moveldup_ps(phase_Val);
        yh = _mm_movehdup_ps(phase_Val);
        ylp = _mm_moveldup_ps(inc_Val);
        yhp = _mm_movehdup_ps(inc_Val);

        tmp1 = _mm_mul_ps(aVal, yl);

        tmp1p = _mm_mul_ps(phase_Val, ylp);

        aVal = _mm_shuffle_ps(aVal, aVal, 0xB1);
        phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1);
        tmp2 = _mm_mul_ps(aVal, yh);
        tmp2p = _mm_mul_ps(phase_Val, yhp);

        z = _mm_addsub_ps(tmp1, tmp2);
        phase_Val = _mm_addsub_ps(tmp1p, tmp2p);

        _mm_storeu_ps((float*)cPtr, z);

        aPtr += 2;
        cPtr += 2;
    }

    /* Spill the packed phases; handle a possible odd trailing sample scalar-wise. */
    _mm_storeu_ps((float*)phase_Ptr, phase_Val);
    for(i = 0; i < num_points%2; ++i) {
        *cPtr++ = *aPtr++ * phase_Ptr[0];
        phase_Ptr[0] *= (phase_inc);
    }

    /* Return the phase following the last processed sample. */
    (*phase) = phase_Ptr[0];

}
245 
246 #endif /* LV_HAVE_SSE4_1 */
247 
248 
249 #ifdef LV_HAVE_AVX
250 #include <immintrin.h>
251 
252 /*!
253  \brief rotate input vector at fixed rate per sample from initial phase offset
254  \param outVector The vector where the results will be stored
255  \param inVector Vector to be rotated
256  \param phase_inc rotational velocity
257  \param phase initial phase offset
258  \param num_points The number of values in inVector to be rotated and stored into cVector
259 */
static inline void volk_32fc_s32fc_x2_rotator_32fc_a_avx(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){
    /* Aligned AVX variant: processes four complex floats per iteration.
       in/out vectors are accessed with aligned _mm256_load_ps/_mm256_store_ps,
       so they must be 32-byte aligned. */
    lv_32fc_t* cPtr = outVector;
    const lv_32fc_t* aPtr = inVector;
    lv_32fc_t incr = 1;
    /* Four consecutive phase values are packed in one AVX register. */
    lv_32fc_t phase_Ptr[4] = {(*phase), (*phase), (*phase), (*phase)};

    unsigned int i, j = 0;

    /* After this loop phase_Ptr = { phase, phase*inc, phase*inc^2, phase*inc^3 }
       and incr = phase_inc^4, the per-vector-iteration phase advance. */
    for(i = 0; i < 4; ++i) {
        phase_Ptr[i] *= incr;
        incr *= (phase_inc);
    }

    /*printf("%f, %f\n", lv_creal(phase_Ptr[0]), lv_cimag(phase_Ptr[0]));
    printf("%f, %f\n", lv_creal(phase_Ptr[1]), lv_cimag(phase_Ptr[1]));
    printf("%f, %f\n", lv_creal(phase_Ptr[2]), lv_cimag(phase_Ptr[2]));
    printf("%f, %f\n", lv_creal(phase_Ptr[3]), lv_cimag(phase_Ptr[3]));
    printf("incr: %f, %f\n", lv_creal(incr), lv_cimag(incr));*/
    __m256 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p;

    phase_Val = _mm256_loadu_ps((float*)phase_Ptr);
    /* Broadcast (re, im) of phase_inc^4 into all four complex lanes. */
    inc_Val = _mm256_set_ps(lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr));
    /* Four complex floats are processed per AVX iteration. */
    const unsigned int fourthPoints = num_points / 4;


    for(i = 0; i < (unsigned int)(fourthPoints/ROTATOR_RELOAD); i++) {
        for(j = 0; j < ROTATOR_RELOAD; ++j) {

            aVal = _mm256_load_ps((float*)aPtr);

            /* Complex multiply via moveldup/movehdup + addsub:
               yl/yh duplicate the real/imag parts of the phase register,
               ylp/yhp do the same for the increment register. */
            yl = _mm256_moveldup_ps(phase_Val);
            yh = _mm256_movehdup_ps(phase_Val);
            ylp = _mm256_moveldup_ps(inc_Val);
            yhp = _mm256_movehdup_ps(inc_Val);

            tmp1 = _mm256_mul_ps(aVal, yl);
            tmp1p = _mm256_mul_ps(phase_Val, ylp);

            /* 0xB1 swaps the real/imag halves within each complex lane. */
            aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1);
            phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1);
            tmp2 = _mm256_mul_ps(aVal, yh);
            tmp2p = _mm256_mul_ps(phase_Val, yhp);

            /* z = input * phase; phase_Val = phase * phase_inc^4. */
            z = _mm256_addsub_ps(tmp1, tmp2);
            phase_Val = _mm256_addsub_ps(tmp1p, tmp2p);

            _mm256_store_ps((float*)cPtr, z);

            aPtr += 4;
            cPtr += 4;
        }
        /* Renormalize the packed phases to unit magnitude to bound float
           drift: square, pairwise-sum (re^2+im^2), arrange, sqrt, divide. */
        tmp1 = _mm256_mul_ps(phase_Val, phase_Val);
        tmp2 = _mm256_hadd_ps(tmp1, tmp1);
        tmp1 = _mm256_shuffle_ps(tmp2, tmp2, 0xD8);
        tmp2 = _mm256_sqrt_ps(tmp1);
        phase_Val = _mm256_div_ps(phase_Val, tmp2);
    }
    /* Remaining full AVX iterations (fewer than one reload period). */
    for(i = 0; i < fourthPoints%ROTATOR_RELOAD; ++i) {
        aVal = _mm256_load_ps((float*)aPtr);

        yl = _mm256_moveldup_ps(phase_Val);
        yh = _mm256_movehdup_ps(phase_Val);
        ylp = _mm256_moveldup_ps(inc_Val);
        yhp = _mm256_movehdup_ps(inc_Val);

        tmp1 = _mm256_mul_ps(aVal, yl);

        tmp1p = _mm256_mul_ps(phase_Val, ylp);

        aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1);
        phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1);
        tmp2 = _mm256_mul_ps(aVal, yh);
        tmp2p = _mm256_mul_ps(phase_Val, yhp);

        z = _mm256_addsub_ps(tmp1, tmp2);
        phase_Val = _mm256_addsub_ps(tmp1p, tmp2p);

        _mm256_store_ps((float*)cPtr, z);

        aPtr += 4;
        cPtr += 4;
    }

    /* Spill the packed phases; handle up to three trailing samples scalar-wise. */
    _mm256_storeu_ps((float*)phase_Ptr, phase_Val);
    for(i = 0; i < num_points%4; ++i) {
        *cPtr++ = *aPtr++ * phase_Ptr[0];
        phase_Ptr[0] *= (phase_inc);
    }

    /* Return the phase following the last processed sample. */
    (*phase) = phase_Ptr[0];

}
352 
353 #endif /* LV_HAVE_AVX for aligned */
354 
355 
356 #ifdef LV_HAVE_AVX
357 #include <immintrin.h>
358 
359 /*!
360  \brief rotate input vector at fixed rate per sample from initial phase offset
361  \param outVector The vector where the results will be stored
362  \param inVector Vector to be rotated
363  \param phase_inc rotational velocity
364  \param phase initial phase offset
365  \param num_points The number of values in inVector to be rotated and stored into cVector
366 */
static inline void volk_32fc_s32fc_x2_rotator_32fc_u_avx(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){
    /* Unaligned AVX variant: identical to the aligned AVX kernel except that
       input/output vectors are accessed with _mm256_loadu_ps/_mm256_storeu_ps. */
    lv_32fc_t* cPtr = outVector;
    const lv_32fc_t* aPtr = inVector;
    lv_32fc_t incr = 1;
    /* Four consecutive phase values are packed in one AVX register. */
    lv_32fc_t phase_Ptr[4] = {(*phase), (*phase), (*phase), (*phase)};

    unsigned int i, j = 0;

    /* After this loop phase_Ptr = { phase, phase*inc, phase*inc^2, phase*inc^3 }
       and incr = phase_inc^4, the per-vector-iteration phase advance. */
    for(i = 0; i < 4; ++i) {
        phase_Ptr[i] *= incr;
        incr *= (phase_inc);
    }

    /*printf("%f, %f\n", lv_creal(phase_Ptr[0]), lv_cimag(phase_Ptr[0]));
    printf("%f, %f\n", lv_creal(phase_Ptr[1]), lv_cimag(phase_Ptr[1]));
    printf("%f, %f\n", lv_creal(phase_Ptr[2]), lv_cimag(phase_Ptr[2]));
    printf("%f, %f\n", lv_creal(phase_Ptr[3]), lv_cimag(phase_Ptr[3]));
    printf("incr: %f, %f\n", lv_creal(incr), lv_cimag(incr));*/
    __m256 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p;

    phase_Val = _mm256_loadu_ps((float*)phase_Ptr);
    /* Broadcast (re, im) of phase_inc^4 into all four complex lanes. */
    inc_Val = _mm256_set_ps(lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr));
    /* Four complex floats are processed per AVX iteration. */
    const unsigned int fourthPoints = num_points / 4;


    for(i = 0; i < (unsigned int)(fourthPoints/ROTATOR_RELOAD); i++) {
        for(j = 0; j < ROTATOR_RELOAD; ++j) {

            aVal = _mm256_loadu_ps((float*)aPtr);

            /* Complex multiply via moveldup/movehdup + addsub:
               yl/yh duplicate the real/imag parts of the phase register,
               ylp/yhp do the same for the increment register. */
            yl = _mm256_moveldup_ps(phase_Val);
            yh = _mm256_movehdup_ps(phase_Val);
            ylp = _mm256_moveldup_ps(inc_Val);
            yhp = _mm256_movehdup_ps(inc_Val);

            tmp1 = _mm256_mul_ps(aVal, yl);
            tmp1p = _mm256_mul_ps(phase_Val, ylp);

            /* 0xB1 swaps the real/imag halves within each complex lane. */
            aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1);
            phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1);
            tmp2 = _mm256_mul_ps(aVal, yh);
            tmp2p = _mm256_mul_ps(phase_Val, yhp);

            /* z = input * phase; phase_Val = phase * phase_inc^4. */
            z = _mm256_addsub_ps(tmp1, tmp2);
            phase_Val = _mm256_addsub_ps(tmp1p, tmp2p);

            _mm256_storeu_ps((float*)cPtr, z);

            aPtr += 4;
            cPtr += 4;
        }
        /* Renormalize the packed phases to unit magnitude to bound float
           drift: square, pairwise-sum (re^2+im^2), arrange, sqrt, divide. */
        tmp1 = _mm256_mul_ps(phase_Val, phase_Val);
        tmp2 = _mm256_hadd_ps(tmp1, tmp1);
        tmp1 = _mm256_shuffle_ps(tmp2, tmp2, 0xD8);
        tmp2 = _mm256_sqrt_ps(tmp1);
        phase_Val = _mm256_div_ps(phase_Val, tmp2);
    }
    /* Remaining full AVX iterations (fewer than one reload period). */
    for(i = 0; i < fourthPoints%ROTATOR_RELOAD; ++i) {
        aVal = _mm256_loadu_ps((float*)aPtr);

        yl = _mm256_moveldup_ps(phase_Val);
        yh = _mm256_movehdup_ps(phase_Val);
        ylp = _mm256_moveldup_ps(inc_Val);
        yhp = _mm256_movehdup_ps(inc_Val);

        tmp1 = _mm256_mul_ps(aVal, yl);

        tmp1p = _mm256_mul_ps(phase_Val, ylp);

        aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1);
        phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1);
        tmp2 = _mm256_mul_ps(aVal, yh);
        tmp2p = _mm256_mul_ps(phase_Val, yhp);

        z = _mm256_addsub_ps(tmp1, tmp2);
        phase_Val = _mm256_addsub_ps(tmp1p, tmp2p);

        _mm256_storeu_ps((float*)cPtr, z);

        aPtr += 4;
        cPtr += 4;
    }

    /* Spill the packed phases; handle up to three trailing samples scalar-wise. */
    _mm256_storeu_ps((float*)phase_Ptr, phase_Val);
    for(i = 0; i < num_points%4; ++i) {
        *cPtr++ = *aPtr++ * phase_Ptr[0];
        phase_Ptr[0] *= (phase_inc);
    }

    /* Return the phase following the last processed sample. */
    (*phase) = phase_Ptr[0];

}
459 
460 #endif /* LV_HAVE_AVX */
461 
462 #endif /* INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H */
float complex lv_32fc_t
Definition: volk_complex.h:56
#define lv_creal(x)
Definition: volk_complex.h:76
#define ROTATOR_RELOAD
Definition: volk_32fc_s32fc_x2_rotator_32fc.h:8
#define lv_cimag(x)
Definition: volk_complex.h:78
uint32_t i[4]
Definition: volk_common.h:80