GNU Radio 3.7.2 C++ API
volk_32fc_s32fc_x2_rotator_32fc.h
#ifndef INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H
#define INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H


#include <volk/volk_complex.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
/* Re-normalize the running phase every ROTATOR_RELOAD samples so that
   floating-point rounding error does not accumulate without bound. */
#define ROTATOR_RELOAD 512


#ifdef LV_HAVE_GENERIC

/*!
 \brief rotate input vector at fixed rate per sample from initial phase offset
 \param outVector The vector where the results will be stored
 \param inVector Vector to be rotated
 \param phase_inc phase increment per sample (a unit-magnitude complex number)
 \param phase initial phase offset; updated on return so rotation can continue across calls
 \param num_points The number of values in inVector to be rotated and stored into outVector
*/
static inline void volk_32fc_s32fc_x2_rotator_32fc_generic(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){
    unsigned int i = 0;
    int j = 0;
    for(i = 0; i < (unsigned int)(num_points/ROTATOR_RELOAD); ++i) {
        for(j = 0; j < ROTATOR_RELOAD; ++j) {
            *outVector++ = *inVector++ * (*phase);
            (*phase) *= phase_inc;
        }
        /* Scale the phase back to unit magnitude so rounding error does not accumulate. */
        (*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
    }
    for(i = 0; i < num_points%ROTATOR_RELOAD; ++i) {
        *outVector++ = *inVector++ * (*phase);
        (*phase) *= phase_inc;
    }
}
#endif /* LV_HAVE_GENERIC */

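For reference, here is a minimal usage sketch of the generic kernel. Only the kernel itself and the lv_32fc_t/lv_cmake() helpers from volk/volk_complex.h are taken from VOLK; the wrapper name example_rotate and its parameters are purely illustrative. A frequency shift of f_shift Hz at sample rate f_samp corresponds to a per-sample phase increment of 2*pi*f_shift/f_samp radians, packed into a unit-magnitude complex number. In application code the runtime dispatcher volk_32fc_s32fc_x2_rotator_32fc() declared in volk/volk.h would normally be called instead of a specific variant, so the fastest available implementation is selected.

#include <math.h>
#include <volk/volk_complex.h>

/* Illustrative only: frequency-shift a buffer of N complex samples by f_shift Hz. */
static void example_rotate(lv_32fc_t* out, const lv_32fc_t* in,
                           float f_shift, float f_samp, unsigned int N)
{
    const float w = 6.283185307179586f * f_shift / f_samp;   /* 2*pi*f/fs, radians per sample */
    const lv_32fc_t phase_inc = lv_cmake(cosf(w), sinf(w));  /* unit-magnitude rotator step   */
    lv_32fc_t phase = lv_cmake(1.0f, 0.0f);                  /* start at zero phase           */

    volk_32fc_s32fc_x2_rotator_32fc_generic(out, in, phase_inc, &phase, N);
    /* 'phase' now holds the phase to continue from on the next buffer. */
}
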
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>

/*!
 \brief rotate input vector at fixed rate per sample from initial phase offset
 \param outVector The vector where the results will be stored
 \param inVector Vector to be rotated
 \param phase_inc phase increment per sample (a unit-magnitude complex number)
 \param phase initial phase offset; updated on return so rotation can continue across calls
 \param num_points The number of values in inVector to be rotated and stored into outVector
*/
static inline void volk_32fc_s32fc_x2_rotator_32fc_a_sse4_1(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){
    lv_32fc_t* cPtr = outVector;
    const lv_32fc_t* aPtr = inVector;
    lv_32fc_t incr = 1;
    lv_32fc_t phase_Ptr[2] = {(*phase), (*phase)};

    unsigned int i, j = 0;

    /* Pre-rotate the two lanes: lane k starts at phase * phase_inc^k, and
       incr ends up as phase_inc^2, the per-iteration increment for both lanes. */
    for(i = 0; i < 2; ++i) {
        phase_Ptr[i] *= incr;
        incr *= (phase_inc);
    }

    __m128 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p;

    phase_Val = _mm_loadu_ps((float*)phase_Ptr);
    inc_Val = _mm_set_ps(lv_cimag(incr), lv_creal(incr), lv_cimag(incr), lv_creal(incr));

    const unsigned int halfPoints = num_points / 2;

    for(i = 0; i < (unsigned int)(halfPoints/ROTATOR_RELOAD); i++) {
        for(j = 0; j < ROTATOR_RELOAD; ++j) {

            aVal = _mm_load_ps((float*)aPtr);

            /* Complex multiply of the two input samples by the two phase lanes, and of
               the phase lanes by the increment (moveldup/movehdup/addsub decomposition). */
            yl = _mm_moveldup_ps(phase_Val);
            yh = _mm_movehdup_ps(phase_Val);
            ylp = _mm_moveldup_ps(inc_Val);
            yhp = _mm_movehdup_ps(inc_Val);

            tmp1 = _mm_mul_ps(aVal, yl);
            tmp1p = _mm_mul_ps(phase_Val, ylp);

            aVal = _mm_shuffle_ps(aVal, aVal, 0xB1);
            phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1);
            tmp2 = _mm_mul_ps(aVal, yh);
            tmp2p = _mm_mul_ps(phase_Val, yhp);

            z = _mm_addsub_ps(tmp1, tmp2);
            phase_Val = _mm_addsub_ps(tmp1p, tmp2p);

            _mm_store_ps((float*)cPtr, z);

            aPtr += 2;
            cPtr += 2;
        }
        /* Scale the phase lanes back toward unit magnitude to counter rounding error. */
        tmp1 = _mm_mul_ps(phase_Val, phase_Val);
        tmp2 = _mm_hadd_ps(tmp1, tmp1);
        tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
        phase_Val = _mm_div_ps(phase_Val, tmp1);
    }
    for(i = 0; i < halfPoints%ROTATOR_RELOAD; ++i) {
        aVal = _mm_load_ps((float*)aPtr);

        yl = _mm_moveldup_ps(phase_Val);
        yh = _mm_movehdup_ps(phase_Val);
        ylp = _mm_moveldup_ps(inc_Val);
        yhp = _mm_movehdup_ps(inc_Val);

        tmp1 = _mm_mul_ps(aVal, yl);
        tmp1p = _mm_mul_ps(phase_Val, ylp);

        aVal = _mm_shuffle_ps(aVal, aVal, 0xB1);
        phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1);
        tmp2 = _mm_mul_ps(aVal, yh);
        tmp2p = _mm_mul_ps(phase_Val, yhp);

        z = _mm_addsub_ps(tmp1, tmp2);
        phase_Val = _mm_addsub_ps(tmp1p, tmp2p);

        _mm_store_ps((float*)cPtr, z);

        aPtr += 2;
        cPtr += 2;
    }

    /* Handle any final odd sample in scalar code and save the phase for the caller. */
    _mm_storeu_ps((float*)phase_Ptr, phase_Val);
    for(i = 0; i < num_points%2; ++i) {
        *cPtr++ = *aPtr++ * phase_Ptr[0];
        phase_Ptr[0] *= (phase_inc);
    }

    (*phase) = phase_Ptr[0];
}

/*!
 \brief rotate input vector at fixed rate per sample from initial phase offset
 \param outVector The vector where the results will be stored
 \param inVector Vector to be rotated
 \param phase_inc phase increment per sample (a unit-magnitude complex number)
 \param phase initial phase offset; updated on return so rotation can continue across calls
 \param num_points The number of values in inVector to be rotated and stored into outVector
*/
static inline void volk_32fc_s32fc_x2_rotator_32fc_u_sse4_1(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){
    lv_32fc_t* cPtr = outVector;
    const lv_32fc_t* aPtr = inVector;
    lv_32fc_t incr = 1;
    lv_32fc_t phase_Ptr[2] = {(*phase), (*phase)};

    unsigned int i, j = 0;

    /* Pre-rotate the two lanes as in the aligned variant. */
    for(i = 0; i < 2; ++i) {
        phase_Ptr[i] *= incr;
        incr *= (phase_inc);
    }

    __m128 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p;

    phase_Val = _mm_loadu_ps((float*)phase_Ptr);
    inc_Val = _mm_set_ps(lv_cimag(incr), lv_creal(incr), lv_cimag(incr), lv_creal(incr));

    const unsigned int halfPoints = num_points / 2;

    for(i = 0; i < (unsigned int)(halfPoints/ROTATOR_RELOAD); i++) {
        for(j = 0; j < ROTATOR_RELOAD; ++j) {

            aVal = _mm_loadu_ps((float*)aPtr);

            yl = _mm_moveldup_ps(phase_Val);
            yh = _mm_movehdup_ps(phase_Val);
            ylp = _mm_moveldup_ps(inc_Val);
            yhp = _mm_movehdup_ps(inc_Val);

            tmp1 = _mm_mul_ps(aVal, yl);
            tmp1p = _mm_mul_ps(phase_Val, ylp);

            aVal = _mm_shuffle_ps(aVal, aVal, 0xB1);
            phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1);
            tmp2 = _mm_mul_ps(aVal, yh);
            tmp2p = _mm_mul_ps(phase_Val, yhp);

            z = _mm_addsub_ps(tmp1, tmp2);
            phase_Val = _mm_addsub_ps(tmp1p, tmp2p);

            _mm_storeu_ps((float*)cPtr, z);

            aPtr += 2;
            cPtr += 2;
        }
        /* Scale the phase lanes back toward unit magnitude to counter rounding error. */
        tmp1 = _mm_mul_ps(phase_Val, phase_Val);
        tmp2 = _mm_hadd_ps(tmp1, tmp1);
        tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
        phase_Val = _mm_div_ps(phase_Val, tmp1);
    }
    for(i = 0; i < halfPoints%ROTATOR_RELOAD; ++i) {
        aVal = _mm_loadu_ps((float*)aPtr);

        yl = _mm_moveldup_ps(phase_Val);
        yh = _mm_movehdup_ps(phase_Val);
        ylp = _mm_moveldup_ps(inc_Val);
        yhp = _mm_movehdup_ps(inc_Val);

        tmp1 = _mm_mul_ps(aVal, yl);
        tmp1p = _mm_mul_ps(phase_Val, ylp);

        aVal = _mm_shuffle_ps(aVal, aVal, 0xB1);
        phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1);
        tmp2 = _mm_mul_ps(aVal, yh);
        tmp2p = _mm_mul_ps(phase_Val, yhp);

        z = _mm_addsub_ps(tmp1, tmp2);
        phase_Val = _mm_addsub_ps(tmp1p, tmp2p);

        _mm_storeu_ps((float*)cPtr, z);

        aPtr += 2;
        cPtr += 2;
    }

    /* Handle any final odd sample in scalar code and save the phase for the caller. */
    _mm_storeu_ps((float*)phase_Ptr, phase_Val);
    for(i = 0; i < num_points%2; ++i) {
        *cPtr++ = *aPtr++ * phase_Ptr[0];
        phase_Ptr[0] *= (phase_inc);
    }

    (*phase) = phase_Ptr[0];
}

#endif /* LV_HAVE_SSE4_1 */
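
Both SSE4.1 kernels above, and the AVX kernels below, compute the complex products with the same moveldup/movehdup/shuffle/addsub decomposition of (a + bi)(c + di) = (ac - bd) + (ad + bc)i: the real parts of the phase are duplicated across each lane pair, the sample's real and imaginary halves are swapped, and the partial products are combined with addsub. The scalar sketch below mirrors one complex lane of that arithmetic; the function name is illustrative and it is not part of the VOLK API.

/* Illustrative only: scalar equivalent of one complex lane of the SIMD kernels. */
static inline lv_32fc_t example_complex_mul(lv_32fc_t a, lv_32fc_t b)
{
    const float ar = lv_creal(a), ai = lv_cimag(a);
    const float br = lv_creal(b), bi = lv_cimag(b);
    /* tmp1 = (ar*br, ai*br)  -- multiply by the duplicated real part of b (moveldup)  */
    /* tmp2 = (ai*bi, ar*bi)  -- swap a's halves (shuffle 0xB1), multiply by the
                                 duplicated imaginary part of b (movehdup)             */
    /* addsub: real = ar*br - ai*bi (subtract), imag = ai*br + ar*bi (add)             */
    return lv_cmake(ar*br - ai*bi, ai*br + ar*bi);
}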


#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*!
 \brief rotate input vector at fixed rate per sample from initial phase offset
 \param outVector The vector where the results will be stored
 \param inVector Vector to be rotated
 \param phase_inc phase increment per sample (a unit-magnitude complex number)
 \param phase initial phase offset; updated on return so rotation can continue across calls
 \param num_points The number of values in inVector to be rotated and stored into outVector
*/
static inline void volk_32fc_s32fc_x2_rotator_32fc_a_avx(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){
    lv_32fc_t* cPtr = outVector;
    const lv_32fc_t* aPtr = inVector;
    lv_32fc_t incr = 1;
    lv_32fc_t phase_Ptr[4] = {(*phase), (*phase), (*phase), (*phase)};

    unsigned int i, j = 0;

    /* Pre-rotate the four lanes: lane k starts at phase * phase_inc^k, and
       incr ends up as phase_inc^4, the per-iteration increment for all lanes. */
    for(i = 0; i < 4; ++i) {
        phase_Ptr[i] *= incr;
        incr *= (phase_inc);
    }

    __m256 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p;

    phase_Val = _mm256_loadu_ps((float*)phase_Ptr);
    inc_Val = _mm256_set_ps(lv_cimag(incr), lv_creal(incr), lv_cimag(incr), lv_creal(incr),
                            lv_cimag(incr), lv_creal(incr), lv_cimag(incr), lv_creal(incr));

    const unsigned int fourthPoints = num_points / 4;

    for(i = 0; i < (unsigned int)(fourthPoints/ROTATOR_RELOAD); i++) {
        for(j = 0; j < ROTATOR_RELOAD; ++j) {

            aVal = _mm256_load_ps((float*)aPtr);

            /* Complex multiply of four samples by the four phase lanes, and of the
               phase lanes by the increment (moveldup/movehdup/addsub decomposition). */
            yl = _mm256_moveldup_ps(phase_Val);
            yh = _mm256_movehdup_ps(phase_Val);
            ylp = _mm256_moveldup_ps(inc_Val);
            yhp = _mm256_movehdup_ps(inc_Val);

            tmp1 = _mm256_mul_ps(aVal, yl);
            tmp1p = _mm256_mul_ps(phase_Val, ylp);

            aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1);
            phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1);
            tmp2 = _mm256_mul_ps(aVal, yh);
            tmp2p = _mm256_mul_ps(phase_Val, yhp);

            z = _mm256_addsub_ps(tmp1, tmp2);
            phase_Val = _mm256_addsub_ps(tmp1p, tmp2p);

            _mm256_store_ps((float*)cPtr, z);

            aPtr += 4;
            cPtr += 4;
        }
        /* Scale the phase lanes back toward unit magnitude to counter rounding error. */
        tmp1 = _mm256_mul_ps(phase_Val, phase_Val);
        tmp2 = _mm256_hadd_ps(tmp1, tmp1);
        tmp1 = _mm256_shuffle_ps(tmp2, tmp2, 0xD8);
        phase_Val = _mm256_div_ps(phase_Val, tmp1);
    }
    for(i = 0; i < fourthPoints%ROTATOR_RELOAD; ++i) {
        aVal = _mm256_load_ps((float*)aPtr);

        yl = _mm256_moveldup_ps(phase_Val);
        yh = _mm256_movehdup_ps(phase_Val);
        ylp = _mm256_moveldup_ps(inc_Val);
        yhp = _mm256_movehdup_ps(inc_Val);

        tmp1 = _mm256_mul_ps(aVal, yl);
        tmp1p = _mm256_mul_ps(phase_Val, ylp);

        aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1);
        phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1);
        tmp2 = _mm256_mul_ps(aVal, yh);
        tmp2p = _mm256_mul_ps(phase_Val, yhp);

        z = _mm256_addsub_ps(tmp1, tmp2);
        phase_Val = _mm256_addsub_ps(tmp1p, tmp2p);

        _mm256_store_ps((float*)cPtr, z);

        aPtr += 4;
        cPtr += 4;
    }

    /* Handle the final 0-3 samples in scalar code and save the phase for the caller. */
    _mm256_storeu_ps((float*)phase_Ptr, phase_Val);
    for(i = 0; i < num_points%4; ++i) {
        *cPtr++ = *aPtr++ * phase_Ptr[0];
        phase_Ptr[0] *= (phase_inc);
    }

    (*phase) = phase_Ptr[0];
}


/*!
 \brief rotate input vector at fixed rate per sample from initial phase offset
 \param outVector The vector where the results will be stored
 \param inVector Vector to be rotated
 \param phase_inc phase increment per sample (a unit-magnitude complex number)
 \param phase initial phase offset; updated on return so rotation can continue across calls
 \param num_points The number of values in inVector to be rotated and stored into outVector
*/
static inline void volk_32fc_s32fc_x2_rotator_32fc_u_avx(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){
    lv_32fc_t* cPtr = outVector;
    const lv_32fc_t* aPtr = inVector;
    lv_32fc_t incr = 1;
    lv_32fc_t phase_Ptr[4] = {(*phase), (*phase), (*phase), (*phase)};

    unsigned int i, j = 0;

    /* Pre-rotate the four lanes as in the aligned variant. */
    for(i = 0; i < 4; ++i) {
        phase_Ptr[i] *= incr;
        incr *= (phase_inc);
    }

    __m256 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p;

    phase_Val = _mm256_loadu_ps((float*)phase_Ptr);
    inc_Val = _mm256_set_ps(lv_cimag(incr), lv_creal(incr), lv_cimag(incr), lv_creal(incr),
                            lv_cimag(incr), lv_creal(incr), lv_cimag(incr), lv_creal(incr));

    const unsigned int fourthPoints = num_points / 4;

    for(i = 0; i < (unsigned int)(fourthPoints/ROTATOR_RELOAD); i++) {
        for(j = 0; j < ROTATOR_RELOAD; ++j) {

            aVal = _mm256_loadu_ps((float*)aPtr);

            yl = _mm256_moveldup_ps(phase_Val);
            yh = _mm256_movehdup_ps(phase_Val);
            ylp = _mm256_moveldup_ps(inc_Val);
            yhp = _mm256_movehdup_ps(inc_Val);

            tmp1 = _mm256_mul_ps(aVal, yl);
            tmp1p = _mm256_mul_ps(phase_Val, ylp);

            aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1);
            phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1);
            tmp2 = _mm256_mul_ps(aVal, yh);
            tmp2p = _mm256_mul_ps(phase_Val, yhp);

            z = _mm256_addsub_ps(tmp1, tmp2);
            phase_Val = _mm256_addsub_ps(tmp1p, tmp2p);

            _mm256_storeu_ps((float*)cPtr, z);

            aPtr += 4;
            cPtr += 4;
        }
        /* Scale the phase lanes back toward unit magnitude to counter rounding error. */
        tmp1 = _mm256_mul_ps(phase_Val, phase_Val);
        tmp2 = _mm256_hadd_ps(tmp1, tmp1);
        tmp1 = _mm256_shuffle_ps(tmp2, tmp2, 0xD8);
        phase_Val = _mm256_div_ps(phase_Val, tmp1);
    }
    for(i = 0; i < fourthPoints%ROTATOR_RELOAD; ++i) {
        aVal = _mm256_loadu_ps((float*)aPtr);

        yl = _mm256_moveldup_ps(phase_Val);
        yh = _mm256_movehdup_ps(phase_Val);
        ylp = _mm256_moveldup_ps(inc_Val);
        yhp = _mm256_movehdup_ps(inc_Val);

        tmp1 = _mm256_mul_ps(aVal, yl);
        tmp1p = _mm256_mul_ps(phase_Val, ylp);

        aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1);
        phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1);
        tmp2 = _mm256_mul_ps(aVal, yh);
        tmp2p = _mm256_mul_ps(phase_Val, yhp);

        z = _mm256_addsub_ps(tmp1, tmp2);
        phase_Val = _mm256_addsub_ps(tmp1p, tmp2p);

        _mm256_storeu_ps((float*)cPtr, z);

        aPtr += 4;
        cPtr += 4;
    }

    /* Handle the final 0-3 samples in scalar code and save the phase for the caller. */
    _mm256_storeu_ps((float*)phase_Ptr, phase_Val);
    for(i = 0; i < num_points%4; ++i) {
        *cPtr++ = *aPtr++ * phase_Ptr[0];
        phase_Ptr[0] *= (phase_inc);
    }

    (*phase) = phase_Ptr[0];
}

#endif /* LV_HAVE_AVX */

#endif /* INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H */
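
The _a_ variants assume the input and output buffers are aligned (16 bytes for SSE4.1, 32 bytes for AVX), while the _u_ variants accept arbitrary pointers at some cost in load/store throughput. On POSIX systems a suitable buffer can be obtained with posix_memalign, as in the sketch below; the helper name and error handling are illustrative. If present in the build, this VOLK release's volk_malloc()/volk_free() allocators serve the same purpose.

#include <stdlib.h>
#include <volk/volk_complex.h>

/* Illustrative only: allocate buffers that satisfy the aligned (_a_) kernels. */
static int example_alloc_aligned(lv_32fc_t** in, lv_32fc_t** out, unsigned int num_points)
{
    /* 32-byte alignment satisfies both the SSE4.1 (16-byte) and AVX (32-byte) kernels. */
    if (posix_memalign((void**)in, 32, num_points * sizeof(lv_32fc_t)) != 0)
        return -1;
    if (posix_memalign((void**)out, 32, num_points * sizeof(lv_32fc_t)) != 0) {
        free(*in);
        return -1;
    }
    return 0;  /* release both buffers with free() when finished */
}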