GNU Radio Manual and C++ API Reference  3.7.5.1
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
volk_32f_x2_add_32f.h
Go to the documentation of this file.
1 #ifndef INCLUDED_volk_32f_x2_add_32f_u_H
2 #define INCLUDED_volk_32f_x2_add_32f_u_H
3 
4 #include <inttypes.h>
5 #include <stdio.h>
6 
7 #ifdef LV_HAVE_SSE
8 #include <xmmintrin.h>
/*!
  \brief Element-wise sum of two float vectors: cVector[i] = aVector[i] + bVector[i]

  Uses the unaligned SSE load/store intrinsics (_mm_loadu_ps / _mm_storeu_ps) for
  every complete group of four floats, then finishes any remaining 1-3 elements
  with scalar additions.

  \param cVector Output buffer that receives the sums
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_add_32f_u_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    /* Largest multiple of four that does not exceed num_points. */
    const unsigned int vectorizedCount = (num_points / 4) * 4;
    unsigned int idx;

    /* SIMD portion: four floats per iteration. */
    for(idx = 0; idx < vectorizedCount; idx += 4){
        const __m128 lhs = _mm_loadu_ps(aVector + idx);
        const __m128 rhs = _mm_loadu_ps(bVector + idx);

        _mm_storeu_ps(cVector + idx, _mm_add_ps(lhs, rhs));
    }

    /* Scalar tail: whatever is left after the last full group of four. */
    for(; idx < num_points; idx++){
        cVector[idx] = aVector[idx] + bVector[idx];
    }
}
44 #endif /* LV_HAVE_SSE */
45 
46 #ifdef LV_HAVE_GENERIC
/*!
  \brief Element-wise sum of two float vectors: cVector[i] = aVector[i] + bVector[i]

  Plain scalar implementation with no SIMD intrinsics.

  \param cVector Output buffer that receives the sums
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_add_32f_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    unsigned int idx;

    for(idx = 0; idx < num_points; idx++){
        cVector[idx] = aVector[idx] + bVector[idx];
    }
}
64 #endif /* LV_HAVE_GENERIC */
65 
66 #endif /* INCLUDED_volk_32f_x2_add_32f_u_H */
67 #ifndef INCLUDED_volk_32f_x2_add_32f_a_H
68 #define INCLUDED_volk_32f_x2_add_32f_a_H
69 
70 #include <inttypes.h>
71 #include <stdio.h>
72 
73 #ifdef LV_HAVE_SSE
74 #include <xmmintrin.h>
/*!
  \brief Element-wise sum of two float vectors: cVector[i] = aVector[i] + bVector[i]

  Uses the aligned SSE load/store intrinsics (_mm_load_ps / _mm_store_ps) for every
  complete group of four floats, so cVector, aVector and bVector must each be
  16-byte aligned. Any remaining 1-3 elements are added with scalar code.

  \param cVector Output buffer that receives the sums (16-byte aligned)
  \param aVector First input vector (16-byte aligned)
  \param bVector Second input vector (16-byte aligned)
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_add_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    /* Largest multiple of four that does not exceed num_points. */
    const unsigned int vectorizedCount = (num_points / 4) * 4;
    unsigned int idx;

    /* SIMD portion: four floats per iteration. */
    for(idx = 0; idx < vectorizedCount; idx += 4){
        const __m128 lhs = _mm_load_ps(aVector + idx);
        const __m128 rhs = _mm_load_ps(bVector + idx);

        _mm_store_ps(cVector + idx, _mm_add_ps(lhs, rhs));
    }

    /* Scalar tail: whatever is left after the last full group of four. */
    for(; idx < num_points; idx++){
        cVector[idx] = aVector[idx] + bVector[idx];
    }
}
110 #endif /* LV_HAVE_SSE */
111 
112 #ifdef LV_HAVE_NEON
113 #include <arm_neon.h>
/*!
  \brief Element-wise sum of two float vectors: cVector[i] = aVector[i] + bVector[i]

  Processes every complete group of four floats with NEON quad-word intrinsics
  (vld1q_f32 / vaddq_f32 / vst1q_f32) and finishes any remaining 1-3 elements
  with scalar additions.

  \param cVector Output buffer that receives the sums
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_add_32f_u_neon(float* cVector, const float* aVector, const float* bVector, unsigned int num_points) {
    // Largest multiple of four that does not exceed num_points
    const unsigned int vectorizedCount = (num_points / 4) * 4;
    unsigned int idx;

    for(idx = 0; idx < vectorizedCount; idx += 4){
        // Load four floats from each input into NEON registers
        const float32x4_t lhs = vld1q_f32(aVector + idx);
        const float32x4_t rhs = vld1q_f32(bVector + idx);

        // Hint the cache about the next group of four in each input
        __builtin_prefetch(aVector + idx + 4);
        __builtin_prefetch(bVector + idx + 4);

        // Add the four lanes and write them to the output
        vst1q_f32(cVector + idx, vaddq_f32(lhs, rhs));
    }

    // Scalar tail: idx now equals the number of elements already handled,
    // which is less than num_points whenever num_points is not a multiple of 4
    for(; idx < num_points; idx++){
        cVector[idx] = aVector[idx] + bVector[idx];
    }
}
152 
153 #endif /* LV_HAVE_NEON */
154 
155 #ifdef LV_HAVE_GENERIC
/*!
  \brief Element-wise sum of two float vectors: cVector[i] = aVector[i] + bVector[i]

  Plain scalar implementation with no SIMD intrinsics.

  \param cVector Output buffer that receives the sums
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_add_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    unsigned int idx;

    for(idx = 0; idx < num_points; idx++){
        cVector[idx] = aVector[idx] + bVector[idx];
    }
}
173 #endif /* LV_HAVE_GENERIC */
174 
175 #ifdef LV_HAVE_ORC
/*!
  \brief Adds two float vectors element-wise; implementation provided outside this header
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be added
  \param bVector One of the vectors to be added
  \param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
extern void volk_32f_x2_add_32f_a_orc_impl(float* cVector,
                                           const float* aVector,
                                           const float* bVector,
                                           unsigned int num_points);

/*!
  \brief Forwards all four arguments unchanged to volk_32f_x2_add_32f_a_orc_impl
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be added
  \param bVector One of the vectors to be added
  \param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
static inline void volk_32f_x2_add_32f_u_orc(float* cVector,
                                             const float* aVector,
                                             const float* bVector,
                                             unsigned int num_points)
{
    volk_32f_x2_add_32f_a_orc_impl(cVector, aVector, bVector, num_points);
}
187 #endif /* LV_HAVE_ORC */
188 
189 
190 #endif /* INCLUDED_volk_32f_x2_add_32f_a_H */