GNU Radio Manual and C++ API Reference  3.7.5.1
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
volk_32f_x2_multiply_32f.h
Go to the documentation of this file.
1 #ifndef INCLUDED_volk_32f_x2_multiply_32f_u_H
2 #define INCLUDED_volk_32f_x2_multiply_32f_u_H
3 
4 #include <inttypes.h>
5 #include <stdio.h>
6 
7 #ifdef LV_HAVE_SSE
8 #include <xmmintrin.h>
/*!
  \brief Computes the element-wise product of two float vectors with SSE, using unaligned loads and stores
  \param cVector Output buffer; element i receives aVector[i] * bVector[i]
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_multiply_32f_u_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  /* Largest multiple of four that does not exceed num_points. */
  const unsigned int simdEnd = (num_points / 4) * 4;
  unsigned int idx;

  /* Vector part: four floats per pass. The loadu/storeu intrinsics are the
     unaligned variants, so the buffers need no particular alignment. */
  for(idx = 0; idx < simdEnd; idx += 4){
    const __m128 lhs = _mm_loadu_ps(aVector + idx);
    const __m128 rhs = _mm_loadu_ps(bVector + idx);
    _mm_storeu_ps(cVector + idx, _mm_mul_ps(lhs, rhs));
  }

  /* Scalar part: the num_points % 4 elements the vector loop did not reach. */
  for(; idx < num_points; idx++){
    cVector[idx] = aVector[idx] * bVector[idx];
  }
}
44 #endif /* LV_HAVE_SSE */
45 
46 #ifdef LV_HAVE_AVX
47 #include <immintrin.h>
/*!
  \brief Computes the element-wise product of two float vectors with AVX, using unaligned loads and stores
  \param cVector Output buffer; element i receives aVector[i] * bVector[i]
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_multiply_32f_u_avx(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  /* Largest multiple of eight that does not exceed num_points. */
  const unsigned int simdEnd = (num_points / 8) * 8;
  unsigned int idx;

  /* Vector part: eight floats per pass. The loadu/storeu intrinsics are the
     unaligned variants, so the buffers need no particular alignment. */
  for(idx = 0; idx < simdEnd; idx += 8){
    const __m256 lhs = _mm256_loadu_ps(aVector + idx);
    const __m256 rhs = _mm256_loadu_ps(bVector + idx);
    _mm256_storeu_ps(cVector + idx, _mm256_mul_ps(lhs, rhs));
  }

  /* Scalar part: the num_points % 8 elements the vector loop did not reach. */
  for(; idx < num_points; idx++){
    cVector[idx] = aVector[idx] * bVector[idx];
  }
}
83 #endif /* LV_HAVE_AVX */
84 
85 #ifdef LV_HAVE_GENERIC
/*!
  \brief Computes the element-wise product of two float vectors with plain scalar C
  \param cVector Output buffer; element i receives aVector[i] * bVector[i]
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_multiply_32f_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  unsigned int idx;

  /* One multiply per element, visited in increasing index order. */
  for(idx = 0; idx < num_points; idx++){
    cVector[idx] = aVector[idx] * bVector[idx];
  }
}
103 #endif /* LV_HAVE_GENERIC */
104 
105 
106 #endif /* INCLUDED_volk_32f_x2_multiply_32f_u_H */
107 #ifndef INCLUDED_volk_32f_x2_multiply_32f_a_H
108 #define INCLUDED_volk_32f_x2_multiply_32f_a_H
109 
110 #include <inttypes.h>
111 #include <stdio.h>
112 
113 #ifdef LV_HAVE_SSE
114 #include <xmmintrin.h>
/*!
  \brief Computes the element-wise product of two float vectors with SSE, using aligned loads and stores
  \param cVector Output buffer; element i receives aVector[i] * bVector[i]
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_multiply_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  /* Largest multiple of four that does not exceed num_points. */
  const unsigned int simdEnd = (num_points / 4) * 4;
  unsigned int idx;

  /* Vector part: four floats per pass. _mm_load_ps/_mm_store_ps are the
     aligned variants, so all three buffers must start on a 16-byte boundary
     whenever this loop runs. */
  for(idx = 0; idx < simdEnd; idx += 4){
    const __m128 lhs = _mm_load_ps(aVector + idx);
    const __m128 rhs = _mm_load_ps(bVector + idx);
    _mm_store_ps(cVector + idx, _mm_mul_ps(lhs, rhs));
  }

  /* Scalar part: the num_points % 4 elements the vector loop did not reach. */
  for(; idx < num_points; idx++){
    cVector[idx] = aVector[idx] * bVector[idx];
  }
}
150 #endif /* LV_HAVE_SSE */
151 
152 #ifdef LV_HAVE_AVX
153 #include <immintrin.h>
/*!
  \brief Computes the element-wise product of two float vectors with AVX, using aligned loads and stores
  \param cVector Output buffer; element i receives aVector[i] * bVector[i]
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_multiply_32f_a_avx(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  /* Largest multiple of eight that does not exceed num_points. */
  const unsigned int simdEnd = (num_points / 8) * 8;
  unsigned int idx;

  /* Vector part: eight floats per pass. _mm256_load_ps/_mm256_store_ps are
     the aligned variants, so all three buffers must start on a 32-byte
     boundary whenever this loop runs. */
  for(idx = 0; idx < simdEnd; idx += 8){
    const __m256 lhs = _mm256_load_ps(aVector + idx);
    const __m256 rhs = _mm256_load_ps(bVector + idx);
    _mm256_store_ps(cVector + idx, _mm256_mul_ps(lhs, rhs));
  }

  /* Scalar part: the num_points % 8 elements the vector loop did not reach. */
  for(; idx < num_points; idx++){
    cVector[idx] = aVector[idx] * bVector[idx];
  }
}
189 #endif /* LV_HAVE_AVX */
190 
191 #ifdef LV_HAVE_NEON
192 #include <arm_neon.h>
193 
/*!
  \brief Computes the element-wise product of two float vectors with NEON intrinsics
  \param cVector Output buffer; element i receives aVector[i] * bVector[i]
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_multiply_32f_neon(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  /* Largest multiple of four that does not exceed num_points. */
  const unsigned int simdEnd = (num_points / 4) * 4;
  unsigned int idx;

  /* Vector part: load four floats from each input, multiply lane by lane,
     and store the four products. */
  for(idx = 0; idx < simdEnd; idx += 4){
    const float32x4_t lhs = vld1q_f32(aVector + idx);
    const float32x4_t rhs = vld1q_f32(bVector + idx);
    vst1q_f32(cVector + idx, vmulq_f32(lhs, rhs));
  }

  /* Scalar part: the num_points % 4 elements the vector loop did not reach. */
  for(; idx < num_points; idx++){
    cVector[idx] = aVector[idx] * bVector[idx];
  }
}
218 #endif /* LV_HAVE_NEON */
219 
220 #ifdef LV_HAVE_GENERIC
/*!
  \brief Computes the element-wise product of two float vectors with plain scalar C
  \param cVector Output buffer; element i receives aVector[i] * bVector[i]
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_multiply_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  unsigned int idx;

  /* One multiply per element, visited in increasing index order. */
  for(idx = 0; idx < num_points; idx++){
    cVector[idx] = aVector[idx] * bVector[idx];
  }
}
238 #endif /* LV_HAVE_GENERIC */
239 
240 #ifdef LV_HAVE_ORC
/*!
  \brief ORC-backed multiply routine; it is only declared here and its definition is not part of this header
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be multiplied
  \param bVector One of the vectors to be multiplied
  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
*/
extern void volk_32f_x2_multiply_32f_a_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);

/*!
  \brief Multiplies the two input vectors and stores the results in the third vector by forwarding to volk_32f_x2_multiply_32f_a_orc_impl
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be multiplied
  \param bVector One of the vectors to be multiplied
  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_32f_x2_multiply_32f_u_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  /* Pure pass-through: all four arguments are handed on unchanged. */
  volk_32f_x2_multiply_32f_a_orc_impl(cVector, aVector, bVector, num_points);
}
252 #endif /* LV_HAVE_ORC */
253 
254 
255 #endif /* INCLUDED_volk_32f_x2_multiply_32f_a_H */