GNU Radio Manual and C++ API Reference  3.7.4.1
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
volk_32f_x2_multiply_32f.h
Go to the documentation of this file.
1 #ifndef INCLUDED_volk_32f_x2_multiply_32f_u_H
2 #define INCLUDED_volk_32f_x2_multiply_32f_u_H
3 
4 #include <inttypes.h>
5 #include <stdio.h>
6 
7 #ifdef LV_HAVE_SSE
8 #include <xmmintrin.h>
/*!
  \brief Multiplies two float vectors element by element and stores the products in a third vector

  Processes four floats per iteration with unaligned SSE loads and stores
  (_mm_loadu_ps / _mm_storeu_ps), then handles the remaining
  (num_points % 4) elements one at a time with scalar arithmetic.

  \param cVector Output vector that receives the products
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_multiply_32f_u_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    /* Largest multiple of four that does not exceed num_points. */
    const unsigned int simdCount = num_points - (num_points % 4);
    unsigned int idx;

    /* Vector part: one 128-bit register (four floats) per iteration. */
    for (idx = 0; idx < simdCount; idx += 4) {
        const __m128 lhs = _mm_loadu_ps(aVector + idx);
        const __m128 rhs = _mm_loadu_ps(bVector + idx);

        _mm_storeu_ps(cVector + idx, _mm_mul_ps(lhs, rhs));
    }

    /* Scalar tail: whatever did not fill a whole register. */
    for (; idx < num_points; ++idx) {
        cVector[idx] = aVector[idx] * bVector[idx];
    }
}
44 #endif /* LV_HAVE_SSE */
45 
46 #ifdef LV_HAVE_AVX
47 #include <immintrin.h>
/*!
  \brief Multiplies two float vectors element by element and stores the products in a third vector

  Processes eight floats per iteration with unaligned AVX loads and stores
  (_mm256_loadu_ps / _mm256_storeu_ps), then handles the remaining
  (num_points % 8) elements one at a time with scalar arithmetic.

  \param cVector Output vector that receives the products
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_multiply_32f_u_avx(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    /* Largest multiple of eight that does not exceed num_points. */
    const unsigned int simdCount = num_points - (num_points % 8);
    unsigned int idx;

    /* Vector part: one 256-bit register (eight floats) per iteration. */
    for (idx = 0; idx < simdCount; idx += 8) {
        const __m256 lhs = _mm256_loadu_ps(aVector + idx);
        const __m256 rhs = _mm256_loadu_ps(bVector + idx);

        _mm256_storeu_ps(cVector + idx, _mm256_mul_ps(lhs, rhs));
    }

    /* Scalar tail: whatever did not fill a whole register. */
    for (; idx < num_points; ++idx) {
        cVector[idx] = aVector[idx] * bVector[idx];
    }
}
83 #endif /* LV_HAVE_AVX */
84 
85 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies two float vectors element by element and stores the products in a third vector

  Portable scalar implementation: cVector[i] = aVector[i] * bVector[i]
  for every i in [0, num_points).

  \param cVector Output vector that receives the products
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_multiply_32f_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    unsigned int idx;

    for (idx = 0; idx < num_points; ++idx) {
        cVector[idx] = aVector[idx] * bVector[idx];
    }
}
103 #endif /* LV_HAVE_GENERIC */
104 
105 
106 #endif /* INCLUDED_volk_32f_x2_multiply_32f_u_H */
107 #ifndef INCLUDED_volk_32f_x2_multiply_32f_a_H
108 #define INCLUDED_volk_32f_x2_multiply_32f_a_H
109 
110 #include <inttypes.h>
111 #include <stdio.h>
112 
113 #ifdef LV_HAVE_SSE
114 #include <xmmintrin.h>
/*!
  \brief Multiplies two float vectors element by element and stores the products in a third vector

  Processes four floats per iteration with aligned SSE loads and stores
  (_mm_load_ps / _mm_store_ps), so all three buffers must be 16-byte
  aligned whenever num_points >= 4. The remaining (num_points % 4)
  elements are handled one at a time with scalar arithmetic.

  \param cVector Output vector that receives the products
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_multiply_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    /* Largest multiple of four that does not exceed num_points. */
    const unsigned int simdCount = num_points - (num_points % 4);
    unsigned int idx;

    /* Vector part: one 128-bit register (four floats) per iteration. */
    for (idx = 0; idx < simdCount; idx += 4) {
        const __m128 lhs = _mm_load_ps(aVector + idx);
        const __m128 rhs = _mm_load_ps(bVector + idx);

        _mm_store_ps(cVector + idx, _mm_mul_ps(lhs, rhs));
    }

    /* Scalar tail: whatever did not fill a whole register. */
    for (; idx < num_points; ++idx) {
        cVector[idx] = aVector[idx] * bVector[idx];
    }
}
150 #endif /* LV_HAVE_SSE */
151 
152 #ifdef LV_HAVE_AVX
153 #include <immintrin.h>
/*!
  \brief Multiplies two float vectors element by element and stores the products in a third vector

  Processes eight floats per iteration with aligned AVX loads and stores
  (_mm256_load_ps / _mm256_store_ps), so all three buffers must be 32-byte
  aligned whenever num_points >= 8. The remaining (num_points % 8)
  elements are handled one at a time with scalar arithmetic.

  \param cVector Output vector that receives the products
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_multiply_32f_a_avx(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    /* Largest multiple of eight that does not exceed num_points. */
    const unsigned int simdCount = num_points - (num_points % 8);
    unsigned int idx;

    /* Vector part: one 256-bit register (eight floats) per iteration. */
    for (idx = 0; idx < simdCount; idx += 8) {
        const __m256 lhs = _mm256_load_ps(aVector + idx);
        const __m256 rhs = _mm256_load_ps(bVector + idx);

        _mm256_store_ps(cVector + idx, _mm256_mul_ps(lhs, rhs));
    }

    /* Scalar tail: whatever did not fill a whole register. */
    for (; idx < num_points; ++idx) {
        cVector[idx] = aVector[idx] * bVector[idx];
    }
}
189 #endif /* LV_HAVE_AVX */
190 
191 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies two float vectors element by element and stores the products in a third vector

  Portable scalar implementation: cVector[i] = aVector[i] * bVector[i]
  for every i in [0, num_points). The loop uses ordinary float accesses
  only, with no SIMD intrinsics.

  \param cVector Output vector that receives the products
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_multiply_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    unsigned int idx;

    for (idx = 0; idx < num_points; ++idx) {
        cVector[idx] = aVector[idx] * bVector[idx];
    }
}
209 #endif /* LV_HAVE_GENERIC */
210 
211 #ifdef LV_HAVE_ORC
/*!
  \brief Externally defined multiply kernel that the ORC wrapper below forwards to

  Only the prototype is visible in this header; the definition lives elsewhere
  (presumably ORC-generated code -- confirm against the build).

  \param cVector Output vector that receives the products
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of elements to process
*/
extern void volk_32f_x2_multiply_32f_a_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);

/*!
  \brief Multiplies the two input vectors and stores their results in the third vector by delegating to the ORC implementation

  Passes all four arguments through unchanged to
  volk_32f_x2_multiply_32f_a_orc_impl and does no work of its own.

  NOTE(review): this wrapper is named "_u_orc" but forwards to the
  "_a_orc_impl" symbol -- confirm the naming is intentional.

  \param cVector Output vector that receives the products
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of elements in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_32f_x2_multiply_32f_u_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    volk_32f_x2_multiply_32f_a_orc_impl(cVector, aVector, bVector, num_points);
}
223 #endif /* LV_HAVE_ORC */
224 
225 
226 #endif /* INCLUDED_volk_32f_x2_multiply_32f_a_H */