GNU Radio Manual and C++ API Reference  3.7.5.1
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
volk_32f_s32f_multiply_32f.h
Go to the documentation of this file.
1 #ifndef INCLUDED_volk_32f_s32f_multiply_32f_u_H
2 #define INCLUDED_volk_32f_s32f_multiply_32f_u_H
3 
4 #include <inttypes.h>
5 #include <stdio.h>
6 
7 #ifdef LV_HAVE_SSE
8 #include <xmmintrin.h>
/*!
  \brief Multiplies each element of a float vector by a scalar (SSE, unaligned buffers)
  \param cVector Output buffer that receives the num_points products
  \param aVector Input buffer holding the num_points values to be scaled
  \param scalar The value every input element is multiplied by
  \param num_points The number of floats read from aVector and written to cVector
*/
static inline void volk_32f_s32f_multiply_32f_u_sse(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
  const unsigned int vectorBlocks = num_points / 4;
  const float* src = aVector;
  float* dst = cVector;
  unsigned int block;
  unsigned int idx;

  // The scalar is replicated into all four lanes once, outside the loop
  const __m128 factor = _mm_set_ps1(scalar);

  // Bulk part: four floats per iteration using the unaligned load/store intrinsics
  for(block = 0; block < vectorBlocks; block++, src += 4, dst += 4){
    _mm_storeu_ps(dst, _mm_mul_ps(_mm_loadu_ps(src), factor));
  }

  // Tail part: the remaining (num_points % 4) elements, one at a time
  for(idx = vectorBlocks * 4; idx < num_points; idx++){
    *dst++ = (*src++) * scalar;
  }
}
42 #endif /* LV_HAVE_SSE */
43 
44 #ifdef LV_HAVE_AVX
45 #include <immintrin.h>
46 /*!
47  \brief Scalar float multiply
48  \param cVector The vector where the results will be stored
49  \param aVector One of the vectors to be multiplied
50  \param scalar the scalar value
51  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
52 */
53 static inline void volk_32f_s32f_multiply_32f_u_avx(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
54  unsigned int number = 0;
55  const unsigned int eighthPoints = num_points / 8;
56 
57  float* cPtr = cVector;
58  const float* aPtr = aVector;
59 
60  __m256 aVal, bVal, cVal;
61  bVal = _mm256_set1_ps(scalar);
62  for(;number < eighthPoints; number++){
63 
64  aVal = _mm256_loadu_ps(aPtr);
65 
66  cVal = _mm256_mul_ps(aVal, bVal);
67 
68  _mm256_storeu_ps(cPtr,cVal); // Store the results back into the C container
69 
70  aPtr += 8;
71  cPtr += 8;
72  }
73 
74  number = eighthPoints * 8;
75  for(;number < num_points; number++){
76  *cPtr++ = (*aPtr++) * scalar;
77  }
78 }
79 #endif /* LV_HAVE_AVX */
80 
81 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies each element of a float vector by a scalar (portable C version)
  \param cVector Output buffer that receives the num_points products
  \param aVector Input buffer holding the num_points values to be scaled
  \param scalar The value every input element is multiplied by
  \param num_points The number of floats read from aVector and written to cVector
*/
static inline void volk_32f_s32f_multiply_32f_generic(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
  const float* src = aVector;
  float* dst = cVector;
  unsigned int remaining = num_points;

  // Walk both buffers front to back, one element per iteration
  while(remaining > 0){
    *dst++ = (*src++) * scalar;
    remaining--;
  }
}
99 #endif /* LV_HAVE_GENERIC */
100 
101 
102 #endif /* INCLUDED_volk_32f_s32f_multiply_32f_u_H */
103 #ifndef INCLUDED_volk_32f_s32f_multiply_32f_a_H
104 #define INCLUDED_volk_32f_s32f_multiply_32f_a_H
105 
106 #include <inttypes.h>
107 #include <stdio.h>
108 
109 #ifdef LV_HAVE_SSE
110 #include <xmmintrin.h>
/*!
  \brief Multiplies each element of a float vector by a scalar (SSE, aligned buffers)
  \param cVector Output buffer that receives the num_points products; accessed with the aligned store intrinsic
  \param aVector Input buffer holding the num_points values to be scaled; accessed with the aligned load intrinsic
  \param scalar The value every input element is multiplied by
  \param num_points The number of floats read from aVector and written to cVector
*/
static inline void volk_32f_s32f_multiply_32f_a_sse(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
  const unsigned int vectorBlocks = num_points / 4;
  const float* src = aVector;
  float* dst = cVector;
  unsigned int block;
  unsigned int idx;

  // The scalar is replicated into all four lanes once, outside the loop
  const __m128 factor = _mm_set_ps1(scalar);

  // Bulk part: four floats per iteration using the aligned load/store intrinsics
  for(block = 0; block < vectorBlocks; block++, src += 4, dst += 4){
    _mm_store_ps(dst, _mm_mul_ps(_mm_load_ps(src), factor));
  }

  // Tail part: the remaining (num_points % 4) elements, one at a time
  for(idx = vectorBlocks * 4; idx < num_points; idx++){
    *dst++ = (*src++) * scalar;
  }
}
144 #endif /* LV_HAVE_SSE */
145 
146 #ifdef LV_HAVE_AVX
147 #include <immintrin.h>
148 /*!
149  \brief Scalar float multiply
150  \param cVector The vector where the results will be stored
151  \param aVector One of the vectors to be multiplied
152  \param scalar the scalar value
153  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
154 */
155 static inline void volk_32f_s32f_multiply_32f_a_avx(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
156  unsigned int number = 0;
157  const unsigned int eighthPoints = num_points / 8;
158 
159  float* cPtr = cVector;
160  const float* aPtr = aVector;
161 
162  __m256 aVal, bVal, cVal;
163  bVal = _mm256_set1_ps(scalar);
164  for(;number < eighthPoints; number++){
165 
166  aVal = _mm256_load_ps(aPtr);
167 
168  cVal = _mm256_mul_ps(aVal, bVal);
169 
170  _mm256_store_ps(cPtr,cVal); // Store the results back into the C container
171 
172  aPtr += 8;
173  cPtr += 8;
174  }
175 
176  number = eighthPoints * 8;
177  for(;number < num_points; number++){
178  *cPtr++ = (*aPtr++) * scalar;
179  }
180 }
181 #endif /* LV_HAVE_AVX */
182 
183 #ifdef LV_HAVE_NEON
184 #include <arm_neon.h>
/*!
  \brief Multiplies each element of a float vector by a scalar (NEON version)
  \param cVector Output buffer that receives the num_points products
  \param aVector Input buffer holding the num_points values to be scaled
  \param scalar The value every input element is multiplied by
  \param num_points The number of floats read from aVector and written to cVector
*/
static inline void volk_32f_s32f_multiply_32f_u_neon(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
  const unsigned int vectorBlocks = num_points / 4;
  const float* src = aVector;
  float* dst = cVector;
  unsigned int block = 0;
  unsigned int leftover;

  // Bulk part: load four floats, scale them by the scalar, store four floats
  while(block < vectorBlocks){
    const float32x4_t loaded = vld1q_f32(src);
    vst1q_f32(dst, vmulq_n_f32(loaded, scalar));
    src += 4;
    dst += 4;
    block++;
  }

  // Tail part: whatever did not fill a whole four-float group
  for(leftover = num_points - vectorBlocks * 4; leftover > 0; leftover--){
    *dst++ = (*src++) * scalar;
  }
}
211 #endif /* LV_HAVE_NEON */
212 
213 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies each element of a float vector by a scalar (portable C version)
  \param cVector Output buffer that receives the num_points products
  \param aVector Input buffer holding the num_points values to be scaled
  \param scalar The value every input element is multiplied by
  \param num_points The number of floats read from aVector and written to cVector
*/
static inline void volk_32f_s32f_multiply_32f_a_generic(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
  const float* const srcEnd = aVector + num_points;
  const float* src = aVector;
  float* dst = cVector;

  // Advance through both buffers in lockstep until the input is exhausted
  for(; src != srcEnd; ++src, ++dst){
    *dst = (*src) * scalar;
  }
}
231 #endif /* LV_HAVE_GENERIC */
232 
233 #ifdef LV_HAVE_ORC
/*!
  \brief Multiplies each element of a float vector by a scalar (forwards to the ORC implementation)
  \param cVector Output buffer, passed through as the destination of the ORC routine
  \param aVector Input buffer, passed through as the source of the ORC routine
  \param scalar The value every input element is multiplied by
  \param num_points The number of floats to process, passed through unchanged
*/
extern void volk_32f_s32f_multiply_32f_a_orc_impl(float* dst,
                                                  const float* src,
                                                  const float scalar,
                                                  unsigned int num_points);

static inline void volk_32f_s32f_multiply_32f_u_orc(float* cVector,
                                                    const float* aVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
  // All work is done by the externally defined ORC routine; arguments are forwarded as-is
  volk_32f_s32f_multiply_32f_a_orc_impl(cVector, aVector, scalar, num_points);
}
245 #endif /* LV_HAVE_ORC */
246 
247 
248 #endif /* INCLUDED_volk_32f_s32f_multiply_32f_a_H */