GNU Radio Manual and C++ API Reference  3.7.4
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
volk_32f_s32f_convert_8i.h
Go to the documentation of this file.
1 #ifndef INCLUDED_volk_32f_s32f_convert_8i_u_H
2 #define INCLUDED_volk_32f_s32f_convert_8i_u_H
3 
4 #include <inttypes.h>
5 #include <stdio.h>
6 
7 #ifdef LV_HAVE_SSE2
8 #include <emmintrin.h>
9  /*!
10  \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value
11  \param inputVector The floating point input data buffer
12  \param outputVector The 8 bit output data buffer
13  \param scalar The value multiplied against each point in the input buffer
14  \param num_points The number of data values to be converted
15  \note Input buffer does NOT need to be properly aligned
16  */
17 static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
18  unsigned int number = 0;
19 
20  const unsigned int sixteenthPoints = num_points / 16;
21 
22  const float* inputVectorPtr = (const float*)inputVector;
23  int8_t* outputVectorPtr = outputVector;
24 
25  float min_val = -128;
26  float max_val = 127;
27  float r;
28 
29  __m128 vScalar = _mm_set_ps1(scalar);
30  __m128 inputVal1, inputVal2, inputVal3, inputVal4;
31  __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;
32  __m128 vmin_val = _mm_set_ps1(min_val);
33  __m128 vmax_val = _mm_set_ps1(max_val);
34 
35  for(;number < sixteenthPoints; number++){
36  inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
37  inputVal2 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
38  inputVal3 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
39  inputVal4 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
40 
41  inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
42  inputVal2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
43  inputVal3 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal3, vScalar), vmax_val), vmin_val);
44  inputVal4 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal4, vScalar), vmax_val), vmin_val);
45 
46  intInputVal1 = _mm_cvtps_epi32(inputVal1);
47  intInputVal2 = _mm_cvtps_epi32(inputVal2);
48  intInputVal3 = _mm_cvtps_epi32(inputVal3);
49  intInputVal4 = _mm_cvtps_epi32(inputVal4);
50 
51  intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
52  intInputVal3 = _mm_packs_epi32(intInputVal3, intInputVal4);
53 
54  intInputVal1 = _mm_packs_epi16(intInputVal1, intInputVal3);
55 
56  _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
57  outputVectorPtr += 16;
58  }
59 
60  number = sixteenthPoints * 16;
61  for(; number < num_points; number++){
62  r = inputVector[number] * scalar;
63  if(r > max_val)
64  r = max_val;
65  else if(r < min_val)
66  r = min_val;
67  outputVector[number] = (int16_t)(r);
68  }
69 }
70 #endif /* LV_HAVE_SSE2 */
71 
72 #ifdef LV_HAVE_SSE
73 #include <xmmintrin.h>
74  /*!
75  \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value
76  \param inputVector The floating point input data buffer
77  \param outputVector The 8 bit output data buffer
78  \param scalar The value multiplied against each point in the input buffer
79  \param num_points The number of data values to be converted
80  \note Input buffer does NOT need to be properly aligned
81  */
82 static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
83  unsigned int number = 0;
84 
85  const unsigned int quarterPoints = num_points / 4;
86 
87  const float* inputVectorPtr = (const float*)inputVector;
88  int8_t* outputVectorPtr = outputVector;
89 
90  float min_val = -128;
91  float max_val = 127;
92  float r;
93 
94  __m128 vScalar = _mm_set_ps1(scalar);
95  __m128 ret;
96  __m128 vmin_val = _mm_set_ps1(min_val);
97  __m128 vmax_val = _mm_set_ps1(max_val);
98 
99  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
100 
101  for(;number < quarterPoints; number++){
102  ret = _mm_loadu_ps(inputVectorPtr);
103  inputVectorPtr += 4;
104 
105  ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
106 
107  _mm_store_ps(outputFloatBuffer, ret);
108  *outputVectorPtr++ = (int8_t)(outputFloatBuffer[0]);
109  *outputVectorPtr++ = (int8_t)(outputFloatBuffer[1]);
110  *outputVectorPtr++ = (int8_t)(outputFloatBuffer[2]);
111  *outputVectorPtr++ = (int8_t)(outputFloatBuffer[3]);
112  }
113 
114  number = quarterPoints * 4;
115  for(; number < num_points; number++){
116  r = inputVector[number] * scalar;
117  if(r > max_val)
118  r = max_val;
119  else if(r < min_val)
120  r = min_val;
121  outputVector[number] = (int16_t)(r);
122  }
123 }
124 #endif /* LV_HAVE_SSE */
125 
126 #ifdef LV_HAVE_GENERIC
127  /*!
128  \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value
129  \param inputVector The floating point input data buffer
130  \param outputVector The 8 bit output data buffer
131  \param scalar The value multiplied against each point in the input buffer
132  \param num_points The number of data values to be converted
133  \note Input buffer does NOT need to be properly aligned
134  */
135 static inline void volk_32f_s32f_convert_8i_generic(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
136  int8_t* outputVectorPtr = outputVector;
137  const float* inputVectorPtr = inputVector;
138  unsigned int number = 0;
139  float min_val = -128;
140  float max_val = 127;
141  float r;
142 
143  for(number = 0; number < num_points; number++){
144  r = *inputVectorPtr++ * scalar;
145  if(r > max_val)
146  r = max_val;
147  else if(r < min_val)
148  r = min_val;
149  *outputVectorPtr++ = (int16_t)(r);
150  }
151 }
152 #endif /* LV_HAVE_GENERIC */
153 
154 
155 
156 
157 #endif /* INCLUDED_volk_32f_s32f_convert_8i_u_H */
158 #ifndef INCLUDED_volk_32f_s32f_convert_8i_a_H
159 #define INCLUDED_volk_32f_s32f_convert_8i_a_H
160 
161 #include <volk/volk_common.h>
162 #include <inttypes.h>
163 #include <stdio.h>
164 
165 #ifdef LV_HAVE_SSE2
166 #include <emmintrin.h>
167  /*!
168  \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value
169  \param inputVector The floating point input data buffer
170  \param outputVector The 8 bit output data buffer
171  \param scalar The value multiplied against each point in the input buffer
172  \param num_points The number of data values to be converted
173  */
174 static inline void volk_32f_s32f_convert_8i_a_sse2(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
175  unsigned int number = 0;
176 
177  const unsigned int sixteenthPoints = num_points / 16;
178 
179  const float* inputVectorPtr = (const float*)inputVector;
180  int8_t* outputVectorPtr = outputVector;
181 
182  float min_val = -128;
183  float max_val = 127;
184  float r;
185 
186  __m128 vScalar = _mm_set_ps1(scalar);
187  __m128 inputVal1, inputVal2, inputVal3, inputVal4;
188  __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;
189  __m128 vmin_val = _mm_set_ps1(min_val);
190  __m128 vmax_val = _mm_set_ps1(max_val);
191 
192  for(;number < sixteenthPoints; number++){
193  inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
194  inputVal2 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
195  inputVal3 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
196  inputVal4 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
197 
198  inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
199  inputVal2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
200  inputVal3 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal3, vScalar), vmax_val), vmin_val);
201  inputVal4 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal4, vScalar), vmax_val), vmin_val);
202 
203  intInputVal1 = _mm_cvtps_epi32(inputVal1);
204  intInputVal2 = _mm_cvtps_epi32(inputVal2);
205  intInputVal3 = _mm_cvtps_epi32(inputVal3);
206  intInputVal4 = _mm_cvtps_epi32(inputVal4);
207 
208  intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
209  intInputVal3 = _mm_packs_epi32(intInputVal3, intInputVal4);
210 
211  intInputVal1 = _mm_packs_epi16(intInputVal1, intInputVal3);
212 
213  _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
214  outputVectorPtr += 16;
215  }
216 
217  number = sixteenthPoints * 16;
218  for(; number < num_points; number++){
219  r = inputVector[number] * scalar;
220  if(r > max_val)
221  r = max_val;
222  else if(r < min_val)
223  r = min_val;
224  outputVector[number] = (int8_t)(r);
225  }
226 }
227 #endif /* LV_HAVE_SSE2 */
228 
229 #ifdef LV_HAVE_SSE
230 #include <xmmintrin.h>
231  /*!
232  \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value
233  \param inputVector The floating point input data buffer
234  \param outputVector The 8 bit output data buffer
235  \param scalar The value multiplied against each point in the input buffer
236  \param num_points The number of data values to be converted
237  */
238 static inline void volk_32f_s32f_convert_8i_a_sse(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
239  unsigned int number = 0;
240 
241  const unsigned int quarterPoints = num_points / 4;
242 
243  const float* inputVectorPtr = (const float*)inputVector;
244 
245  float min_val = -128;
246  float max_val = 127;
247  float r;
248 
249  int8_t* outputVectorPtr = outputVector;
250  __m128 vScalar = _mm_set_ps1(scalar);
251  __m128 ret;
252  __m128 vmin_val = _mm_set_ps1(min_val);
253  __m128 vmax_val = _mm_set_ps1(max_val);
254 
255  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
256 
257  for(;number < quarterPoints; number++){
258  ret = _mm_load_ps(inputVectorPtr);
259  inputVectorPtr += 4;
260 
261  ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
262 
263  _mm_store_ps(outputFloatBuffer, ret);
264  *outputVectorPtr++ = (int8_t)(outputFloatBuffer[0]);
265  *outputVectorPtr++ = (int8_t)(outputFloatBuffer[1]);
266  *outputVectorPtr++ = (int8_t)(outputFloatBuffer[2]);
267  *outputVectorPtr++ = (int8_t)(outputFloatBuffer[3]);
268  }
269 
270  number = quarterPoints * 4;
271  for(; number < num_points; number++){
272  r = inputVector[number] * scalar;
273  if(r > max_val)
274  r = max_val;
275  else if(r < min_val)
276  r = min_val;
277  outputVector[number] = (int8_t)(r);
278  }
279 }
280 #endif /* LV_HAVE_SSE */
281 
282 #ifdef LV_HAVE_GENERIC
283  /*!
284  \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value
285  \param inputVector The floating point input data buffer
286  \param outputVector The 8 bit output data buffer
287  \param scalar The value multiplied against each point in the input buffer
288  \param num_points The number of data values to be converted
289  */
290 static inline void volk_32f_s32f_convert_8i_a_generic(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
291  int8_t* outputVectorPtr = outputVector;
292  const float* inputVectorPtr = inputVector;
293  unsigned int number = 0;
294  float min_val = -128;
295  float max_val = 127;
296  float r;
297 
298  for(number = 0; number < num_points; number++){
299  r = *inputVectorPtr++ * scalar;
300  if(r > max_val)
301  r = max_val;
302  else if(r < min_val)
303  r = min_val;
304  *outputVectorPtr++ = (int8_t)(r);
305  }
306 }
307 #endif /* LV_HAVE_GENERIC */
308 
309 
310 
311 
312 #endif /* INCLUDED_volk_32f_s32f_convert_8i_a_H */
signed short int16_t
Definition: stdint.h:76
signed char int8_t
Definition: stdint.h:75
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:27