1 #ifndef INCLUDED_volk_32f_s32f_convert_16i_u_H
2 #define INCLUDED_volk_32f_s32f_convert_16i_u_H
18 static inline void volk_32f_s32f_convert_16i_u_sse2(
int16_t* outputVector,
const float* inputVector,
const float scalar,
unsigned int num_points){
19 unsigned int number = 0;
21 const unsigned int eighthPoints = num_points / 8;
23 const float* inputVectorPtr = (
const float*)inputVector;
24 int16_t* outputVectorPtr = outputVector;
26 float min_val = -32768;
27 float max_val = 32767;
30 __m128 vScalar = _mm_set_ps1(scalar);
31 __m128 inputVal1, inputVal2;
32 __m128i intInputVal1, intInputVal2;
34 __m128 vmin_val = _mm_set_ps1(min_val);
35 __m128 vmax_val = _mm_set_ps1(max_val);
37 for(;number < eighthPoints; number++){
38 inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
39 inputVal2 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
42 ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
43 ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
45 intInputVal1 = _mm_cvtps_epi32(ret1);
46 intInputVal2 = _mm_cvtps_epi32(ret2);
48 intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
50 _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
54 number = eighthPoints * 8;
55 for(; number < num_points; number++){
56 r = inputVector[number] * scalar;
67 #include <xmmintrin.h>
76 static inline void volk_32f_s32f_convert_16i_u_sse(
int16_t* outputVector,
const float* inputVector,
const float scalar,
unsigned int num_points){
77 unsigned int number = 0;
79 const unsigned int quarterPoints = num_points / 4;
81 const float* inputVectorPtr = (
const float*)inputVector;
82 int16_t* outputVectorPtr = outputVector;
84 float min_val = -32768;
85 float max_val = 32767;
88 __m128 vScalar = _mm_set_ps1(scalar);
90 __m128 vmin_val = _mm_set_ps1(min_val);
91 __m128 vmax_val = _mm_set_ps1(max_val);
95 for(;number < quarterPoints; number++){
96 ret = _mm_loadu_ps(inputVectorPtr);
100 ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
102 _mm_store_ps(outputFloatBuffer, ret);
103 *outputVectorPtr++ = (
int16_t)
rintf(outputFloatBuffer[0]);
104 *outputVectorPtr++ = (
int16_t)
rintf(outputFloatBuffer[1]);
105 *outputVectorPtr++ = (
int16_t)
rintf(outputFloatBuffer[2]);
106 *outputVectorPtr++ = (
int16_t)
rintf(outputFloatBuffer[3]);
109 number = quarterPoints * 4;
110 for(; number < num_points; number++){
111 r = inputVector[number] * scalar;
121 #ifdef LV_HAVE_GENERIC
130 static inline void volk_32f_s32f_convert_16i_generic(
int16_t* outputVector,
const float* inputVector,
const float scalar,
unsigned int num_points){
131 int16_t* outputVectorPtr = outputVector;
132 const float* inputVectorPtr = inputVector;
133 unsigned int number = 0;
134 float min_val = -32768;
135 float max_val = 32767;
138 for(number = 0; number < num_points; number++){
139 r = *inputVectorPtr++ * scalar;
153 #ifndef INCLUDED_volk_32f_s32f_convert_16i_a_H
154 #define INCLUDED_volk_32f_s32f_convert_16i_a_H
162 #include <emmintrin.h>
170 static inline void volk_32f_s32f_convert_16i_a_sse2(
int16_t* outputVector,
const float* inputVector,
const float scalar,
unsigned int num_points){
171 unsigned int number = 0;
173 const unsigned int eighthPoints = num_points / 8;
175 const float* inputVectorPtr = (
const float*)inputVector;
176 int16_t* outputVectorPtr = outputVector;
178 float min_val = -32768;
179 float max_val = 32767;
182 __m128 vScalar = _mm_set_ps1(scalar);
183 __m128 inputVal1, inputVal2;
184 __m128i intInputVal1, intInputVal2;
186 __m128 vmin_val = _mm_set_ps1(min_val);
187 __m128 vmax_val = _mm_set_ps1(max_val);
189 for(;number < eighthPoints; number++){
190 inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
191 inputVal2 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
194 ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
195 ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
197 intInputVal1 = _mm_cvtps_epi32(ret1);
198 intInputVal2 = _mm_cvtps_epi32(ret2);
200 intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
202 _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
203 outputVectorPtr += 8;
206 number = eighthPoints * 8;
207 for(; number < num_points; number++){
208 r = inputVector[number] * scalar;
219 #include <xmmintrin.h>
227 static inline void volk_32f_s32f_convert_16i_a_sse(
int16_t* outputVector,
const float* inputVector,
const float scalar,
unsigned int num_points){
228 unsigned int number = 0;
230 const unsigned int quarterPoints = num_points / 4;
232 const float* inputVectorPtr = (
const float*)inputVector;
233 int16_t* outputVectorPtr = outputVector;
235 float min_val = -32768;
236 float max_val = 32767;
239 __m128 vScalar = _mm_set_ps1(scalar);
241 __m128 vmin_val = _mm_set_ps1(min_val);
242 __m128 vmax_val = _mm_set_ps1(max_val);
246 for(;number < quarterPoints; number++){
247 ret = _mm_load_ps(inputVectorPtr);
251 ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
253 _mm_store_ps(outputFloatBuffer, ret);
254 *outputVectorPtr++ = (
int16_t)
rintf(outputFloatBuffer[0]);
255 *outputVectorPtr++ = (
int16_t)
rintf(outputFloatBuffer[1]);
256 *outputVectorPtr++ = (
int16_t)
rintf(outputFloatBuffer[2]);
257 *outputVectorPtr++ = (
int16_t)
rintf(outputFloatBuffer[3]);
260 number = quarterPoints * 4;
261 for(; number < num_points; number++){
262 r = inputVector[number] * scalar;
272 #ifdef LV_HAVE_GENERIC
280 static inline void volk_32f_s32f_convert_16i_a_generic(
int16_t* outputVector,
const float* inputVector,
const float scalar,
unsigned int num_points){
281 int16_t* outputVectorPtr = outputVector;
282 const float* inputVectorPtr = inputVector;
283 unsigned int number = 0;
284 float min_val = -32768;
285 float max_val = 32767;
288 for(number = 0; number < num_points; number++){
289 r = *inputVectorPtr++ * scalar;
static float rintf(float x)
Definition: gnuradio/volk/cmake/msvc/config.h:30
signed short int16_t
Definition: stdint.h:76
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:27