1 #ifndef INCLUDED_volk_32f_s32f_convert_32i_u_H
2 #define INCLUDED_volk_32f_s32f_convert_32i_u_H
/*!
  \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a saturated 32 bit integer value (unaligned SSE2 version)
  \param outputVector The 32 bit integer output data buffer
  \param inputVector The floating point input data buffer (no alignment required)
  \param scalar The value multiplied against each point in the input buffer
  \param num_points The number of data values to be converted

  Products at or above 2^31 (and NaN) produce INT32_MAX; products below -2^31
  produce INT32_MIN.

  \note Full groups of four points are converted with _mm_cvtps_epi32, which
  rounds according to the current MXCSR rounding mode; the remaining points
  are converted with a C cast, which truncates toward zero.
*/
static inline void volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;

  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = inputVector;
  int32_t* outputVectorPtr = outputVector;

  /* 2147483647 is not representable as a float (it rounds to 2^31), so the
     clamp bounds are written as the exact powers of two they really are. */
  const float min_val = -2147483648.0f;
  const float max_val = 2147483648.0f;
  float r;

  const __m128 vScalar = _mm_set_ps1(scalar);
  const __m128 vmin_val = _mm_set_ps1(min_val);
  const __m128 vmax_val = _mm_set_ps1(max_val);
  __m128 inputVal1;
  __m128i intInputVal1;

  for(; number < quarterPoints; number++){
    inputVal1 = _mm_loadu_ps(inputVectorPtr);
    inputVectorPtr += 4;

    /* _mm_min_ps returns its second operand when the first is NaN, so NaN
       products end up as max_val here. */
    inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
    intInputVal1 = _mm_cvtps_epi32(inputVal1);

    /* Lanes clamped to 2^31 do not fit in int32 and convert to 0x80000000.
       XOR-ing those lanes with an all-ones mask turns them into 0x7FFFFFFF. */
    intInputVal1 = _mm_xor_si128(intInputVal1, _mm_castps_si128(_mm_cmpge_ps(inputVal1, vmax_val)));

    _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
    outputVectorPtr += 4;
  }

  /* Scalar tail for the last num_points % 4 values. */
  for(number = quarterPoints * 4; number < num_points; number++){
    r = inputVector[number] * scalar;
    if(!(r < max_val)){
      /* also taken for NaN, matching the vector path */
      outputVector[number] = 2147483647;
    }
    else if(r < min_val){
      outputVector[number] = -2147483647 - 1;
    }
    else{
      outputVector[number] = (int32_t)(r);
    }
  }
}
58 #include <xmmintrin.h>
/*!
  \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a saturated 32 bit integer value (unaligned SSE version)
  \param outputVector The 32 bit integer output data buffer
  \param inputVector The floating point input data buffer (no alignment required)
  \param scalar The value multiplied against each point in the input buffer
  \param num_points The number of data values to be converted

  Products at or above 2^31 (and NaN) produce INT32_MAX; products below -2^31
  produce INT32_MIN. All conversions use a C cast and therefore truncate
  toward zero.
*/
static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;
  unsigned int lane;

  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = inputVector;
  int32_t* outputVectorPtr = outputVector;

  /* 2147483647 is not representable as a float (it rounds to 2^31), so the
     clamp bounds are written as the exact powers of two they really are. */
  const float min_val = -2147483648.0f;
  const float max_val = 2147483648.0f;
  float r;

  const __m128 vScalar = _mm_set_ps1(scalar);
  const __m128 vmin_val = _mm_set_ps1(min_val);
  const __m128 vmax_val = _mm_set_ps1(max_val);
  __m128 ret;

  /* Staging buffer for the clamped floats; written with an unaligned store
     so it carries no alignment requirement. */
  float outputFloatBuffer[4];

  for(; number < quarterPoints; number++){
    ret = _mm_loadu_ps(inputVectorPtr);
    inputVectorPtr += 4;

    /* _mm_min_ps returns its second operand when the first is NaN, so NaN
       products end up as max_val here. */
    ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);

    _mm_storeu_ps(outputFloatBuffer, ret);
    for(lane = 0; lane < 4; lane++){
      /* A lane clamped to 2^31 does not fit in int32; saturate it
         explicitly instead of casting. */
      if(outputFloatBuffer[lane] >= max_val){
        *outputVectorPtr++ = 2147483647;
      }
      else{
        *outputVectorPtr++ = (int32_t)(outputFloatBuffer[lane]);
      }
    }
  }

  /* Scalar tail for the last num_points % 4 values. */
  for(number = quarterPoints * 4; number < num_points; number++){
    r = inputVector[number] * scalar;
    if(!(r < max_val)){
      /* also taken for NaN, matching the vector path */
      outputVector[number] = 2147483647;
    }
    else if(r < min_val){
      outputVector[number] = -2147483647 - 1;
    }
    else{
      outputVector[number] = (int32_t)(r);
    }
  }
}
111 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a saturated 32 bit integer value (generic version)
  \param outputVector The 32 bit integer output data buffer
  \param inputVector The floating point input data buffer
  \param scalar The value multiplied against each point in the input buffer
  \param num_points The number of data values to be converted

  Products at or above 2^31 (and NaN) produce INT32_MAX; products below -2^31
  produce INT32_MIN. In-range products are truncated toward zero by the cast.
*/
static inline void volk_32f_s32f_convert_32i_generic(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  int32_t* outputVectorPtr = outputVector;
  const float* inputVectorPtr = inputVector;
  unsigned int number = 0;

  /* 2147483647 is not representable as a float (it rounds to 2^31), so the
     bounds are written as the exact powers of two they really are. */
  const float min_val = -2147483648.0f;
  const float max_val = 2147483648.0f;
  float r;

  for(number = 0; number < num_points; number++){
    r = *inputVectorPtr++ * scalar;
    if(!(r < max_val)){
      /* r >= 2^31 or NaN: casting would be undefined behaviour */
      *outputVectorPtr++ = 2147483647;
    }
    else if(r < min_val){
      *outputVectorPtr++ = -2147483647 - 1;
    }
    else{
      *outputVectorPtr++ = (int32_t)(r);
    }
  }
}
143 #ifndef INCLUDED_volk_32f_s32f_convert_32i_a_H
144 #define INCLUDED_volk_32f_s32f_convert_32i_a_H
151 #include <immintrin.h>
/*!
  \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a saturated 32 bit integer value (aligned AVX version)
  \param outputVector The 32 bit integer output data buffer (must be 32-byte aligned)
  \param inputVector The floating point input data buffer (must be 32-byte aligned)
  \param scalar The value multiplied against each point in the input buffer
  \param num_points The number of data values to be converted

  Products at or above 2^31 (and NaN) produce INT32_MAX; products below -2^31
  produce INT32_MIN.

  \note Full groups of eight points are converted with _mm256_cvtps_epi32,
  which rounds according to the current MXCSR rounding mode; the remaining
  points are converted with a C cast, which truncates toward zero.
*/
static inline void volk_32f_s32f_convert_32i_a_avx(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;

  const unsigned int eighthPoints = num_points / 8;

  const float* inputVectorPtr = inputVector;
  int32_t* outputVectorPtr = outputVector;

  /* 2147483647 is not representable as a float (it rounds to 2^31), so the
     clamp bounds are written as the exact powers of two they really are. */
  const float min_val = -2147483648.0f;
  const float max_val = 2147483648.0f;
  float r;

  const __m256 vScalar = _mm256_set1_ps(scalar);
  const __m256 vmin_val = _mm256_set1_ps(min_val);
  const __m256 vmax_val = _mm256_set1_ps(max_val);
  __m256 inputVal1;
  __m256 overflowMask;
  __m256i intInputVal1;

  for(; number < eighthPoints; number++){
    inputVal1 = _mm256_load_ps(inputVectorPtr);
    inputVectorPtr += 8;

    /* _mm256_min_ps returns its second operand when the first is NaN, so NaN
       products end up as max_val here. */
    inputVal1 = _mm256_max_ps(_mm256_min_ps(_mm256_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
    intInputVal1 = _mm256_cvtps_epi32(inputVal1);

    /* Lanes clamped to 2^31 do not fit in int32 and convert to 0x80000000.
       XOR-ing those lanes with an all-ones mask turns them into 0x7FFFFFFF.
       The XOR is done in the float domain because 256-bit integer XOR
       requires AVX2. */
    overflowMask = _mm256_cmp_ps(inputVal1, vmax_val, _CMP_GE_OQ);
    intInputVal1 = _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(intInputVal1), overflowMask));

    _mm256_store_si256((__m256i*)outputVectorPtr, intInputVal1);
    outputVectorPtr += 8;
  }

  /* Scalar tail for the last num_points % 8 values. */
  for(number = eighthPoints * 8; number < num_points; number++){
    r = inputVector[number] * scalar;
    if(!(r < max_val)){
      /* also taken for NaN, matching the vector path */
      outputVector[number] = 2147483647;
    }
    else if(r < min_val){
      outputVector[number] = -2147483647 - 1;
    }
    else{
      outputVector[number] = (int32_t)(r);
    }
  }
}
200 #include <emmintrin.h>
/*!
  \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a saturated 32 bit integer value (aligned SSE2 version)
  \param outputVector The 32 bit integer output data buffer (must be 16-byte aligned)
  \param inputVector The floating point input data buffer (must be 16-byte aligned)
  \param scalar The value multiplied against each point in the input buffer
  \param num_points The number of data values to be converted

  Products at or above 2^31 (and NaN) produce INT32_MAX; products below -2^31
  produce INT32_MIN.

  \note Full groups of four points are converted with _mm_cvtps_epi32, which
  rounds according to the current MXCSR rounding mode; the remaining points
  are converted with a C cast, which truncates toward zero.
*/
static inline void volk_32f_s32f_convert_32i_a_sse2(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;

  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = inputVector;
  int32_t* outputVectorPtr = outputVector;

  /* 2147483647 is not representable as a float (it rounds to 2^31), so the
     clamp bounds are written as the exact powers of two they really are. */
  const float min_val = -2147483648.0f;
  const float max_val = 2147483648.0f;
  float r;

  const __m128 vScalar = _mm_set_ps1(scalar);
  const __m128 vmin_val = _mm_set_ps1(min_val);
  const __m128 vmax_val = _mm_set_ps1(max_val);
  __m128 inputVal1;
  __m128i intInputVal1;

  for(; number < quarterPoints; number++){
    inputVal1 = _mm_load_ps(inputVectorPtr);
    inputVectorPtr += 4;

    /* _mm_min_ps returns its second operand when the first is NaN, so NaN
       products end up as max_val here. */
    inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
    intInputVal1 = _mm_cvtps_epi32(inputVal1);

    /* Lanes clamped to 2^31 do not fit in int32 and convert to 0x80000000.
       XOR-ing those lanes with an all-ones mask turns them into 0x7FFFFFFF. */
    intInputVal1 = _mm_xor_si128(intInputVal1, _mm_castps_si128(_mm_cmpge_ps(inputVal1, vmax_val)));

    _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
    outputVectorPtr += 4;
  }

  /* Scalar tail for the last num_points % 4 values. */
  for(number = quarterPoints * 4; number < num_points; number++){
    r = inputVector[number] * scalar;
    if(!(r < max_val)){
      /* also taken for NaN, matching the vector path */
      outputVector[number] = 2147483647;
    }
    else if(r < min_val){
      outputVector[number] = -2147483647 - 1;
    }
    else{
      outputVector[number] = (int32_t)(r);
    }
  }
}
249 #include <xmmintrin.h>
/*!
  \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a saturated 32 bit integer value (aligned SSE version)
  \param outputVector The 32 bit integer output data buffer
  \param inputVector The floating point input data buffer (must be 16-byte aligned)
  \param scalar The value multiplied against each point in the input buffer
  \param num_points The number of data values to be converted

  Products at or above 2^31 (and NaN) produce INT32_MAX; products below -2^31
  produce INT32_MIN. All conversions use a C cast and therefore truncate
  toward zero.
*/
static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;
  unsigned int lane;

  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = inputVector;
  int32_t* outputVectorPtr = outputVector;

  /* 2147483647 is not representable as a float (it rounds to 2^31), so the
     clamp bounds are written as the exact powers of two they really are. */
  const float min_val = -2147483648.0f;
  const float max_val = 2147483648.0f;
  float r;

  const __m128 vScalar = _mm_set_ps1(scalar);
  const __m128 vmin_val = _mm_set_ps1(min_val);
  const __m128 vmax_val = _mm_set_ps1(max_val);
  __m128 ret;

  /* Staging buffer for the clamped floats; written with an unaligned store
     so it carries no alignment requirement. */
  float outputFloatBuffer[4];

  for(; number < quarterPoints; number++){
    ret = _mm_load_ps(inputVectorPtr);
    inputVectorPtr += 4;

    /* _mm_min_ps returns its second operand when the first is NaN, so NaN
       products end up as max_val here. */
    ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);

    _mm_storeu_ps(outputFloatBuffer, ret);
    for(lane = 0; lane < 4; lane++){
      /* A lane clamped to 2^31 does not fit in int32; saturate it
         explicitly instead of casting. */
      if(outputFloatBuffer[lane] >= max_val){
        *outputVectorPtr++ = 2147483647;
      }
      else{
        *outputVectorPtr++ = (int32_t)(outputFloatBuffer[lane]);
      }
    }
  }

  /* Scalar tail for the last num_points % 4 values. */
  for(number = quarterPoints * 4; number < num_points; number++){
    r = inputVector[number] * scalar;
    if(!(r < max_val)){
      /* also taken for NaN, matching the vector path */
      outputVector[number] = 2147483647;
    }
    else if(r < min_val){
      outputVector[number] = -2147483647 - 1;
    }
    else{
      outputVector[number] = (int32_t)(r);
    }
  }
}
301 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a saturated 32 bit integer value (generic version of the aligned kernel)
  \param outputVector The 32 bit integer output data buffer
  \param inputVector The floating point input data buffer
  \param scalar The value multiplied against each point in the input buffer
  \param num_points The number of data values to be converted

  Products at or above 2^31 (and NaN) produce INT32_MAX; products below -2^31
  produce INT32_MIN. In-range products are truncated toward zero by the cast.
*/
static inline void volk_32f_s32f_convert_32i_a_generic(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  int32_t* outputVectorPtr = outputVector;
  const float* inputVectorPtr = inputVector;
  unsigned int number = 0;

  /* 2147483647 is not representable as a float (it rounds to 2^31), so the
     bounds are written as the exact powers of two they really are. */
  const float min_val = -2147483648.0f;
  const float max_val = 2147483648.0f;
  float r;

  for(number = 0; number < num_points; number++){
    r = *inputVectorPtr++ * scalar;
    if(!(r < max_val)){
      /* r >= 2^31 or NaN: casting would be undefined behaviour */
      *outputVectorPtr++ = 2147483647;
    }
    else if(r < min_val){
      *outputVectorPtr++ = -2147483647 - 1;
    }
    else{
      *outputVectorPtr++ = (int32_t)(r);
    }
  }
}
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:27
signed int int32_t
Definition: stdint.h:77