1 #ifndef INCLUDED_volk_16i_s32f_convert_32f_u_H
2 #define INCLUDED_volk_16i_s32f_convert_32f_u_H
/*!
 * \brief Converts 16-bit signed integers to floats, scaling each by 1/scalar
 *        (SSE4.1 implementation using unaligned loads and stores).
 *
 * Eight input samples are handled per vector iteration; any remaining
 * samples (num_points % 8) are converted one at a time.
 *
 * \param outputVector Destination buffer; receives num_points floats.
 * \param inputVector  Source buffer of num_points int16_t samples.
 * \param scalar       Divisor applied to every converted sample.
 * \param num_points   Number of samples to convert.
 *
 * \note The vector path multiplies by a precomputed reciprocal while the
 *       scalar tail divides, so for scalars whose reciprocal is not exactly
 *       representable the two paths may differ in the last bit.
 */
static inline void volk_16i_s32f_convert_32f_u_sse4_1(float* outputVector,
                                                      const int16_t* inputVector,
                                                      const float scalar,
                                                      unsigned int num_points)
{
  unsigned int number = 0;
  const unsigned int eighthPoints = num_points / 8;

  float* outputVectorPtr = outputVector;
  const int16_t* inputPtr = inputVector;
  __m128 invScalar = _mm_set_ps1(1.0/scalar);
  __m128i inputVal;
  __m128i inputVal2;
  __m128 ret;

  for(; number < eighthPoints; number++){
    /* Load eight 16-bit samples (no alignment requirement). */
    inputVal = _mm_loadu_si128((const __m128i*)inputPtr);

    /* Move the upper four samples down into the low 64 bits. */
    inputVal2 = _mm_srli_si128(inputVal, 8);

    /* Sign-extend the low four 16-bit lanes of each register to 32 bits. */
    inputVal = _mm_cvtepi16_epi32(inputVal);
    inputVal2 = _mm_cvtepi16_epi32(inputVal2);

    /* Samples 0..3 */
    ret = _mm_cvtepi32_ps(inputVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_storeu_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    /* Samples 4..7 */
    ret = _mm_cvtepi32_ps(inputVal2);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_storeu_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    inputPtr += 8;
  }

  /* Scalar tail for the samples that did not fill a full group of eight. */
  number = eighthPoints * 8;
  for(; number < num_points; number++){
    outputVector[number] = ((float)(inputVector[number])) / scalar;
  }
}
63 #include <xmmintrin.h>
/*!
 * \brief Converts 16-bit signed integers to floats, scaling each by 1/scalar
 *        (SSE implementation using unaligned stores).
 *
 * Four input samples are handled per vector iteration; any remaining
 * samples (num_points % 4) are converted one at a time.
 *
 * \param outputVector Destination buffer; receives num_points floats.
 * \param inputVector  Source buffer of num_points int16_t samples.
 * \param scalar       Divisor applied to every converted sample.
 * \param num_points   Number of samples to convert.
 *
 * \note The vector path multiplies by a precomputed reciprocal while the
 *       scalar tail divides, so for scalars whose reciprocal is not exactly
 *       representable the two paths may differ in the last bit.
 */
static inline void volk_16i_s32f_convert_32f_u_sse(float* outputVector,
                                                   const int16_t* inputVector,
                                                   const float scalar,
                                                   unsigned int num_points)
{
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  float* outputVectorPtr = outputVector;
  const int16_t* inputPtr = inputVector;
  __m128 invScalar = _mm_set_ps1(1.0/scalar);
  __m128 ret;

  for(; number < quarterPoints; number++){
    /* _mm_set_ps takes its arguments from the highest lane down, so
       inputPtr[0] lands in lane 0 and is stored first. */
    ret = _mm_set_ps((float)(inputPtr[3]),
                     (float)(inputPtr[2]),
                     (float)(inputPtr[1]),
                     (float)(inputPtr[0]));

    ret = _mm_mul_ps(ret, invScalar);
    _mm_storeu_ps(outputVectorPtr, ret);

    inputPtr += 4;
    outputVectorPtr += 4;
  }

  /* Scalar tail for the samples that did not fill a full group of four. */
  number = quarterPoints * 4;
  for(; number < num_points; number++){
    outputVector[number] = (float)(inputVector[number]) / scalar;
  }
}
99 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Converts 16-bit signed integers to floats, dividing each by scalar
 *        (portable C implementation).
 *
 * \param outputVector Destination buffer; receives num_points floats.
 * \param inputVector  Source buffer of num_points int16_t samples.
 * \param scalar       Divisor applied to every converted sample.
 * \param num_points   Number of samples to convert.
 */
static inline void volk_16i_s32f_convert_32f_generic(float* outputVector,
                                                     const int16_t* inputVector,
                                                     const float scalar,
                                                     unsigned int num_points)
{
  float* outputVectorPtr = outputVector;
  const int16_t* inputVectorPtr = inputVector;
  unsigned int number;

  for(number = 0; number < num_points; number++){
    *outputVectorPtr++ = ((float)(*inputVectorPtr++)) / scalar;
  }
}
123 #ifndef INCLUDED_volk_16i_s32f_convert_32f_a_H
124 #define INCLUDED_volk_16i_s32f_convert_32f_a_H
129 #ifdef LV_HAVE_SSE4_1
130 #include <smmintrin.h>
/*!
 * \brief Converts 16-bit signed integers to floats, scaling each by 1/scalar
 *        (SSE4.1 implementation, "_a" kernel variant).
 *
 * Eight input samples are handled per vector iteration; any remaining
 * samples (num_points % 8) are converted one at a time.
 *
 * \param outputVector Destination buffer; receives num_points floats.
 * \param inputVector  Source buffer of num_points int16_t samples.
 * \param scalar       Divisor applied to every converted sample.
 * \param num_points   Number of samples to convert.
 *
 * \note This implementation uses the unaligned load/store intrinsics, so it
 *       does not itself require 16-byte aligned buffers.
 * \note The vector path multiplies by a precomputed reciprocal while the
 *       scalar tail divides, so for scalars whose reciprocal is not exactly
 *       representable the two paths may differ in the last bit.
 */
static inline void volk_16i_s32f_convert_32f_a_sse4_1(float* outputVector,
                                                      const int16_t* inputVector,
                                                      const float scalar,
                                                      unsigned int num_points)
{
  unsigned int number = 0;
  const unsigned int eighthPoints = num_points / 8;

  float* outputVectorPtr = outputVector;
  const int16_t* inputPtr = inputVector;
  __m128 invScalar = _mm_set_ps1(1.0/scalar);
  __m128i inputVal;
  __m128i inputVal2;
  __m128 ret;

  for(; number < eighthPoints; number++){
    /* Load eight 16-bit samples. */
    inputVal = _mm_loadu_si128((const __m128i*)inputPtr);

    /* Move the upper four samples down into the low 64 bits. */
    inputVal2 = _mm_srli_si128(inputVal, 8);

    /* Sign-extend the low four 16-bit lanes of each register to 32 bits. */
    inputVal = _mm_cvtepi16_epi32(inputVal);
    inputVal2 = _mm_cvtepi16_epi32(inputVal2);

    /* Samples 0..3 */
    ret = _mm_cvtepi32_ps(inputVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_storeu_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    /* Samples 4..7 */
    ret = _mm_cvtepi32_ps(inputVal2);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_storeu_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    inputPtr += 8;
  }

  /* Scalar tail for the samples that did not fill a full group of eight. */
  number = eighthPoints * 8;
  for(; number < num_points; number++){
    outputVector[number] = ((float)(inputVector[number])) / scalar;
  }
}
184 #include <xmmintrin.h>
/*!
 * \brief Converts 16-bit signed integers to floats, scaling each by 1/scalar
 *        (SSE implementation, "_a" kernel variant).
 *
 * Four input samples are handled per vector iteration; any remaining
 * samples (num_points % 4) are converted one at a time.
 *
 * \param outputVector Destination buffer; receives num_points floats.
 * \param inputVector  Source buffer of num_points int16_t samples.
 * \param scalar       Divisor applied to every converted sample.
 * \param num_points   Number of samples to convert.
 *
 * \note This implementation uses the unaligned store intrinsic, so it does
 *       not itself require a 16-byte aligned output buffer.
 * \note The vector path multiplies by a precomputed reciprocal while the
 *       scalar tail divides, so for scalars whose reciprocal is not exactly
 *       representable the two paths may differ in the last bit.
 */
static inline void volk_16i_s32f_convert_32f_a_sse(float* outputVector,
                                                   const int16_t* inputVector,
                                                   const float scalar,
                                                   unsigned int num_points)
{
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  float* outputVectorPtr = outputVector;
  const int16_t* inputPtr = inputVector;
  __m128 invScalar = _mm_set_ps1(1.0/scalar);
  __m128 ret;

  for(; number < quarterPoints; number++){
    /* _mm_set_ps takes its arguments from the highest lane down, so
       inputPtr[0] lands in lane 0 and is stored first. */
    ret = _mm_set_ps((float)(inputPtr[3]),
                     (float)(inputPtr[2]),
                     (float)(inputPtr[1]),
                     (float)(inputPtr[0]));

    ret = _mm_mul_ps(ret, invScalar);
    _mm_storeu_ps(outputVectorPtr, ret);

    inputPtr += 4;
    outputVectorPtr += 4;
  }

  /* Scalar tail for the samples that did not fill a full group of four. */
  number = quarterPoints * 4;
  for(; number < num_points; number++){
    outputVector[number] = (float)(inputVector[number]) / scalar;
  }
}
219 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Converts 16-bit signed integers to floats, dividing each by scalar
 *        (portable C implementation, "_a" kernel variant).
 *
 * \param outputVector Destination buffer; receives num_points floats.
 * \param inputVector  Source buffer of num_points int16_t samples.
 * \param scalar       Divisor applied to every converted sample.
 * \param num_points   Number of samples to convert.
 */
static inline void volk_16i_s32f_convert_32f_a_generic(float* outputVector,
                                                       const int16_t* inputVector,
                                                       const float scalar,
                                                       unsigned int num_points)
{
  float* outputVectorPtr = outputVector;
  const int16_t* inputVectorPtr = inputVector;
  unsigned int number;

  for(number = 0; number < num_points; number++){
    *outputVectorPtr++ = ((float)(*inputVectorPtr++)) / scalar;
  }
}
/* Referenced type: int16_t is defined as signed short (stdint.h:76). */