1 #ifndef INCLUDED_volk_8i_s32f_convert_32f_u_H
2 #define INCLUDED_volk_8i_s32f_convert_32f_u_H
/*!
  \brief Converts a vector of 8-bit signed integers to 32-bit floats, multiplying
         each converted sample by 1/scalar (SSE4.1, unaligned loads and stores).
  \param outputVector Destination for num_points floats; accessed with unaligned stores
  \param inputVector  Source of num_points int8_t samples; accessed with unaligned loads
  \param scalar       Divisor applied to every sample (implemented as a multiply by its reciprocal)
  \param num_points   Number of samples to convert
*/
static inline void volk_8i_s32f_convert_32f_u_sse4_1(
float* outputVector,
const int8_t* inputVector,
const float scalar,
unsigned int num_points){
  unsigned int number = 0;
  const unsigned int sixteenthPoints = num_points / 16;

  float* outputVectorPtr = outputVector;
  const float iScalar = 1.0 / scalar;
  __m128 invScalar = _mm_set_ps1( iScalar );
  const int8_t* inputVectorPtr = inputVector;
  __m128 ret;
  __m128i inputVal;
  __m128i interimVal;

  /* Each iteration consumes 16 input bytes and produces 16 floats, four at a time. */
  for(;number < sixteenthPoints; number++){
    inputVal = _mm_loadu_si128((const __m128i*)inputVectorPtr);

    /* Bytes 0..3: sign-extend to 32-bit ints, convert to float, scale, store. */
    interimVal = _mm_cvtepi8_epi32(inputVal);
    ret = _mm_cvtepi32_ps(interimVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_storeu_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    /* Shift the register right by 4 bytes so the next four samples sit in the low lanes. */
    inputVal = _mm_srli_si128(inputVal, 4);
    interimVal = _mm_cvtepi8_epi32(inputVal);
    ret = _mm_cvtepi32_ps(interimVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_storeu_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    inputVal = _mm_srli_si128(inputVal, 4);
    interimVal = _mm_cvtepi8_epi32(inputVal);
    ret = _mm_cvtepi32_ps(interimVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_storeu_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    inputVal = _mm_srli_si128(inputVal, 4);
    interimVal = _mm_cvtepi8_epi32(inputVal);
    ret = _mm_cvtepi32_ps(interimVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_storeu_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    inputVectorPtr += 16;
  }

  /* Scalar tail for the num_points % 16 samples the vector loop did not cover. */
  number = sixteenthPoints * 16;
  for(; number < num_points; number++){
    outputVector[number] = (float)(inputVector[number]) * iScalar;
  }
}
70 #ifdef LV_HAVE_GENERIC
/*!
  \brief Converts a vector of 8-bit signed integers to 32-bit floats, multiplying
         each converted sample by 1/scalar (portable C version).
  \param outputVector Destination for num_points floats
  \param inputVector  Source of num_points int8_t samples
  \param scalar       Divisor applied to every sample (implemented as a multiply by its reciprocal)
  \param num_points   Number of samples to convert
*/
static inline void volk_8i_s32f_convert_32f_generic(
float* outputVector,
const int8_t* inputVector,
const float scalar,
unsigned int num_points){
  /* Reciprocal is computed once in double precision and narrowed to float. */
  const float iScalar = (float)(1.0 / scalar);
  unsigned int number;

  for(number = 0; number < num_points; number++){
    outputVector[number] = ((float)(inputVector[number])) * iScalar;
  }
}
95 #ifndef INCLUDED_volk_8i_s32f_convert_32f_a_H
96 #define INCLUDED_volk_8i_s32f_convert_32f_a_H
101 #ifdef LV_HAVE_SSE4_1
102 #include <smmintrin.h>
/*!
  \brief Converts a vector of 8-bit signed integers to 32-bit floats, multiplying
         each converted sample by 1/scalar (SSE4.1, aligned loads and stores).
  \param outputVector Destination for num_points floats; must be 16-byte aligned
                      because it is written with _mm_store_ps
  \param inputVector  Source of num_points int8_t samples; must be 16-byte aligned
                      because it is read with _mm_load_si128
  \param scalar       Divisor applied to every sample (implemented as a multiply by its reciprocal)
  \param num_points   Number of samples to convert
*/
static inline void volk_8i_s32f_convert_32f_a_sse4_1(
float* outputVector,
const int8_t* inputVector,
const float scalar,
unsigned int num_points){
  unsigned int number = 0;
  const unsigned int sixteenthPoints = num_points / 16;

  float* outputVectorPtr = outputVector;
  const float iScalar = 1.0 / scalar;
  __m128 invScalar = _mm_set_ps1(iScalar);
  const int8_t* inputVectorPtr = inputVector;
  __m128 ret;
  __m128i inputVal;
  __m128i interimVal;

  /* Each iteration consumes 16 input bytes and produces 16 floats, four at a time. */
  for(;number < sixteenthPoints; number++){
    inputVal = _mm_load_si128((const __m128i*)inputVectorPtr);

    /* Bytes 0..3: sign-extend to 32-bit ints, convert to float, scale, store. */
    interimVal = _mm_cvtepi8_epi32(inputVal);
    ret = _mm_cvtepi32_ps(interimVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_store_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    /* Shift the register right by 4 bytes so the next four samples sit in the low lanes. */
    inputVal = _mm_srli_si128(inputVal, 4);
    interimVal = _mm_cvtepi8_epi32(inputVal);
    ret = _mm_cvtepi32_ps(interimVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_store_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    inputVal = _mm_srli_si128(inputVal, 4);
    interimVal = _mm_cvtepi8_epi32(inputVal);
    ret = _mm_cvtepi32_ps(interimVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_store_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    inputVal = _mm_srli_si128(inputVal, 4);
    interimVal = _mm_cvtepi8_epi32(inputVal);
    ret = _mm_cvtepi32_ps(interimVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_store_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    inputVectorPtr += 16;
  }

  /* Scalar tail for the num_points % 16 samples the vector loop did not cover. */
  number = sixteenthPoints * 16;
  for(; number < num_points; number++){
    outputVector[number] = (float)(inputVector[number]) * iScalar;
  }
}
163 #ifdef LV_HAVE_GENERIC
/*!
  \brief Converts a vector of 8-bit signed integers to 32-bit floats, multiplying
         each converted sample by 1/scalar (portable C version of the aligned kernel).
  \param outputVector Destination for num_points floats
  \param inputVector  Source of num_points int8_t samples
  \param scalar       Divisor applied to every sample (implemented as a multiply by its reciprocal)
  \param num_points   Number of samples to convert
  \note This implementation uses plain element accesses and imposes no alignment
        requirement of its own.
*/
static inline void volk_8i_s32f_convert_32f_a_generic(
float* outputVector,
const int8_t* inputVector,
const float scalar,
unsigned int num_points){
  /* Reciprocal is computed once in double precision and narrowed to float. */
  const float iScalar = (float)(1.0 / scalar);
  unsigned int number;

  for(number = 0; number < num_points; number++){
    outputVector[number] = ((float)(inputVector[number])) * iScalar;
  }
}
/*!
  \brief Conversion kernel implemented outside this header (presumably the
         ORC-generated code, judging by its name -- confirm against the build).
  \note The wrapper below passes the reciprocal of its own scalar as this
        function's third argument.
*/
extern void volk_8i_s32f_convert_32f_a_orc_impl(
float* outputVector,
const int8_t* inputVector,
const float scalar,
unsigned int num_points);

/*!
  \brief Converts 8-bit signed integers to 32-bit floats by delegating to
         volk_8i_s32f_convert_32f_a_orc_impl with a precomputed reciprocal.
  \param outputVector Destination buffer, forwarded unchanged
  \param inputVector  Source buffer, forwarded unchanged
  \param scalar       Divisor; its reciprocal (1/scalar) is what gets forwarded
  \param num_points   Number of samples, forwarded unchanged
  \note NOTE(review): this wrapper is named _u_ but forwards to an _a_ implementation;
        confirm whether the external kernel requires aligned buffers.
*/
static inline void volk_8i_s32f_convert_32f_u_orc(
float* outputVector,
const int8_t* inputVector,
const float scalar,
unsigned int num_points){
  float invscalar = 1.0 / scalar;
  volk_8i_s32f_convert_32f_a_orc_impl(outputVector, inputVector, invscalar, num_points);
}
signed char int8_t
Definition: stdint.h:75