1 #ifndef INCLUDED_volk_64f_convert_32f_u_H
2 #define INCLUDED_volk_64f_convert_32f_u_H
/*!
 * \brief Converts an array of doubles to floats using SSE2 unaligned loads and stores.
 *
 * Four points are converted per vector iteration; the remaining
 * (num_points % 4) points are converted with a scalar cast.
 *
 * \param outputVector Destination for num_points floats. Written with
 *                     _mm_storeu_ps, so no particular alignment is needed.
 * \param inputVector  Source of num_points doubles. Read with _mm_loadu_pd,
 *                     so no particular alignment is needed.
 * \param num_points   Number of values to convert.
 */
static inline void volk_64f_convert_32f_u_sse2(float* outputVector, const double* inputVector, unsigned int num_points){
  unsigned int number = 0;

  const unsigned int quarterPoints = num_points / 4;

  const double* inputVectorPtr = inputVector;
  float* outputVectorPtr = outputVector;
  __m128 ret, ret2;
  __m128d inputVal1, inputVal2;

  for(; number < quarterPoints; number++){
    inputVal1 = _mm_loadu_pd(inputVectorPtr); inputVectorPtr += 2;
    inputVal2 = _mm_loadu_pd(inputVectorPtr); inputVectorPtr += 2;

    /* Each conversion leaves its two floats in the low half of the register. */
    ret = _mm_cvtpd_ps(inputVal1);
    ret2 = _mm_cvtpd_ps(inputVal2);

    /* Pack the second pair into the high half: { in0, in1, in2, in3 }. */
    ret = _mm_movelh_ps(ret, ret2);

    _mm_storeu_ps(outputVectorPtr, ret);
    /* Advance past the four floats just written; without this every
       iteration would overwrite the first four outputs. */
    outputVectorPtr += 4;
  }

  /* Scalar tail for the points that do not fill a whole vector. */
  number = quarterPoints * 4;
  for(; number < num_points; number++){
    outputVector[number] = (float)(inputVector[number]);
  }
}
46 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Converts an array of doubles to floats with a plain scalar loop.
 *
 * \param outputVector Destination for num_points floats.
 * \param inputVector  Source of num_points doubles.
 * \param num_points   Number of values to convert; 0 writes nothing.
 */
static inline void volk_64f_convert_32f_generic(float* outputVector, const double* inputVector, unsigned int num_points){
  float* outputVectorPtr = outputVector;
  const double* inputVectorPtr = inputVector;
  unsigned int number = 0;

  for(number = 0; number < num_points; number++){
    *outputVectorPtr++ = ((float)(*inputVectorPtr++));
  }
}
68 #ifndef INCLUDED_volk_64f_convert_32f_a_H
69 #define INCLUDED_volk_64f_convert_32f_a_H
75 #include <emmintrin.h>
/*!
 * \brief Converts an array of doubles to floats using SSE2 aligned loads and stores.
 *
 * Four points are converted per vector iteration; the remaining
 * (num_points % 4) points are converted with a scalar cast.
 *
 * \param outputVector Destination for num_points floats. Written with
 *                     _mm_store_ps, which requires a 16-byte aligned address.
 * \param inputVector  Source of num_points doubles. Read with _mm_load_pd,
 *                     which requires a 16-byte aligned address.
 * \param num_points   Number of values to convert.
 */
static inline void volk_64f_convert_32f_a_sse2(float* outputVector, const double* inputVector, unsigned int num_points){
  unsigned int number = 0;

  const unsigned int quarterPoints = num_points / 4;

  const double* inputVectorPtr = inputVector;
  float* outputVectorPtr = outputVector;
  __m128 ret, ret2;
  __m128d inputVal1, inputVal2;

  for(; number < quarterPoints; number++){
    inputVal1 = _mm_load_pd(inputVectorPtr); inputVectorPtr += 2;
    inputVal2 = _mm_load_pd(inputVectorPtr); inputVectorPtr += 2;

    /* Each conversion leaves its two floats in the low half of the register. */
    ret = _mm_cvtpd_ps(inputVal1);
    ret2 = _mm_cvtpd_ps(inputVal2);

    /* Pack the second pair into the high half: { in0, in1, in2, in3 }. */
    ret = _mm_movelh_ps(ret, ret2);

    _mm_store_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;
  }

  /* Scalar tail for the points that do not fill a whole vector. */
  number = quarterPoints * 4;
  for(; number < num_points; number++){
    outputVector[number] = (float)(inputVector[number]);
  }
}
113 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Converts an array of doubles to floats with a plain scalar loop.
 *
 * The loop uses ordinary pointer dereferences only, so it places no
 * alignment requirement on either buffer beyond what the types need.
 *
 * \param outputVector Destination for num_points floats.
 * \param inputVector  Source of num_points doubles.
 * \param num_points   Number of values to convert; 0 writes nothing.
 */
static inline void volk_64f_convert_32f_a_generic(float* outputVector, const double* inputVector, unsigned int num_points){
  float* outputVectorPtr = outputVector;
  const double* inputVectorPtr = inputVector;
  unsigned int number = 0;

  for(number = 0; number < num_points; number++){
    *outputVectorPtr++ = ((float)(*inputVectorPtr++));
  }
}