Statistics
| Branch: | Tag: | Revision:

root / volk / include / volk / volk_32f_s32f_convert_32i_a.h @ 5f145a32

History | View | Annotate | Download (5 kB)

1
#ifndef INCLUDED_volk_32f_s32f_convert_32i_a_H
2
#define INCLUDED_volk_32f_s32f_convert_32i_a_H
3
4
#include <volk/volk_common.h>
5
#include <inttypes.h>
6
#include <stdio.h>
7
8
#ifdef LV_HAVE_AVX
9
#include <immintrin.h>
10
  /*!
    \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value (AVX, aligned buffers)

    Every element, including the trailing num_points % 8 elements, is converted
    with the CPU's float-to-int conversion instruction, so the result is rounded
    according to the current MXCSR rounding mode (round-to-nearest-even unless
    the caller changed it). A product that is NaN or does not fit in an int32_t
    converts to 0x80000000 (INT32_MIN), which is the value these instructions
    are specified to return in that case.

    \param outputVector The 32 bit output data buffer (32-byte aligned, as required by the aligned 256-bit store)
    \param inputVector The floating point input data buffer (32-byte aligned, as required by the aligned 256-bit load)
    \param scalar The value multiplied against each point in the input buffer
    \param num_points The number of data values to be converted
  */
static inline void volk_32f_s32f_convert_32i_a_avx(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int eighthPoints = num_points / 8;

  const float* inputVectorPtr = inputVector;
  int32_t* outputVectorPtr = outputVector;
  const __m256 vScalar = _mm256_set1_ps(scalar);
  /* Scalar copy of the multiplier for the single-element tail conversions. */
  const __m128 sScalar = _mm_set_ss(scalar);

  /* Main loop: eight points per iteration. */
  for(; number < eighthPoints; number++){
    const __m256 scaled = _mm256_mul_ps(_mm256_load_ps(inputVectorPtr), vScalar);
    _mm256_store_si256((__m256i*)outputVectorPtr, _mm256_cvtps_epi32(scaled));
    inputVectorPtr += 8;
    outputVectorPtr += 8;
  }

  /* Tail: use the scalar form of the same conversion instruction so that the
     last (num_points % 8) points are rounded exactly like the vectorized ones.
     A plain C cast here would truncate toward zero instead, and would be
     undefined behaviour for out-of-range or NaN products. */
  for(number = eighthPoints * 8; number < num_points; number++){
    const __m128 scaled = _mm_mul_ss(_mm_load_ss(inputVector + number), sScalar);
    outputVector[number] = (int32_t)_mm_cvtss_si32(scaled);
  }
}
42
#endif /* LV_HAVE_AVX */
43
44
#ifdef LV_HAVE_SSE2
45
#include <emmintrin.h>
46
  /*!
    \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value (SSE2, aligned buffers)

    Every element, including the trailing num_points % 4 elements, is converted
    with the CPU's float-to-int conversion instruction, so the result is rounded
    according to the current MXCSR rounding mode (round-to-nearest-even unless
    the caller changed it). A product that is NaN or does not fit in an int32_t
    converts to 0x80000000 (INT32_MIN), which is the value these instructions
    are specified to return in that case.

    \param outputVector The 32 bit output data buffer (16-byte aligned, as required by the aligned 128-bit store)
    \param inputVector The floating point input data buffer (16-byte aligned, as required by the aligned 128-bit load)
    \param scalar The value multiplied against each point in the input buffer
    \param num_points The number of data values to be converted
  */
static inline void volk_32f_s32f_convert_32i_a_sse2(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = inputVector;
  int32_t* outputVectorPtr = outputVector;
  const __m128 vScalar = _mm_set_ps1(scalar);

  /* Main loop: four points per iteration. */
  for(; number < quarterPoints; number++){
    const __m128 scaled = _mm_mul_ps(_mm_load_ps(inputVectorPtr), vScalar);
    _mm_store_si128((__m128i*)outputVectorPtr, _mm_cvtps_epi32(scaled));
    inputVectorPtr += 4;
    outputVectorPtr += 4;
  }

  /* Tail: use the scalar form of the same conversion instruction so that the
     last (num_points % 4) points are rounded exactly like the vectorized ones.
     A plain C cast here would truncate toward zero instead, and would be
     undefined behaviour for out-of-range or NaN products. */
  for(number = quarterPoints * 4; number < num_points; number++){
    const __m128 scaled = _mm_mul_ss(_mm_load_ss(inputVector + number), vScalar);
    outputVector[number] = (int32_t)_mm_cvtss_si32(scaled);
  }
}
78
#endif /* LV_HAVE_SSE2 */
79
80
#ifdef LV_HAVE_SSE
81
#include <xmmintrin.h>
82
  /*!
83
    \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value
84
    \param inputVector The floating point input data buffer
85
    \param outputVector The 32 bit output data buffer
86
    \param scalar The value multiplied against each point in the input buffer
87
    \param num_points The number of data values to be converted
88
  */
89
static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
90
  unsigned int number = 0;
91
92
  const unsigned int quarterPoints = num_points / 4;
93
    
94
  const float* inputVectorPtr = (const float*)inputVector;
95
  int32_t* outputVectorPtr = outputVector;
96
  __m128 vScalar = _mm_set_ps1(scalar);
97
  __m128 ret;
98
99
  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
100
101
  for(;number < quarterPoints; number++){
102
    ret = _mm_load_ps(inputVectorPtr);
103
    inputVectorPtr += 4;
104
105
    ret = _mm_mul_ps(ret, vScalar);
106
107
    _mm_store_ps(outputFloatBuffer, ret);
108
    *outputVectorPtr++ = (int32_t)(outputFloatBuffer[0]);
109
    *outputVectorPtr++ = (int32_t)(outputFloatBuffer[1]);
110
    *outputVectorPtr++ = (int32_t)(outputFloatBuffer[2]);
111
    *outputVectorPtr++ = (int32_t)(outputFloatBuffer[3]);
112
  }
113
114
  number = quarterPoints * 4;    
115
  for(; number < num_points; number++){
116
    outputVector[number] = (int32_t)(inputVector[number] * scalar);
117
  }
118
}
119
#endif /* LV_HAVE_SSE */
120
121
#ifdef LV_HAVE_GENERIC
122
  /*!
    \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value (portable C)

    In-range products are truncated toward zero. Products at or beyond the
    int32_t limits saturate to the largest / smallest int32_t value, and a NaN
    product converts to 0, so the conversion never invokes undefined behaviour.

    \param outputVector The 32 bit output data buffer
    \param inputVector The floating point input data buffer
    \param scalar The value multiplied against each point in the input buffer
    \param num_points The number of data values to be converted
  */
static inline void volk_32f_s32f_convert_32i_a_generic(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  /* 2^31 and -2^31 are exactly representable as floats; any float >= 2^31 or
     < -2^31 cannot be cast to int32_t without undefined behaviour. */
  const float upperLimit = 2147483648.0f;
  const float lowerLimit = -2147483648.0f;
  const int32_t largest = (int32_t)0x7fffffff;
  const int32_t smallest = (int32_t)(-0x7fffffff - 1);
  unsigned int number;

  for(number = 0; number < num_points; number++){
    const float scaled = inputVector[number] * scalar;
    int32_t converted;

    if(scaled >= upperLimit){
      converted = largest;
    }
    else if(scaled <= lowerLimit){
      converted = smallest;
    }
    else if(scaled != scaled){
      /* NaN compares unequal to itself. */
      converted = 0;
    }
    else{
      converted = (int32_t)scaled;
    }
    outputVector[number] = converted;
  }
}
138
#endif /* LV_HAVE_GENERIC */
139
140
141
142
143
#endif /* INCLUDED_volk_32f_s32f_convert_32i_a_H */