Statistics
| Branch: | Tag: | Revision:

root / volk / include / volk / volk_32f_s32f_convert_16i_u.h @ 5f145a32

History | View | Annotate | Download (4.1 kB)

1 e3600f59 Nick Foster
#ifndef INCLUDED_volk_32f_s32f_convert_16i_u_H
2 e3600f59 Nick Foster
#define INCLUDED_volk_32f_s32f_convert_16i_u_H
3 23914465 Tom Rondeau
4 23914465 Tom Rondeau
#include <inttypes.h>
5 23914465 Tom Rondeau
#include <stdio.h>
6 23914465 Tom Rondeau
7 cef9e33e Nick Foster
#ifdef LV_HAVE_SSE2
8 23914465 Tom Rondeau
#include <emmintrin.h>
9 23914465 Tom Rondeau
  /*!
    \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
    \param outputVector The 16 bit output data buffer
    \param inputVector The floating point input data buffer
    \param scalar The value multiplied against each point in the input buffer
    \param num_points The number of data values to be converted
    \note Input and output buffers do NOT need to be properly aligned
    \note Scaled values are saturated to [-32768, 32767]; a NaN product yields -32768
    \note Every point, including the trailing num_points % 8 points, is rounded with the current SSE rounding mode (round-to-nearest-even unless MXCSR was changed)
  */
static inline void volk_32f_s32f_convert_16i_u_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  const unsigned int eighthPoints = num_points / 8;

  const float* inputVectorPtr = inputVector;
  int16_t* outputVectorPtr = outputVector;

  const __m128 vScalar = _mm_set_ps1(scalar);
  const __m128 vMinVal = _mm_set_ps1(-32768.0f);
  const __m128 vMaxVal = _mm_set_ps1(32767.0f);
  unsigned int number;

  for(number = 0; number < eighthPoints; number++){
    __m128 scaled1 = _mm_mul_ps(_mm_loadu_ps(inputVectorPtr), vScalar);
    __m128 scaled2 = _mm_mul_ps(_mm_loadu_ps(inputVectorPtr + 4), vScalar);
    __m128i packed;
    inputVectorPtr += 8;

    /* Clamp while still in floating point: _mm_cvtps_epi32 returns 0x80000000
       for anything outside the int32 range, which would turn large positive
       values into -32768 after packing.  max is applied first so that a NaN
       lane is replaced by the lower bound (max/min return the second operand
       when an input is NaN). */
    scaled1 = _mm_min_ps(_mm_max_ps(scaled1, vMinVal), vMaxVal);
    scaled2 = _mm_min_ps(_mm_max_ps(scaled2, vMinVal), vMaxVal);

    packed = _mm_packs_epi32(_mm_cvtps_epi32(scaled1), _mm_cvtps_epi32(scaled2));

    _mm_storeu_si128((__m128i*)outputVectorPtr, packed);
    outputVectorPtr += 8;
  }

  /* Tail: use the scalar SSE instructions so the leftover points get exactly
     the same clamping and rounding as the vectorised points above. */
  for(number = eighthPoints * 8; number < num_points; number++){
    __m128 scaled = _mm_mul_ss(_mm_set_ss(inputVector[number]), vScalar);
    scaled = _mm_min_ss(_mm_max_ss(scaled, vMinVal), vMaxVal);
    outputVector[number] = (int16_t)_mm_cvtss_si32(scaled);
  }
}
46 23914465 Tom Rondeau
#endif /* LV_HAVE_SSE2 */
47 23914465 Tom Rondeau
48 cef9e33e Nick Foster
#ifdef LV_HAVE_SSE
49 23914465 Tom Rondeau
#include <xmmintrin.h>
50 23914465 Tom Rondeau
  /*!
    \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
    \param outputVector The 16 bit output data buffer
    \param inputVector The floating point input data buffer
    \param scalar The value multiplied against each point in the input buffer
    \param num_points The number of data values to be converted
    \note Input and output buffers do NOT need to be properly aligned
    \note Scaled values are saturated to [-32768, 32767]; a NaN product yields -32768
    \note In-range values are truncated toward zero by the float to integer cast
  */
static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = inputVector;
  int16_t* outputVectorPtr = outputVector;

  const __m128 vScalar = _mm_set_ps1(scalar);
  const __m128 vMinVal = _mm_set_ps1(-32768.0f);
  const __m128 vMaxVal = _mm_set_ps1(32767.0f);

  /* Plain stack buffer written with an unaligned store, so no alignment
     attribute macro is needed here. */
  float clampedBuffer[4];
  unsigned int number;

  for(number = 0; number < quarterPoints; number++){
    __m128 ret = _mm_mul_ps(_mm_loadu_ps(inputVectorPtr), vScalar);
    inputVectorPtr += 4;

    /* Saturate before the casts below: converting a float whose integral part
       does not fit in int16_t is undefined behaviour.  max is applied first so
       that a NaN lane is replaced by the lower bound. */
    ret = _mm_min_ps(_mm_max_ps(ret, vMinVal), vMaxVal);

    _mm_storeu_ps(clampedBuffer, ret);
    *outputVectorPtr++ = (int16_t)(clampedBuffer[0]);
    *outputVectorPtr++ = (int16_t)(clampedBuffer[1]);
    *outputVectorPtr++ = (int16_t)(clampedBuffer[2]);
    *outputVectorPtr++ = (int16_t)(clampedBuffer[3]);
  }

  /* Tail: the remaining num_points % 4 points, with the same saturation. */
  for(number = quarterPoints * 4; number < num_points; number++){
    float scaled = inputVector[number] * scalar;
    /* The negated comparison also catches NaN and sends it to the lower bound. */
    if(!(scaled > -32768.0f)){
      scaled = -32768.0f;
    }
    else if(scaled > 32767.0f){
      scaled = 32767.0f;
    }
    outputVector[number] = (int16_t)scaled;
  }
}
88 23914465 Tom Rondeau
#endif /* LV_HAVE_SSE */
89 23914465 Tom Rondeau
90 23914465 Tom Rondeau
#ifdef LV_HAVE_GENERIC
91 23914465 Tom Rondeau
  /*!
    \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
    \param outputVector The 16 bit output data buffer
    \param inputVector The floating point input data buffer
    \param scalar The value multiplied against each point in the input buffer
    \param num_points The number of data values to be converted
    \note Input buffer does NOT need to be properly aligned
    \note Scaled values are saturated to [-32768, 32767]; a NaN product yields -32768
    \note In-range values are truncated toward zero by the float to integer cast
  */
static inline void volk_32f_s32f_convert_16i_u_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  unsigned int number;

  for(number = 0; number < num_points; number++){
    float scaled = inputVector[number] * scalar;

    /* Saturate before the cast: converting a float whose integral part does
       not fit in int16_t is undefined behaviour.  The negated comparison also
       catches NaN and sends it to the lower bound. */
    if(!(scaled > -32768.0f)){
      scaled = -32768.0f;
    }
    else if(scaled > 32767.0f){
      scaled = 32767.0f;
    }

    outputVector[number] = (int16_t)scaled;
  }
}
108 23914465 Tom Rondeau
#endif /* LV_HAVE_GENERIC */
109 23914465 Tom Rondeau
110 23914465 Tom Rondeau
111 23914465 Tom Rondeau
112 23914465 Tom Rondeau
113 e3600f59 Nick Foster
#endif /* INCLUDED_volk_32f_s32f_convert_16i_u_H */