root / volk / include / volk / volk_32f_s32f_convert_16i_u.h @ 5f145a32
History | View | Annotate | Download (4.1 kB)
| 1 | e3600f59 | Nick Foster | #ifndef INCLUDED_volk_32f_s32f_convert_16i_u_H
|
|---|---|---|---|
| 2 | e3600f59 | Nick Foster | #define INCLUDED_volk_32f_s32f_convert_16i_u_H
|
| 3 | 23914465 | Tom Rondeau | |
| 4 | 23914465 | Tom Rondeau | #include <inttypes.h> |
| 5 | 23914465 | Tom Rondeau | #include <stdio.h> |
| 6 | 23914465 | Tom Rondeau | |
| 7 | cef9e33e | Nick Foster | #ifdef LV_HAVE_SSE2
|
| 8 | 23914465 | Tom Rondeau | #include <emmintrin.h> |
/*!
  \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
  \param outputVector The 16 bit output data buffer
  \param inputVector The floating point input data buffer
  \param scalar The value multiplied against each point in the input buffer
  \param num_points The number of data values to be converted
  \note Input buffer does NOT need to be properly aligned
  \note Scaled values are clamped to [-32768, 32767] before conversion, so out-of-range results saturate instead of wrapping
  \note Every element, including the tail that does not fill a whole SSE register, is converted with the current SSE rounding mode (round-to-nearest-even unless MXCSR was changed)
*/
static inline void volk_32f_s32f_convert_16i_u_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;

  const unsigned int eighthPoints = num_points / 8;

  const float* inputVectorPtr = inputVector;
  int16_t* outputVectorPtr = outputVector;

  const __m128 vScalar = _mm_set_ps1(scalar);
  const __m128 vMinVal = _mm_set_ps1(-32768.0f);
  const __m128 vMaxVal = _mm_set_ps1(32767.0f);
  __m128 scaledVal1, scaledVal2, tailVal;
  __m128i intVal1, intVal2;

  for(; number < eighthPoints; number++){
    scaledVal1 = _mm_mul_ps(_mm_loadu_ps(inputVectorPtr), vScalar); inputVectorPtr += 4;
    scaledVal2 = _mm_mul_ps(_mm_loadu_ps(inputVectorPtr), vScalar); inputVectorPtr += 4;

    /* Clamp while still in floating point: _mm_cvtps_epi32 yields 0x80000000
       for any value outside the int32 range, which the saturating pack below
       would turn into -32768 even for large positive inputs. */
    scaledVal1 = _mm_max_ps(_mm_min_ps(scaledVal1, vMaxVal), vMinVal);
    scaledVal2 = _mm_max_ps(_mm_min_ps(scaledVal2, vMaxVal), vMinVal);

    intVal1 = _mm_cvtps_epi32(scaledVal1);
    intVal2 = _mm_cvtps_epi32(scaledVal2);

    /* Narrow 2 x 4 int32 lanes into 8 int16 lanes. */
    intVal1 = _mm_packs_epi32(intVal1, intVal2);

    _mm_storeu_si128((__m128i*)outputVectorPtr, intVal1);
    outputVectorPtr += 8;
  }

  /* Tail: use the scalar forms of the same instructions so the leftover
     points are scaled, clamped and rounded exactly like the vector body. */
  for(number = eighthPoints * 8; number < num_points; number++){
    tailVal = _mm_mul_ss(_mm_set_ss(inputVector[number]), vScalar);
    tailVal = _mm_max_ss(_mm_min_ss(tailVal, vMaxVal), vMinVal);
    outputVector[number] = (int16_t)_mm_cvtss_si32(tailVal);
  }
}
| 46 | 23914465 | Tom Rondeau | #endif /* LV_HAVE_SSE2 */ |
| 47 | 23914465 | Tom Rondeau | |
| 48 | cef9e33e | Nick Foster | #ifdef LV_HAVE_SSE
|
| 49 | 23914465 | Tom Rondeau | #include <xmmintrin.h> |
| 50 | 23914465 | Tom Rondeau | /*!
|
| 51 | 23914465 | Tom Rondeau | \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value |
| 52 | 23914465 | Tom Rondeau | \param inputVector The floating point input data buffer |
| 53 | 23914465 | Tom Rondeau | \param outputVector The 16 bit output data buffer |
| 54 | 23914465 | Tom Rondeau | \param scalar The value multiplied against each point in the input buffer |
| 55 | 23914465 | Tom Rondeau | \param num_points The number of data values to be converted |
| 56 | 23914465 | Tom Rondeau | \note Input buffer does NOT need to be properly aligned |
| 57 | 23914465 | Tom Rondeau | */ |
| 58 | e3600f59 | Nick Foster | static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ |
| 59 | 23914465 | Tom Rondeau | unsigned int number = 0; |
| 60 | 23914465 | Tom Rondeau | |
| 61 | 23914465 | Tom Rondeau | const unsigned int quarterPoints = num_points / 4; |
| 62 | 23914465 | Tom Rondeau | |
| 63 | 23914465 | Tom Rondeau | const float* inputVectorPtr = (const float*)inputVector; |
| 64 | 23914465 | Tom Rondeau | int16_t* outputVectorPtr = outputVector; |
| 65 | 23914465 | Tom Rondeau | __m128 vScalar = _mm_set_ps1(scalar); |
| 66 | 23914465 | Tom Rondeau | __m128 ret; |
| 67 | 23914465 | Tom Rondeau | |
| 68 | 0b3e4f25 | Josh Blum | __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; |
| 69 | 23914465 | Tom Rondeau | |
| 70 | 23914465 | Tom Rondeau | for(;number < quarterPoints; number++){
|
| 71 | 23914465 | Tom Rondeau | ret = _mm_loadu_ps(inputVectorPtr); |
| 72 | 23914465 | Tom Rondeau | inputVectorPtr += 4;
|
| 73 | 23914465 | Tom Rondeau | |
| 74 | 23914465 | Tom Rondeau | ret = _mm_mul_ps(ret, vScalar); |
| 75 | 23914465 | Tom Rondeau | |
| 76 | 23914465 | Tom Rondeau | _mm_store_ps(outputFloatBuffer, ret); |
| 77 | 23914465 | Tom Rondeau | *outputVectorPtr++ = (int16_t)(outputFloatBuffer[0]);
|
| 78 | 23914465 | Tom Rondeau | *outputVectorPtr++ = (int16_t)(outputFloatBuffer[1]);
|
| 79 | 23914465 | Tom Rondeau | *outputVectorPtr++ = (int16_t)(outputFloatBuffer[2]);
|
| 80 | 23914465 | Tom Rondeau | *outputVectorPtr++ = (int16_t)(outputFloatBuffer[3]);
|
| 81 | 23914465 | Tom Rondeau | } |
| 82 | 23914465 | Tom Rondeau | |
| 83 | 23914465 | Tom Rondeau | number = quarterPoints * 4;
|
| 84 | 23914465 | Tom Rondeau | for(; number < num_points; number++){
|
| 85 | 23914465 | Tom Rondeau | outputVector[number] = (int16_t)(inputVector[number] * scalar); |
| 86 | 23914465 | Tom Rondeau | } |
| 87 | 23914465 | Tom Rondeau | } |
| 88 | 23914465 | Tom Rondeau | #endif /* LV_HAVE_SSE */ |
| 89 | 23914465 | Tom Rondeau | |
| 90 | 23914465 | Tom Rondeau | #ifdef LV_HAVE_GENERIC
|
/*!
  \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
  \param outputVector The 16 bit output data buffer
  \param inputVector The floating point input data buffer
  \param scalar The value multiplied against each point in the input buffer
  \param num_points The number of data values to be converted
  \note Input buffer does NOT need to be properly aligned
  \note Scaled values are clamped to [-32768, 32767] before conversion, so out-of-range results saturate instead of invoking an undefined float-to-integer conversion
  \note The conversion itself is a C cast, i.e. the fractional part is truncated toward zero
*/
static inline void volk_32f_s32f_convert_16i_u_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  int16_t* outputVectorPtr = outputVector;
  const float* inputVectorPtr = inputVector;
  const float minVal = -32768.0f;
  const float maxVal = 32767.0f;
  unsigned int number = 0;
  float r;

  for(number = 0; number < num_points; number++){
    r = *inputVectorPtr++ * scalar;
    /* Negated comparison so that NaN also takes this branch and the cast
       below is always applied to an in-range value. */
    if(!(r <= maxVal)) r = maxVal;
    else if(r < minVal) r = minVal;
    *outputVectorPtr++ = (int16_t)r;
  }
}
| 108 | 23914465 | Tom Rondeau | #endif /* LV_HAVE_GENERIC */ |
| 109 | 23914465 | Tom Rondeau | |
| 110 | 23914465 | Tom Rondeau | |
| 111 | 23914465 | Tom Rondeau | |
| 112 | 23914465 | Tom Rondeau | |
| 113 | e3600f59 | Nick Foster | #endif /* INCLUDED_volk_32f_s32f_convert_16i_u_H */ |