/* GNU Radio 3.6.5 C++ API */
00001 #ifndef INCLUDED_volk_32f_s32f_convert_8i_a_H 00002 #define INCLUDED_volk_32f_s32f_convert_8i_a_H 00003 00004 #include <volk/volk_common.h> 00005 #include <inttypes.h> 00006 #include <stdio.h> 00007 00008 #ifdef LV_HAVE_SSE2 00009 #include <emmintrin.h> 00010 /*! 00011 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value 00012 \param inputVector The floating point input data buffer 00013 \param outputVector The 8 bit output data buffer 00014 \param scalar The value multiplied against each point in the input buffer 00015 \param num_points The number of data values to be converted 00016 */ 00017 static inline void volk_32f_s32f_convert_8i_a_sse2(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00018 unsigned int number = 0; 00019 00020 const unsigned int sixteenthPoints = num_points / 16; 00021 00022 const float* inputVectorPtr = (const float*)inputVector; 00023 int8_t* outputVectorPtr = outputVector; 00024 00025 float min_val = -128; 00026 float max_val = 127; 00027 float r; 00028 00029 __m128 vScalar = _mm_set_ps1(scalar); 00030 __m128 inputVal1, inputVal2, inputVal3, inputVal4; 00031 __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4; 00032 __m128 vmin_val = _mm_set_ps1(min_val); 00033 __m128 vmax_val = _mm_set_ps1(max_val); 00034 00035 for(;number < sixteenthPoints; number++){ 00036 inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4; 00037 inputVal2 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4; 00038 inputVal3 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4; 00039 inputVal4 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4; 00040 00041 inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val); 00042 inputVal2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val); 00043 inputVal3 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal3, vScalar), vmax_val), vmin_val); 00044 
inputVal4 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal4, vScalar), vmax_val), vmin_val); 00045 00046 intInputVal1 = _mm_cvtps_epi32(inputVal1); 00047 intInputVal2 = _mm_cvtps_epi32(inputVal2); 00048 intInputVal3 = _mm_cvtps_epi32(inputVal3); 00049 intInputVal4 = _mm_cvtps_epi32(inputVal4); 00050 00051 intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2); 00052 intInputVal3 = _mm_packs_epi32(intInputVal3, intInputVal4); 00053 00054 intInputVal1 = _mm_packs_epi16(intInputVal1, intInputVal3); 00055 00056 _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1); 00057 outputVectorPtr += 16; 00058 } 00059 00060 number = sixteenthPoints * 16; 00061 for(; number < num_points; number++){ 00062 r = inputVector[number] * scalar; 00063 if(r > max_val) 00064 r = max_val; 00065 else if(r < min_val) 00066 r = min_val; 00067 outputVector[number] = (int8_t)(r); 00068 } 00069 } 00070 #endif /* LV_HAVE_SSE2 */ 00071 00072 #ifdef LV_HAVE_SSE 00073 #include <xmmintrin.h> 00074 /*! 00075 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value 00076 \param inputVector The floating point input data buffer 00077 \param outputVector The 8 bit output data buffer 00078 \param scalar The value multiplied against each point in the input buffer 00079 \param num_points The number of data values to be converted 00080 */ 00081 static inline void volk_32f_s32f_convert_8i_a_sse(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00082 unsigned int number = 0; 00083 00084 const unsigned int quarterPoints = num_points / 4; 00085 00086 const float* inputVectorPtr = (const float*)inputVector; 00087 00088 float min_val = -128; 00089 float max_val = 127; 00090 float r; 00091 00092 int8_t* outputVectorPtr = outputVector; 00093 __m128 vScalar = _mm_set_ps1(scalar); 00094 __m128 ret; 00095 __m128 vmin_val = _mm_set_ps1(min_val); 00096 __m128 vmax_val = _mm_set_ps1(max_val); 00097 00098 
__VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; 00099 00100 for(;number < quarterPoints; number++){ 00101 ret = _mm_load_ps(inputVectorPtr); 00102 inputVectorPtr += 4; 00103 00104 ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val); 00105 00106 _mm_store_ps(outputFloatBuffer, ret); 00107 *outputVectorPtr++ = (int8_t)(outputFloatBuffer[0]); 00108 *outputVectorPtr++ = (int8_t)(outputFloatBuffer[1]); 00109 *outputVectorPtr++ = (int8_t)(outputFloatBuffer[2]); 00110 *outputVectorPtr++ = (int8_t)(outputFloatBuffer[3]); 00111 } 00112 00113 number = quarterPoints * 4; 00114 for(; number < num_points; number++){ 00115 r = inputVector[number] * scalar; 00116 if(r > max_val) 00117 r = max_val; 00118 else if(r < min_val) 00119 r = min_val; 00120 outputVector[number] = (int8_t)(r); 00121 } 00122 } 00123 #endif /* LV_HAVE_SSE */ 00124 00125 #ifdef LV_HAVE_GENERIC 00126 /*! 00127 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value 00128 \param inputVector The floating point input data buffer 00129 \param outputVector The 8 bit output data buffer 00130 \param scalar The value multiplied against each point in the input buffer 00131 \param num_points The number of data values to be converted 00132 */ 00133 static inline void volk_32f_s32f_convert_8i_a_generic(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00134 int8_t* outputVectorPtr = outputVector; 00135 const float* inputVectorPtr = inputVector; 00136 unsigned int number = 0; 00137 float min_val = -128; 00138 float max_val = 127; 00139 float r; 00140 00141 for(number = 0; number < num_points; number++){ 00142 r = *inputVectorPtr++ * scalar; 00143 if(r > max_val) 00144 r = max_val; 00145 else if(r < min_val) 00146 r = min_val; 00147 *outputVectorPtr++ = (int8_t)(r); 00148 } 00149 } 00150 #endif /* LV_HAVE_GENERIC */ 00151 00152 00153 00154 00155 #endif /* 
INCLUDED_volk_32f_s32f_convert_8i_a_H */