GNU Radio 3.6.5 C++ API
|
00001 #ifndef INCLUDED_volk_32f_s32f_convert_8i_u_H 00002 #define INCLUDED_volk_32f_s32f_convert_8i_u_H 00003 00004 #include <inttypes.h> 00005 #include <stdio.h> 00006 00007 #ifdef LV_HAVE_SSE2 00008 #include <emmintrin.h> 00009 /*! 00010 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value 00011 \param inputVector The floating point input data buffer 00012 \param outputVector The 8 bit output data buffer 00013 \param scalar The value multiplied against each point in the input buffer 00014 \param num_points The number of data values to be converted 00015 \note Input buffer does NOT need to be properly aligned 00016 */ 00017 static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00018 unsigned int number = 0; 00019 00020 const unsigned int sixteenthPoints = num_points / 16; 00021 00022 const float* inputVectorPtr = (const float*)inputVector; 00023 int8_t* outputVectorPtr = outputVector; 00024 00025 float min_val = -128; 00026 float max_val = 127; 00027 float r; 00028 00029 __m128 vScalar = _mm_set_ps1(scalar); 00030 __m128 inputVal1, inputVal2, inputVal3, inputVal4; 00031 __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4; 00032 __m128 vmin_val = _mm_set_ps1(min_val); 00033 __m128 vmax_val = _mm_set_ps1(max_val); 00034 00035 for(;number < sixteenthPoints; number++){ 00036 inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4; 00037 inputVal2 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4; 00038 inputVal3 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4; 00039 inputVal4 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4; 00040 00041 inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val); 00042 inputVal2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val); 00043 inputVal3 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal3, vScalar), vmax_val), vmin_val); 00044 inputVal4 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal4, vScalar), vmax_val), vmin_val); 00045 00046 intInputVal1 = _mm_cvtps_epi32(inputVal1); 00047 intInputVal2 = _mm_cvtps_epi32(inputVal2); 00048 intInputVal3 = _mm_cvtps_epi32(inputVal3); 00049 intInputVal4 = _mm_cvtps_epi32(inputVal4); 00050 00051 intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2); 00052 intInputVal3 = _mm_packs_epi32(intInputVal3, intInputVal4); 00053 00054 intInputVal1 = _mm_packs_epi16(intInputVal1, intInputVal3); 00055 00056 _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1); 00057 outputVectorPtr += 16; 00058 } 00059 00060 number = sixteenthPoints * 16; 00061 for(; number < num_points; number++){ 00062 r = inputVector[number] * scalar; 00063 if(r > max_val) 00064 r = max_val; 00065 else if(r < min_val) 00066 r = min_val; 00067 outputVector[number] = (int16_t)(r); 00068 } 00069 } 00070 #endif /* LV_HAVE_SSE2 */ 00071 00072 #ifdef LV_HAVE_SSE 00073 #include <xmmintrin.h> 00074 /*! 00075 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value 00076 \param inputVector The floating point input data buffer 00077 \param outputVector The 8 bit output data buffer 00078 \param scalar The value multiplied against each point in the input buffer 00079 \param num_points The number of data values to be converted 00080 \note Input buffer does NOT need to be properly aligned 00081 */ 00082 static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00083 unsigned int number = 0; 00084 00085 const unsigned int quarterPoints = num_points / 4; 00086 00087 const float* inputVectorPtr = (const float*)inputVector; 00088 int8_t* outputVectorPtr = outputVector; 00089 00090 float min_val = -128; 00091 float max_val = 127; 00092 float r; 00093 00094 __m128 vScalar = _mm_set_ps1(scalar); 00095 __m128 ret; 00096 __m128 vmin_val = _mm_set_ps1(min_val); 00097 __m128 vmax_val = _mm_set_ps1(max_val); 00098 00099 __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; 00100 00101 for(;number < quarterPoints; number++){ 00102 ret = _mm_loadu_ps(inputVectorPtr); 00103 inputVectorPtr += 4; 00104 00105 ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val); 00106 00107 _mm_store_ps(outputFloatBuffer, ret); 00108 *outputVectorPtr++ = (int8_t)(outputFloatBuffer[0]); 00109 *outputVectorPtr++ = (int8_t)(outputFloatBuffer[1]); 00110 *outputVectorPtr++ = (int8_t)(outputFloatBuffer[2]); 00111 *outputVectorPtr++ = (int8_t)(outputFloatBuffer[3]); 00112 } 00113 00114 number = quarterPoints * 4; 00115 for(; number < num_points; number++){ 00116 r = inputVector[number] * scalar; 00117 if(r > max_val) 00118 r = max_val; 00119 else if(r < min_val) 00120 r = min_val; 00121 outputVector[number] = (int16_t)(r); 00122 } 00123 } 00124 #endif /* LV_HAVE_SSE */ 00125 00126 #ifdef LV_HAVE_GENERIC 00127 /*! 00128 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value 00129 \param inputVector The floating point input data buffer 00130 \param outputVector The 8 bit output data buffer 00131 \param scalar The value multiplied against each point in the input buffer 00132 \param num_points The number of data values to be converted 00133 \note Input buffer does NOT need to be properly aligned 00134 */ 00135 static inline void volk_32f_s32f_convert_8i_u_generic(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00136 int8_t* outputVectorPtr = outputVector; 00137 const float* inputVectorPtr = inputVector; 00138 unsigned int number = 0; 00139 float min_val = -128; 00140 float max_val = 127; 00141 float r; 00142 00143 for(number = 0; number < num_points; number++){ 00144 r = *inputVectorPtr++ * scalar; 00145 if(r > max_val) 00146 r = max_val; 00147 else if(r < min_val) 00148 r = min_val; 00149 *outputVectorPtr++ = (int16_t)(r); 00150 } 00151 } 00152 #endif /* LV_HAVE_GENERIC */ 00153 00154 00155 00156 00157 #endif /* INCLUDED_volk_32f_s32f_convert_8i_u_H */