GNU Radio 3.5.3.2 C++ API
volk_32u_byteswap_a.h
Go to the documentation of this file.
00001 #ifndef INCLUDED_volk_32u_byteswap_a_H
00002 #define INCLUDED_volk_32u_byteswap_a_H
00003 
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006 
00007 #ifdef LV_HAVE_SSE2
00008 #include <emmintrin.h>
00009 
/*!
  \brief Reverses the byte order of every 32-bit word in a buffer, in place (SSE2).

  Words are processed four at a time with aligned 128-bit loads and stores
  (_mm_load_si128 / _mm_store_si128), so the buffer must be 16-byte aligned.
  Any words left over after the last full group of four are swapped one at a
  time with scalar code.

  \param intsToSwap Buffer of 32-bit words; each word is overwritten with its byte-reversed value
  \param num_points Number of 32-bit words in the buffer
*/
static inline void volk_32u_byteswap_a_sse2(uint32_t* intsToSwap, unsigned int num_points){
  const unsigned int vectorCount = num_points / 4;
  const __m128i upperMiddleMask = _mm_set1_epi32(0x00FF0000);
  const __m128i lowerMiddleMask = _mm_set1_epi32(0x0000FF00);
  __m128i* vectorPtr = (__m128i*)intsToSwap;
  unsigned int idx;

  // Vector part: four words per iteration
  for(idx = 0; idx < vectorCount; idx++){
    const __m128i words = _mm_load_si128(vectorPtr + idx);
    // The two outer bytes trade places; the shifts alone clear everything else
    const __m128i outerBytes = _mm_or_si128(_mm_slli_epi32(words, 24),
                                            _mm_srli_epi32(words, 24));
    // The two inner bytes trade places; masks drop the neighbours shifted in with them
    const __m128i movedUp = _mm_and_si128(_mm_slli_epi32(words, 8), upperMiddleMask);
    const __m128i movedDown = _mm_and_si128(_mm_srli_epi32(words, 8), lowerMiddleMask);
    // Write back over the words just read
    _mm_store_si128(vectorPtr + idx,
                    _mm_or_si128(_mm_or_si128(outerBytes, movedUp), movedDown));
  }

  // Scalar part: the 0 to 3 words that did not fill a whole vector
  for(idx = vectorCount * 4; idx < num_points; idx++){
    const uint32_t word = intsToSwap[idx];
    intsToSwap[idx] = (((word >> 24) & 0xff) |
                       ((word >> 8) & 0x0000ff00) |
                       ((word << 8) & 0x00ff0000) |
                       ((word << 24) & 0xff000000));
  }
}
00052 #endif /* LV_HAVE_SSE2 */
00053 
00054 #ifdef LV_HAVE_GENERIC
/*!
  \brief Reverses the byte order of every 32-bit word in a buffer, in place (portable C).
  \param intsToSwap Buffer of 32-bit words; each word is overwritten with its byte-reversed value
  \param num_points Number of 32-bit words in the buffer
*/
static inline void volk_32u_byteswap_a_generic(uint32_t* intsToSwap, unsigned int num_points){
  unsigned int idx;

  for(idx = 0; idx < num_points; idx++){
    const uint32_t word = intsToSwap[idx];
    // Move each byte to its mirrored position, then merge the four lanes
    const uint32_t topToBottom = (word >> 24) & 0xff;
    const uint32_t upperToLower = (word >> 8) & 0x0000ff00;
    const uint32_t lowerToUpper = (word << 8) & 0x00ff0000;
    const uint32_t bottomToTop = (word << 24) & 0xff000000;

    intsToSwap[idx] = topToBottom | upperToLower | lowerToUpper | bottomToTop;
  }
}
00072 #endif /* LV_HAVE_GENERIC */
00073 
00074 
00075 
00076 
00077 #endif /* INCLUDED_volk_32u_byteswap_a_H */