GNU Radio 3.5.3.2 C++ API
volk_64u_byteswap_a.h
Go to the documentation of this file.
00001 #ifndef INCLUDED_volk_64u_byteswap_a_H
00002 #define INCLUDED_volk_64u_byteswap_a_H
00003 
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006 
00007 #ifdef LV_HAVE_SSE2
00008 #include <emmintrin.h>
00009 
/*!
  \brief Byteswaps (in-place) an aligned vector of uint64_t's using SSE2.

  Every 64-bit element has the order of its eight bytes reversed. Two elements
  are processed per SSE2 iteration; an odd trailing element is handled by a
  scalar fallback.

  \param intsToSwap The vector of data to byte swap (overwritten with the
                    result). It is accessed with aligned 128-bit loads and
                    stores, so it must be 16-byte aligned.
  \param num_points The number of 64-bit data points in the vector
*/
static inline void volk_64u_byteswap_a_sse2(uint64_t* intsToSwap, unsigned int num_points){
    // __m128i accesses are allowed to alias any type, so the vector path can
    // walk the caller's buffer directly.
    __m128i* vectorPtr = (__m128i*)intsToSwap;
    __m128i input, byte1, byte2, byte3, byte4, output;
    const __m128i byte2mask = _mm_set1_epi32(0x00FF0000);
    const __m128i byte3mask = _mm_set1_epi32(0x0000FF00);
    const unsigned int halfPoints = num_points / 2;
    unsigned int number;

    for(number = 0; number < halfPoints; number++){
      // Load two 64-bit points (four 32-bit words); the pointer is advanced
      // after the store since the operation is in-place.
      input = _mm_load_si128(vectorPtr);

      // Byteswap each 32-bit word: move every byte to its mirrored position
      byte1 = _mm_slli_epi32(input, 24);
      byte2 = _mm_slli_epi32(input, 8);
      byte3 = _mm_srli_epi32(input, 8);
      byte4 = _mm_srli_epi32(input, 24);

      // Or the bytes together; the two middle bytes need masking because the
      // 8-bit shifts drag neighbouring bytes along.
      output = _mm_or_si128(byte1, byte4);
      byte2 = _mm_and_si128(byte2, byte2mask);
      output = _mm_or_si128(output, byte2);
      byte3 = _mm_and_si128(byte3, byte3mask);
      output = _mm_or_si128(output, byte3);

      // Exchange the two 32-bit words inside each 64-bit point to complete
      // the 64-bit byte reversal.
      output = _mm_shuffle_epi32(output, _MM_SHUFFLE(2, 3, 0, 1));

      // Store the results
      _mm_store_si128(vectorPtr, output);
      vectorPtr++;
    }

    // Byteswap any remaining point. The element is read and written as a
    // uint64_t (not through a uint32_t alias) to respect strict aliasing.
    for(number = halfPoints * 2; number < num_points; number++){
      const uint64_t value = intsToSwap[number];
      intsToSwap[number] =
          ((value & 0xff) << 56) |
          ((value & 0xff00) << 40) |
          ((value & 0xff0000) << 24) |
          ((value & 0xff000000u) << 8) |
          ((value >> 8) & 0xff000000u) |
          ((value >> 24) & 0xff0000) |
          ((value >> 40) & 0xff00) |
          ((value >> 56) & 0xff);
    }
}
00060 #endif /* LV_HAVE_SSE2 */
00061 
00062 #ifdef LV_HAVE_GENERIC
/*!
  \brief Byteswaps (in-place) an aligned vector of uint64_t's.

  Every 64-bit element has the order of its eight bytes reversed.

  \param intsToSwap The vector of data to byte swap (overwritten with the result)
  \param num_points The number of 64-bit data points in the vector
*/
static inline void volk_64u_byteswap_a_generic(uint64_t* intsToSwap, unsigned int num_points){
  unsigned int point;
  for(point = 0; point < num_points; point++){
    // Read and write the element as a uint64_t (not through a uint32_t alias)
    // so the access respects strict aliasing; shifting the value reverses the
    // bytes independently of host endianness.
    const uint64_t value = intsToSwap[point];
    intsToSwap[point] =
        ((value & 0xff) << 56) |
        ((value & 0xff00) << 40) |
        ((value & 0xff0000) << 24) |
        ((value & 0xff000000u) << 8) |
        ((value >> 8) & 0xff000000u) |
        ((value >> 24) & 0xff0000) |
        ((value >> 40) & 0xff00) |
        ((value >> 56) & 0xff);
  }
}
00083 #endif /* LV_HAVE_GENERIC */
00084 
00085 
00086 
00087 
00088 #endif /* INCLUDED_volk_64u_byteswap_a_H */