1 #ifndef INCLUDED_volk_32u_byteswap_u_H
2 #define INCLUDED_volk_32u_byteswap_u_H
/*!
  \brief Reverses the byte order of each 32-bit word in a buffer, in place,
         using SSE2 with unaligned loads and stores.
  \param intsToSwap Buffer of 32-bit words; overwritten with the swapped values.
  \param num_points Number of 32-bit words in the buffer.
*/
static inline void volk_32u_byteswap_u_sse2(
    uint32_t* intsToSwap,
    unsigned int num_points){
  unsigned int number = 0;

  uint32_t* inputPtr = intsToSwap;
  __m128i input, byte1, byte2, byte3, byte4, output;
  const __m128i byte2mask = _mm_set1_epi32(0x00FF0000);
  const __m128i byte3mask = _mm_set1_epi32(0x0000FF00);

  /* Main loop handles four 32-bit words per iteration. */
  const unsigned int quarterPoints = num_points / 4;
  for(; number < quarterPoints; number++){
    /* Load four words; the pointer is advanced only after the store
       because the swap is done in place. */
    input = _mm_loadu_si128((__m128i*)inputPtr);

    /* Shift every byte of each word towards its mirrored position. */
    byte1 = _mm_slli_epi32(input, 24); /* lowest byte -> highest byte   */
    byte2 = _mm_slli_epi32(input, 8);  /* second-lowest -> second-highest */
    byte3 = _mm_srli_epi32(input, 8);  /* second-highest -> second-lowest */
    byte4 = _mm_srli_epi32(input, 24); /* highest byte -> lowest byte   */

    /* The 24-bit shifts leave only the wanted byte; the 8-bit shifts
       carry neighbouring bytes along and must be masked before merging. */
    output = _mm_or_si128(byte1, byte4);
    byte2 = _mm_and_si128(byte2, byte2mask);
    output = _mm_or_si128(output, byte2);
    byte3 = _mm_and_si128(byte3, byte3mask);
    output = _mm_or_si128(output, byte3);

    _mm_storeu_si128((__m128i*)inputPtr, output);
    inputPtr += 4;
  }

  /* Scalar tail: swap the 0-3 words that did not fill a full vector. */
  number = quarterPoints*4;
  for(; number < num_points; number++){
    uint32_t outputVal = *inputPtr;
    outputVal = (((outputVal >> 24) & 0xff) | ((outputVal >> 8) & 0x0000ff00) | ((outputVal << 8) & 0x00ff0000) | ((outputVal << 24) & 0xff000000));
    *inputPtr = outputVal;
    inputPtr++;
  }
}
54 #ifdef LV_HAVE_GENERIC
/*!
  \brief Reverses the byte order of each 32-bit word in a buffer, in place,
         using portable scalar code.
  \param intsToSwap Buffer of 32-bit words; overwritten with the swapped values.
  \param num_points Number of 32-bit words in the buffer.
*/
static inline void volk_32u_byteswap_generic(
    uint32_t* intsToSwap,
    unsigned int num_points){
  uint32_t* inputPtr = intsToSwap;

  unsigned int point;
  for(point = 0; point < num_points; point++){
    uint32_t output = *inputPtr;
    /* Move each of the four bytes to its mirrored position and merge. */
    output = (((output >> 24) & 0xff) | ((output >> 8) & 0x0000ff00) | ((output << 8) & 0x00ff0000) | ((output << 24) & 0xff000000));

    /* Write the result back over the input word. */
    *inputPtr = output;
    inputPtr++;
  }
}
78 #ifndef INCLUDED_volk_32u_byteswap_a_H
79 #define INCLUDED_volk_32u_byteswap_a_H
85 #include <emmintrin.h>
/*!
  \brief Reverses the byte order of each 32-bit word in a buffer, in place,
         using SSE2 with aligned loads and stores.
  \param intsToSwap Buffer of 32-bit words; overwritten with the swapped values.
                    Must be 16-byte aligned, because the vector loop uses
                    _mm_load_si128 / _mm_store_si128.
  \param num_points Number of 32-bit words in the buffer.
*/
static inline void volk_32u_byteswap_a_sse2(
    uint32_t* intsToSwap,
    unsigned int num_points){
  unsigned int number = 0;

  uint32_t* inputPtr = intsToSwap;
  __m128i input, byte1, byte2, byte3, byte4, output;
  const __m128i byte2mask = _mm_set1_epi32(0x00FF0000);
  const __m128i byte3mask = _mm_set1_epi32(0x0000FF00);

  /* Main loop handles four 32-bit words per iteration. */
  const unsigned int quarterPoints = num_points / 4;
  for(; number < quarterPoints; number++){
    /* Aligned load of four words; the pointer is advanced only after the
       store because the swap is done in place. */
    input = _mm_load_si128((__m128i*)inputPtr);

    /* Shift every byte of each word towards its mirrored position. */
    byte1 = _mm_slli_epi32(input, 24); /* lowest byte -> highest byte   */
    byte2 = _mm_slli_epi32(input, 8);  /* second-lowest -> second-highest */
    byte3 = _mm_srli_epi32(input, 8);  /* second-highest -> second-lowest */
    byte4 = _mm_srli_epi32(input, 24); /* highest byte -> lowest byte   */

    /* The 24-bit shifts leave only the wanted byte; the 8-bit shifts
       carry neighbouring bytes along and must be masked before merging. */
    output = _mm_or_si128(byte1, byte4);
    byte2 = _mm_and_si128(byte2, byte2mask);
    output = _mm_or_si128(output, byte2);
    byte3 = _mm_and_si128(byte3, byte3mask);
    output = _mm_or_si128(output, byte3);

    _mm_store_si128((__m128i*)inputPtr, output);
    inputPtr += 4;
  }

  /* Scalar tail: swap the 0-3 words that did not fill a full vector. */
  number = quarterPoints*4;
  for(; number < num_points; number++){
    uint32_t outputVal = *inputPtr;
    outputVal = (((outputVal >> 24) & 0xff) | ((outputVal >> 8) & 0x0000ff00) | ((outputVal << 8) & 0x00ff0000) | ((outputVal << 24) & 0xff000000));
    *inputPtr = outputVal;
    inputPtr++;
  }
}
131 #ifdef LV_HAVE_GENERIC
/*!
  \brief Reverses the byte order of each 32-bit word in a buffer, in place,
         using portable scalar code (counterpart of the aligned SSE2 kernel).
  \param intsToSwap Buffer of 32-bit words; overwritten with the swapped values.
  \param num_points Number of 32-bit words in the buffer.
*/
static inline void volk_32u_byteswap_a_generic(
    uint32_t* intsToSwap,
    unsigned int num_points){
  uint32_t* inputPtr = intsToSwap;

  unsigned int point;
  for(point = 0; point < num_points; point++){
    uint32_t output = *inputPtr;
    /* Move each of the four bytes to its mirrored position and merge. */
    output = (((output >> 24) & 0xff) | ((output >> 8) & 0x0000ff00) | ((output << 8) & 0x00ff0000) | ((output << 24) & 0xff000000));

    /* Write the result back over the input word. */
    *inputPtr = output;
    inputPtr++;
  }
}
/*
 * Type cross-references carried over from the generated source listing:
 *   uint32_t  is  unsigned int       (Definition: stdint.h:80)
 *   uint64_t  is  unsigned __int64   (Definition: stdint.h:90)
 */