GNU Radio 3.7.3 C++ API
volk_32u_byteswap.h
Go to the documentation of this file.
1 #ifndef INCLUDED_volk_32u_byteswap_u_H
2 #define INCLUDED_volk_32u_byteswap_u_H
3 
4 #include <inttypes.h>
5 #include <stdio.h>
6 
7 #ifdef LV_HAVE_SSE2
8 #include <emmintrin.h>
9 
/*!
  \brief Byteswaps (in-place) an unaligned vector of uint32_t's.

  Four values are processed per iteration with SSE2 unaligned loads/stores;
  the remaining (num_points % 4) values are swapped with scalar code.

  \param intsToSwap The vector of data to byte swap (no alignment requirement
                    beyond that of uint32_t)
  \param num_points The number of data points
*/
static inline void volk_32u_byteswap_u_sse2(uint32_t* intsToSwap, unsigned int num_points){
  unsigned int number = 0;

  uint32_t* inputPtr = intsToSwap;
  __m128i input, byte1, byte2, byte3, byte4, output;
  const __m128i byte2mask = _mm_set1_epi32(0x00FF0000);
  const __m128i byte3mask = _mm_set1_epi32(0x0000FF00);

  // Same width as the loop counter so no implicit narrowing takes place.
  const unsigned int quarterPoints = num_points / 4;
  for(; number < quarterPoints; number++){
    // Load four 32-bit values; inputPtr is advanced only after the store
    // because the swap is done in-place.
    input = _mm_loadu_si128((__m128i*)inputPtr);
    // Do the four shifts. The 24-bit shifts fill with zeros, so byte1 and
    // byte4 already hold a single byte each and need no mask.
    byte1 = _mm_slli_epi32(input, 24);
    byte2 = _mm_slli_epi32(input, 8);
    byte3 = _mm_srli_epi32(input, 8);
    byte4 = _mm_srli_epi32(input, 24);
    // Or the bytes together, masking the two middle bytes into place
    output = _mm_or_si128(byte1, byte4);
    byte2 = _mm_and_si128(byte2, byte2mask);
    output = _mm_or_si128(output, byte2);
    byte3 = _mm_and_si128(byte3, byte3mask);
    output = _mm_or_si128(output, byte3);
    // Store the results
    _mm_storeu_si128((__m128i*)inputPtr, output);
    inputPtr += 4;
  }

  // Byteswap any remaining points:
  number = quarterPoints * 4;
  for(; number < num_points; number++){
    uint32_t outputVal = *inputPtr;
    outputVal = (((outputVal >> 24) & 0xff) | ((outputVal >> 8) & 0x0000ff00) | ((outputVal << 8) & 0x00ff0000) | ((outputVal << 24) & 0xff000000));
    *inputPtr = outputVal;
    inputPtr++;
  }
}
52 #endif /* LV_HAVE_SSE2 */
53 
54 #ifdef LV_HAVE_GENERIC
/*!
  \brief Byteswaps (in-place) a vector of uint32_t's using portable scalar code.
  \param intsToSwap The vector of data to byte swap
  \param num_points The number of data points
*/
static inline void volk_32u_byteswap_generic(uint32_t* intsToSwap, unsigned int num_points){
  uint32_t* inputPtr = intsToSwap;

  unsigned int point;
  for(point = 0; point < num_points; point++){
    uint32_t output = *inputPtr;
    // Reverse the byte order: byte 3 -> 0, byte 2 -> 1, byte 1 -> 2, byte 0 -> 3
    output = (((output >> 24) & 0xff) | ((output >> 8) & 0x0000ff00) | ((output << 8) & 0x00ff0000) | ((output << 24) & 0xff000000));

    *inputPtr = output;
    inputPtr++;
  }
}
72 #endif /* LV_HAVE_GENERIC */
73 
74 
75 
76 
77 #endif /* INCLUDED_volk_32u_byteswap_u_H */
78 #ifndef INCLUDED_volk_32u_byteswap_a_H
79 #define INCLUDED_volk_32u_byteswap_a_H
80 
81 #include <inttypes.h>
82 #include <stdio.h>
83 
84 #ifdef LV_HAVE_SSE2
85 #include <emmintrin.h>
86 
/*!
  \brief Byteswaps (in-place) an aligned vector of uint32_t's.

  Four values are processed per iteration with SSE2 aligned loads/stores;
  the remaining (num_points % 4) values are swapped with scalar code.

  \param intsToSwap The vector of data to byte swap; must be 16-byte aligned
                    because _mm_load_si128/_mm_store_si128 are used
  \param num_points The number of data points
*/
static inline void volk_32u_byteswap_a_sse2(uint32_t* intsToSwap, unsigned int num_points){
  unsigned int number = 0;

  uint32_t* inputPtr = intsToSwap;
  __m128i input, byte1, byte2, byte3, byte4, output;
  const __m128i byte2mask = _mm_set1_epi32(0x00FF0000);
  const __m128i byte3mask = _mm_set1_epi32(0x0000FF00);

  // Same width as the loop counter so no implicit narrowing takes place.
  const unsigned int quarterPoints = num_points / 4;
  for(; number < quarterPoints; number++){
    // Load four 32-bit values; inputPtr is advanced only after the store
    // because the swap is done in-place.
    input = _mm_load_si128((__m128i*)inputPtr);
    // Do the four shifts. The 24-bit shifts fill with zeros, so byte1 and
    // byte4 already hold a single byte each and need no mask.
    byte1 = _mm_slli_epi32(input, 24);
    byte2 = _mm_slli_epi32(input, 8);
    byte3 = _mm_srli_epi32(input, 8);
    byte4 = _mm_srli_epi32(input, 24);
    // Or the bytes together, masking the two middle bytes into place
    output = _mm_or_si128(byte1, byte4);
    byte2 = _mm_and_si128(byte2, byte2mask);
    output = _mm_or_si128(output, byte2);
    byte3 = _mm_and_si128(byte3, byte3mask);
    output = _mm_or_si128(output, byte3);
    // Store the results
    _mm_store_si128((__m128i*)inputPtr, output);
    inputPtr += 4;
  }

  // Byteswap any remaining points:
  number = quarterPoints * 4;
  for(; number < num_points; number++){
    uint32_t outputVal = *inputPtr;
    outputVal = (((outputVal >> 24) & 0xff) | ((outputVal >> 8) & 0x0000ff00) | ((outputVal << 8) & 0x00ff0000) | ((outputVal << 24) & 0xff000000));
    *inputPtr = outputVal;
    inputPtr++;
  }
}
129 #endif /* LV_HAVE_SSE2 */
130 
131 #ifdef LV_HAVE_GENERIC
/*!
  \brief Byteswaps (in-place) a vector of uint32_t's using portable scalar code.
  \param intsToSwap The vector of data to byte swap
  \param num_points The number of data points
*/
static inline void volk_32u_byteswap_a_generic(uint32_t* intsToSwap, unsigned int num_points){
  uint32_t* inputPtr = intsToSwap;

  unsigned int point;
  for(point = 0; point < num_points; point++){
    uint32_t output = *inputPtr;
    // Reverse the byte order: byte 3 -> 0, byte 2 -> 1, byte 1 -> 2, byte 0 -> 3
    output = (((output >> 24) & 0xff) | ((output >> 8) & 0x0000ff00) | ((output << 8) & 0x00ff0000) | ((output << 24) & 0xff000000));

    *inputPtr = output;
    inputPtr++;
  }
}
149 #endif /* LV_HAVE_GENERIC */
150 
151 
152 
153 
154 #endif /* INCLUDED_volk_32u_byteswap_a_H */
unsigned int uint32_t
Definition: stdint.h:80
unsigned __int64 uint64_t
Definition: stdint.h:90