GNU Radio 3.7.2 C++ API
volk_64u_byteswap.h
Go to the documentation of this file.
1 #ifndef INCLUDED_volk_64u_byteswap_u_H
2 #define INCLUDED_volk_64u_byteswap_u_H
3 
4 #include <inttypes.h>
5 #include <stdio.h>
6 
7 #ifdef LV_HAVE_SSE2
8 #include <emmintrin.h>
9 
/*!
  \brief Byteswaps (in-place) a vector of uint64_t's using SSE2.

  Unaligned loads and stores are used, so the buffer does not need to be
  16-byte aligned.

  \param intsToSwap The vector of data to byte swap
  \param num_points The number of 64-bit data points in intsToSwap
*/
static inline void volk_64u_byteswap_u_sse2(uint64_t* intsToSwap, unsigned int num_points){
  const __m128i byte2mask = _mm_set1_epi32(0x00FF0000);
  const __m128i byte3mask = _mm_set1_epi32(0x0000FF00);
  const unsigned int halfPoints = num_points / 2;
  unsigned int number;

  // Each iteration handles two 64-bit points (one 128-bit register).
  for(number = 0; number < halfPoints; number++){
    __m128i* const vecPtr = (__m128i*)(intsToSwap + 2 * number);
    const __m128i input = _mm_loadu_si128(vecPtr);

    // Byteswap every 32-bit lane: the outer bytes need no mask because the
    // 24-bit logical shifts already clear everything else.
    __m128i output = _mm_or_si128(_mm_slli_epi32(input, 24), _mm_srli_epi32(input, 24));
    output = _mm_or_si128(output, _mm_and_si128(_mm_slli_epi32(input, 8), byte2mask));
    output = _mm_or_si128(output, _mm_and_si128(_mm_srli_epi32(input, 8), byte3mask));

    // Exchange the two 32-bit words inside each 64-bit point to finish the
    // 64-bit byte reversal.
    output = _mm_shuffle_epi32(output, _MM_SHUFFLE(2, 3, 0, 1));

    _mm_storeu_si128(vecPtr, output);
  }

  // Byteswap the remaining point (if num_points is odd). The data is accessed
  // as uint64_t rather than through a uint32_t alias, which would break the
  // strict-aliasing rule.
  for(number = halfPoints * 2; number < num_points; number++){
    uint64_t value = intsToSwap[number];
    value = ((value >> 8) & UINT64_C(0x00FF00FF00FF00FF)) | ((value & UINT64_C(0x00FF00FF00FF00FF)) << 8);
    value = ((value >> 16) & UINT64_C(0x0000FFFF0000FFFF)) | ((value & UINT64_C(0x0000FFFF0000FFFF)) << 16);
    intsToSwap[number] = (value >> 32) | (value << 32);
  }
}
60 #endif /* LV_HAVE_SSE2 */
61 
62 #ifdef LV_HAVE_GENERIC
/*!
  \brief Byteswaps (in-place) a vector of uint64_t's (portable implementation).
  \param intsToSwap The vector of data to byte swap
  \param num_points The number of 64-bit data points in intsToSwap
*/
static inline void volk_64u_byteswap_generic(uint64_t* intsToSwap, unsigned int num_points){
  unsigned int point;
  for(point = 0; point < num_points; point++){
    // Work on the uint64_t value itself; reading it through a uint32_t alias
    // would break the strict-aliasing rule.
    uint64_t value = intsToSwap[point];

    // Reverse the byte order in three steps: swap adjacent bytes, then
    // adjacent 16-bit pairs, then the two 32-bit halves.
    value = ((value >> 8) & UINT64_C(0x00FF00FF00FF00FF)) | ((value & UINT64_C(0x00FF00FF00FF00FF)) << 8);
    value = ((value >> 16) & UINT64_C(0x0000FFFF0000FFFF)) | ((value & UINT64_C(0x0000FFFF0000FFFF)) << 16);
    intsToSwap[point] = (value >> 32) | (value << 32);
  }
}
83 #endif /* LV_HAVE_GENERIC */
84 
85 
86 
87 
88 #endif /* INCLUDED_volk_64u_byteswap_u_H */
89 #ifndef INCLUDED_volk_64u_byteswap_a_H
90 #define INCLUDED_volk_64u_byteswap_a_H
91 
92 #include <inttypes.h>
93 #include <stdio.h>
94 
95 #ifdef LV_HAVE_SSE2
96 #include <emmintrin.h>
97 
/*!
  \brief Byteswaps (in-place) an aligned vector of uint64_t's using SSE2.

  Aligned loads and stores (_mm_load_si128 / _mm_store_si128) are used, so
  intsToSwap must be 16-byte aligned.

  \param intsToSwap The vector of data to byte swap
  \param num_points The number of 64-bit data points in intsToSwap
*/
static inline void volk_64u_byteswap_a_sse2(uint64_t* intsToSwap, unsigned int num_points){
  const __m128i byte2mask = _mm_set1_epi32(0x00FF0000);
  const __m128i byte3mask = _mm_set1_epi32(0x0000FF00);
  const unsigned int halfPoints = num_points / 2;
  unsigned int number;

  // Each iteration handles two 64-bit points (one 128-bit register).
  for(number = 0; number < halfPoints; number++){
    __m128i* const vecPtr = (__m128i*)(intsToSwap + 2 * number);
    const __m128i input = _mm_load_si128(vecPtr);

    // Byteswap every 32-bit lane: the outer bytes need no mask because the
    // 24-bit logical shifts already clear everything else.
    __m128i output = _mm_or_si128(_mm_slli_epi32(input, 24), _mm_srli_epi32(input, 24));
    output = _mm_or_si128(output, _mm_and_si128(_mm_slli_epi32(input, 8), byte2mask));
    output = _mm_or_si128(output, _mm_and_si128(_mm_srli_epi32(input, 8), byte3mask));

    // Exchange the two 32-bit words inside each 64-bit point to finish the
    // 64-bit byte reversal.
    output = _mm_shuffle_epi32(output, _MM_SHUFFLE(2, 3, 0, 1));

    _mm_store_si128(vecPtr, output);
  }

  // Byteswap the remaining point (if num_points is odd). The data is accessed
  // as uint64_t rather than through a uint32_t alias, which would break the
  // strict-aliasing rule.
  for(number = halfPoints * 2; number < num_points; number++){
    uint64_t value = intsToSwap[number];
    value = ((value >> 8) & UINT64_C(0x00FF00FF00FF00FF)) | ((value & UINT64_C(0x00FF00FF00FF00FF)) << 8);
    value = ((value >> 16) & UINT64_C(0x0000FFFF0000FFFF)) | ((value & UINT64_C(0x0000FFFF0000FFFF)) << 16);
    intsToSwap[number] = (value >> 32) | (value << 32);
  }
}
148 #endif /* LV_HAVE_SSE2 */
149 
150 #ifdef LV_HAVE_GENERIC
/*!
  \brief Byteswaps (in-place) a vector of uint64_t's (portable implementation
  of the aligned kernel; the code itself only performs plain uint64_t accesses).
  \param intsToSwap The vector of data to byte swap
  \param num_points The number of 64-bit data points in intsToSwap
*/
static inline void volk_64u_byteswap_a_generic(uint64_t* intsToSwap, unsigned int num_points){
  unsigned int point;
  for(point = 0; point < num_points; point++){
    // Work on the uint64_t value itself; reading it through a uint32_t alias
    // would break the strict-aliasing rule.
    uint64_t value = intsToSwap[point];

    // Reverse the byte order in three steps: swap adjacent bytes, then
    // adjacent 16-bit pairs, then the two 32-bit halves.
    value = ((value >> 8) & UINT64_C(0x00FF00FF00FF00FF)) | ((value & UINT64_C(0x00FF00FF00FF00FF)) << 8);
    value = ((value >> 16) & UINT64_C(0x0000FFFF0000FFFF)) | ((value & UINT64_C(0x0000FFFF0000FFFF)) << 16);
    intsToSwap[point] = (value >> 32) | (value << 32);
  }
}
171 #endif /* LV_HAVE_GENERIC */
172 
173 
174 
175 
176 #endif /* INCLUDED_volk_64u_byteswap_a_H */
unsigned int uint32_t
Definition: stdint.h:80
unsigned __int64 uint64_t
Definition: stdint.h:90