1 #ifndef INCLUDED_volk_16u_byteswap_u_H
2 #define INCLUDED_volk_16u_byteswap_u_H
/*!
  \brief Byte-swaps every uint16_t of a buffer in place (SSE2, unaligned access).

  The bulk of the buffer is handled eight values (128 bits) at a time with
  unaligned loads/stores; the remaining num_points % 8 values are swapped
  one by one with scalar code.

  \param intsToSwap Buffer of 16-bit values to swap; no alignment is required.
  \param num_points Number of uint16_t values in the buffer.
*/
static inline void volk_16u_byteswap_u_sse2(uint16_t* intsToSwap, unsigned int num_points)
{
    unsigned int number = 0;
    uint16_t* inputPtr = intsToSwap;
    __m128i input, left, right, output;

    const unsigned int eighthPoints = num_points / 8;
    for (; number < eighthPoints; number++) {
        /* Load eight 16-bit values without any alignment assumption. */
        input = _mm_loadu_si128((__m128i*)inputPtr);
        /* Low byte moves up, high byte moves down; OR merges both halves. */
        left = _mm_slli_epi16(input, 8);
        right = _mm_srli_epi16(input, 8);
        output = _mm_or_si128(left, right);
        _mm_storeu_si128((__m128i*)inputPtr, output);
        inputPtr += 8;
    }

    /* Scalar tail for the values that do not fill a whole vector. */
    number = eighthPoints * 8;
    for (; number < num_points; number++) {
        uint16_t outputVal = *inputPtr;
        outputVal = (uint16_t)(((outputVal >> 8) & 0xff) | ((outputVal << 8) & 0xff00));
        *inputPtr = outputVal;
        inputPtr++;
    }
}
45 #ifdef LV_HAVE_GENERIC
/*!
  \brief Byte-swaps every uint16_t of a buffer in place (portable scalar code).

  \param intsToSwap Buffer of 16-bit values to swap.
  \param num_points Number of uint16_t values in the buffer.
*/
static inline void volk_16u_byteswap_generic(uint16_t* intsToSwap, unsigned int num_points)
{
    unsigned int point;
    uint16_t* inputPtr = intsToSwap;

    for (point = 0; point < num_points; point++) {
        uint16_t output = *inputPtr;
        /* Exchange the high and low byte of the current value. */
        output = (uint16_t)(((output >> 8) & 0xff) | ((output << 8) & 0xff00));
        *inputPtr = output;
        inputPtr++;
    }
}
64 #ifndef INCLUDED_volk_16u_byteswap_a_H
65 #define INCLUDED_volk_16u_byteswap_a_H
71 #include <emmintrin.h>
/*!
  \brief Byte-swaps every uint16_t of a buffer in place (SSE2, aligned access).

  The bulk of the buffer is handled eight values (128 bits) at a time with
  aligned loads/stores; the remaining num_points % 8 values are swapped one
  by one with scalar code.

  \param intsToSwap Buffer of 16-bit values to swap; must be 16-byte aligned
                    because _mm_load_si128/_mm_store_si128 are used on it.
  \param num_points Number of uint16_t values in the buffer.
*/
static inline void volk_16u_byteswap_a_sse2(uint16_t* intsToSwap, unsigned int num_points)
{
    unsigned int number = 0;
    uint16_t* inputPtr = intsToSwap;
    __m128i input, left, right, output;

    const unsigned int eighthPoints = num_points / 8;
    for (; number < eighthPoints; number++) {
        /* Aligned load of eight 16-bit values. */
        input = _mm_load_si128((__m128i*)inputPtr);
        /* Low byte moves up, high byte moves down; OR merges both halves. */
        left = _mm_slli_epi16(input, 8);
        right = _mm_srli_epi16(input, 8);
        output = _mm_or_si128(left, right);
        _mm_store_si128((__m128i*)inputPtr, output);
        inputPtr += 8;
    }

    /* Scalar tail for the values that do not fill a whole vector. */
    number = eighthPoints * 8;
    for (; number < num_points; number++) {
        uint16_t outputVal = *inputPtr;
        outputVal = (uint16_t)(((outputVal >> 8) & 0xff) | ((outputVal << 8) & 0xff00));
        *inputPtr = outputVal;
        inputPtr++;
    }
}
110 #include <arm_neon.h>
/*!
  \brief Byte-swaps every uint16_t of a buffer in place (ARM NEON).

  Eight values are processed per iteration; the remaining num_points % 8
  values are swapped one by one with scalar code.

  \param intsToSwap Buffer of 16-bit values to swap.
  \param num_points Number of uint16_t values in the buffer.
*/
static inline void volk_16u_byteswap_neon(uint16_t* intsToSwap, unsigned int num_points)
{
    unsigned int number;
    unsigned int eighth_points = num_points / 8;
    uint16_t* inputPtr = intsToSwap;
    uint16x8_t input, swapped;

    for (number = 0; number < eighth_points; number++) {
        input = vld1q_u16(inputPtr);
        /* Reverse the two bytes inside every 16-bit lane. This replaces the
           former vsri/vsli pair, whose first step read the destination
           vector before it had ever been written. */
        swapped = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(input)));
        vst1q_u16(inputPtr, swapped);
        inputPtr += 8;
    }

    /* Scalar tail for the values that do not fill a whole vector. */
    for (number = eighth_points * 8; number < num_points; number++) {
        uint16_t value = *inputPtr;
        value = (uint16_t)(((value >> 8) & 0xff) | ((value << 8) & 0xff00));
        *inputPtr = value;
        inputPtr++;
    }
}
139 #ifdef LV_HAVE_GENERIC
/*!
  \brief Byte-swaps every uint16_t of a buffer in place (portable scalar code).

  Uses only scalar loads and stores, so nothing beyond the natural
  alignment of uint16_t is needed.

  \param intsToSwap Buffer of 16-bit values to swap.
  \param num_points Number of uint16_t values in the buffer.
*/
static inline void volk_16u_byteswap_a_generic(uint16_t* intsToSwap, unsigned int num_points)
{
    unsigned int point;
    uint16_t* inputPtr = intsToSwap;

    for (point = 0; point < num_points; point++) {
        uint16_t output = *inputPtr;
        /* Exchange the high and low byte of the current value. */
        output = (uint16_t)(((output >> 8) & 0xff) | ((output << 8) & 0xff00));
        *inputPtr = output;
        inputPtr++;
    }
}
/* Defined outside this header (presumably the ORC-generated kernel, going
   by the name - confirm against the build). */
extern void volk_16u_byteswap_a_orc_impl(uint16_t* intsToSwap, unsigned int num_points);

/*!
  \brief Byte-swaps a buffer of uint16_t in place by forwarding to
         volk_16u_byteswap_a_orc_impl.

  NOTE(review): this entry point is named "_u_" but forwards to an "_a_"
  implementation; confirm the implementation tolerates unaligned buffers.

  \param intsToSwap Buffer of 16-bit values to swap (passed through unchanged).
  \param num_points Number of uint16_t values in the buffer (passed through unchanged).
*/
static inline void volk_16u_byteswap_u_orc(uint16_t* intsToSwap, unsigned int num_points)
{
    volk_16u_byteswap_a_orc_impl(intsToSwap, num_points);
}
/* Cross-reference: uint16_t is declared as `unsigned short` (definition at stdint.h:79). */