Statistics
| Branch: | Tag: | Revision:

root / volk / include / volk / volk_16sc_deinterleave_real_8s_aligned16.h @ 15ad4b53

History | View | Annotate | Download (3 kB)

1
#ifndef INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_8s_ALIGNED16_H
2
#define INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_8s_ALIGNED16_H
3
4
#include <inttypes.h>
5
#include <stdio.h>
6
7
#if LV_HAVE_SSSE3
8
#include <tmmintrin.h>
9
/*!
10
  \brief Deinterleaves the complex 16 bit vector into 8 bit I vector data
11
  \param complexVector The complex input vector
12
  \param iBuffer The I buffer output data
13
  \param num_points The number of complex data values to be deinterleaved
14
*/
15
static inline void volk_16sc_deinterleave_real_8s_aligned16_ssse3(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
16
  unsigned int number = 0;
17
  const int8_t* complexVectorPtr = (int8_t*)complexVector;
18
  int8_t* iBufferPtr = iBuffer;
19
  __m128i iMoveMask1 = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
20
  __m128i iMoveMask2 = _mm_set_epi8(13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
21
  __m128i complexVal1, complexVal2, complexVal3, complexVal4, iOutputVal;
22
23
  unsigned int sixteenthPoints = num_points / 16;
24
25
  for(number = 0; number < sixteenthPoints; number++){
26
    complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr);  complexVectorPtr += 16;
27
    complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr);  complexVectorPtr += 16;
28
29
    complexVal3 = _mm_load_si128((__m128i*)complexVectorPtr);  complexVectorPtr += 16;
30
    complexVal4 = _mm_load_si128((__m128i*)complexVectorPtr);  complexVectorPtr += 16;
31
32
    complexVal1 = _mm_shuffle_epi8(complexVal1, iMoveMask1);
33
    complexVal2 = _mm_shuffle_epi8(complexVal2, iMoveMask2);
34
35
    complexVal1 = _mm_or_si128(complexVal1, complexVal2);
36
37
    complexVal3 = _mm_shuffle_epi8(complexVal3, iMoveMask1);
38
    complexVal4 = _mm_shuffle_epi8(complexVal4, iMoveMask2);
39
40
    complexVal3 = _mm_or_si128(complexVal3, complexVal4);
41
42
43
    complexVal1 = _mm_srai_epi16(complexVal1, 8);
44
    complexVal3 = _mm_srai_epi16(complexVal3, 8);
45
46
    iOutputVal = _mm_packs_epi16(complexVal1, complexVal3);
47
48
    _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);
49
50
    iBufferPtr += 16;
51
  }
52
53
  number = sixteenthPoints * 16;
54
  int16_t* int16ComplexVectorPtr = (int16_t*)complexVectorPtr;
55
  for(; number < num_points; number++){
56
    *iBufferPtr++ = ((int8_t)(*int16ComplexVectorPtr++ / 256));
57
    int16ComplexVectorPtr++;
58
  }
59
}
60
#endif /* LV_HAVE_SSSE3 */
61
62
#if LV_HAVE_GENERIC
63
/*!
64
  \brief Deinterleaves the complex 16 bit vector into 8 bit I vector data
65
  \param complexVector The complex input vector
66
  \param iBuffer The I buffer output data
67
  \param num_points The number of complex data values to be deinterleaved
68
*/
69
static inline void volk_16sc_deinterleave_real_8s_aligned16_generic(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
70
  unsigned int number = 0;
71
  const int16_t* complexVectorPtr = (int16_t*)complexVector;
72
  int8_t* iBufferPtr = iBuffer;
73
  for(number = 0; number < num_points; number++){
74
    *iBufferPtr++ = (int8_t)(*complexVectorPtr++ / 256);
75
    complexVectorPtr++;
76
  }
77
}
78
#endif /* LV_HAVE_GENERIC */
79
80
81
82
83
#endif /* INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_8s_ALIGNED16_H */