GNU Radio Manual and C++ API Reference  3.7.5.1
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
volk_32f_binary_slicer_8i.h
Go to the documentation of this file.
1 #ifndef INCLUDED_volk_32f_binary_slicer_8i_H
2 #define INCLUDED_volk_32f_binary_slicer_8i_H
3 
4 
5 #ifdef LV_HAVE_GENERIC
/*!
  \brief Binary slicer: writes 1 for every float input that is greater than or equal to 0, 0 otherwise
  \param cVector The char (int8_t) output (either 0 or 1)
  \param aVector The float input
  \param num_points The number of values read from aVector and stored into cVector
*/
static inline void
volk_32f_binary_slicer_8i_generic(int8_t* cVector, const float* aVector,
                                  unsigned int num_points)
{
  unsigned int idx;

  for(idx = 0; idx < num_points; idx++) {
    /* The test is written as ">= 0" so that NaN (which compares false) maps to 0. */
    cVector[idx] = (aVector[idx] >= 0) ? 1 : 0;
  }
}
29 #endif /* LV_HAVE_GENERIC */
30 
31 
32 #ifdef LV_HAVE_GENERIC
/*!
  \brief Binary slicer without a conditional branch: writes 1 for every float input that is greater than or equal to 0, 0 otherwise
  \param cVector The char (int8_t) output (either 0 or 1)
  \param aVector The float input
  \param num_points The number of values read from aVector and stored into cVector
*/
static inline void
volk_32f_binary_slicer_8i_generic_branchless(int8_t* cVector, const float* aVector,
                                             unsigned int num_points)
{
  const float* in = aVector;
  int8_t* out = cVector;
  unsigned int remaining = num_points;

  while(remaining-- > 0) {
    /* A C comparison evaluates to exactly 0 or 1, so its value is stored directly. */
    *out++ = (int8_t)(*in++ >= 0);
  }
}
51 #endif /* LV_HAVE_GENERIC */
52 
53 
54 #ifdef LV_HAVE_SSE2
55 #include <emmintrin.h>
/*!
  \brief Returns integer 1 if float input is greater than or equal to 0, 0 otherwise
  \param cVector The char (int8_t) output (either 0 or 1); accessed with aligned 128-bit stores, so it must be 16-byte aligned
  \param aVector The float input; accessed with aligned 128-bit loads, so it must be 16-byte aligned
  \param num_points The number of values in aVector and stored into cVector
*/
static inline void
volk_32f_binary_slicer_8i_a_sse2(int8_t* cVector, const float* aVector,
                                 unsigned int num_points)
{
  int8_t* cPtr = cVector;
  const float* aPtr = aVector;
  unsigned int number = 0;

  const unsigned int n16points = num_points / 16;
  __m128 a0_val, a1_val, a2_val, a3_val;
  __m128 res0_f, res1_f, res2_f, res3_f;
  __m128i res0_i, res1_i, res2_i, res3_i;
  const __m128 zero_val = _mm_set1_ps(0.0f);

  // Vector part: 16 floats in, 16 bytes out per iteration
  for(number = 0; number < n16points; number++) {
    a0_val = _mm_load_ps(aPtr);
    a1_val = _mm_load_ps(aPtr+4);
    a2_val = _mm_load_ps(aPtr+8);
    a3_val = _mm_load_ps(aPtr+12);

    // compare >= 0; each lane becomes an all-ones (true) or all-zeros (false) mask
    res0_f = _mm_cmpge_ps(a0_val, zero_val);
    res1_f = _mm_cmpge_ps(a1_val, zero_val);
    res2_f = _mm_cmpge_ps(a2_val, zero_val);
    res3_f = _mm_cmpge_ps(a3_val, zero_val);

    // Reinterpret the mask bits as 32i (no numeric conversion) and >> 31 to get 0 or 1.
    // A float->int conversion must not be used here: the all-ones mask is a NaN bit
    // pattern, and converting it raises the invalid-operation floating-point flag.
    res0_i = _mm_srli_epi32(_mm_castps_si128(res0_f), 31);
    res1_i = _mm_srli_epi32(_mm_castps_si128(res1_f), 31);
    res2_i = _mm_srli_epi32(_mm_castps_si128(res2_f), 31);
    res3_i = _mm_srli_epi32(_mm_castps_si128(res3_f), 31);

    // pack into 16-bit results
    res0_i = _mm_packs_epi32(res0_i, res1_i);
    res2_i = _mm_packs_epi32(res2_i, res3_i);

    // pack into 8-bit results
    res0_i = _mm_packs_epi16(res0_i, res2_i);

    _mm_store_si128((__m128i*)cPtr, res0_i);

    cPtr += 16;
    aPtr += 16;
  }

  // Scalar tail: the remaining (num_points % 16) values
  for(number = n16points * 16; number < num_points; number++) {
    if(*aPtr++ >= 0) {
      *cPtr++ = 1;
    }
    else {
      *cPtr++ = 0;
    }
  }
}
117 #endif /* LV_HAVE_SSE2 */
118 
119 
120 
121 #ifdef LV_HAVE_SSE2
122 #include <emmintrin.h>
/*!
  \brief Returns integer 1 if float input is greater than or equal to 0, 0 otherwise
  \param cVector The char (int8_t) output (either 0 or 1); written with unaligned stores
  \param aVector The float input; read with unaligned loads
  \param num_points The number of values in aVector and stored into cVector
*/
static inline void
volk_32f_binary_slicer_8i_u_sse2(int8_t* cVector, const float* aVector,
                                 unsigned int num_points)
{
  int8_t* cPtr = cVector;
  const float* aPtr = aVector;
  unsigned int number = 0;

  const unsigned int n16points = num_points / 16;
  __m128 a0_val, a1_val, a2_val, a3_val;
  __m128 res0_f, res1_f, res2_f, res3_f;
  __m128i res0_i, res1_i, res2_i, res3_i;
  const __m128 zero_val = _mm_set1_ps(0.0f);

  // Vector part: 16 floats in, 16 bytes out per iteration
  for(number = 0; number < n16points; number++) {
    a0_val = _mm_loadu_ps(aPtr);
    a1_val = _mm_loadu_ps(aPtr+4);
    a2_val = _mm_loadu_ps(aPtr+8);
    a3_val = _mm_loadu_ps(aPtr+12);

    // compare >= 0; each lane becomes an all-ones (true) or all-zeros (false) mask
    res0_f = _mm_cmpge_ps(a0_val, zero_val);
    res1_f = _mm_cmpge_ps(a1_val, zero_val);
    res2_f = _mm_cmpge_ps(a2_val, zero_val);
    res3_f = _mm_cmpge_ps(a3_val, zero_val);

    // Reinterpret the mask bits as 32i (no numeric conversion) and >> 31 to get 0 or 1.
    // A float->int conversion must not be used here: the all-ones mask is a NaN bit
    // pattern, and converting it raises the invalid-operation floating-point flag.
    res0_i = _mm_srli_epi32(_mm_castps_si128(res0_f), 31);
    res1_i = _mm_srli_epi32(_mm_castps_si128(res1_f), 31);
    res2_i = _mm_srli_epi32(_mm_castps_si128(res2_f), 31);
    res3_i = _mm_srli_epi32(_mm_castps_si128(res3_f), 31);

    // pack into 16-bit results
    res0_i = _mm_packs_epi32(res0_i, res1_i);
    res2_i = _mm_packs_epi32(res2_i, res3_i);

    // pack into 8-bit results
    res0_i = _mm_packs_epi16(res0_i, res2_i);

    _mm_storeu_si128((__m128i*)cPtr, res0_i);

    cPtr += 16;
    aPtr += 16;
  }

  // Scalar tail: the remaining (num_points % 16) values
  for(number = n16points * 16; number < num_points; number++) {
    if(*aPtr++ >= 0) {
      *cPtr++ = 1;
    }
    else {
      *cPtr++ = 0;
    }
  }
}
184 #endif /* LV_HAVE_SSE2 */
185 
186 
187 #endif /* INCLUDED_volk_32f_binary_slicer_8i_H */
signed char int8_t
Definition: stdint.h:75