1 #ifndef INCLUDED_volk_32f_binary_slicer_32i_H
2 #define INCLUDED_volk_32f_binary_slicer_32i_H
/*
 * Generic (scalar) variant of the 32f -> 32i binary slicer.
 *
 * cVector:    int output buffer (non-const, so presumably written once per
 *             input sample -- the store itself is not visible in this excerpt).
 * aVector:    read-only float input buffer.
 * num_points: number of input samples the loop below iterates over.
 *
 * NOTE(review): the loop body and the output cursor declaration are elided
 * here; presumably each sample maps to 1 when >= 0 and 0 otherwise, like the
 * branchless variant -- confirm against the full file.
 */
12 static inline void volk_32f_binary_slicer_32i_generic(
int* cVector,
const float* aVector,
unsigned int num_points){
/* Read cursor over the input samples. */
14 const float* aPtr = aVector;
15 unsigned int number = 0;
/* One iteration per input sample. */
17 for(number = 0; number < num_points; number++){
29 #ifdef LV_HAVE_GENERIC
/*
 * Scalar binary slicer that avoids an if/else by storing the result of the
 * comparison directly.
 *
 * cVector:    int output buffer (the write cursor cPtr is declared on a line
 *             not shown here; presumably it starts at cVector -- confirm).
 * aVector:    read-only float input buffer.
 * num_points: number of samples processed.
 */
36 static inline void volk_32f_binary_slicer_32i_generic_branchless(
int* cVector,
const float* aVector,
unsigned int num_points){
/* Read cursor over the input samples. */
38 const float* aPtr = aVector;
39 unsigned int number = 0;
41 for(number = 0; number < num_points; number++){
/*
 * The relational expression evaluates to the int 1 or 0, so this stores 1 for
 * samples >= 0 (including -0.0f) and 0 otherwise (including NaN), then
 * advances both cursors by one element.
 */
42 *cPtr++ = (*aPtr++ >= 0);
49 #include <emmintrin.h>
/*
 * SSE2 binary slicer, aligned variant: handles the input four floats at a
 * time and finishes the remaining (num_points % 4) samples in a scalar loop.
 *
 * cVector:    int output buffer; written with _mm_store_si128, which requires
 *             a 16-byte aligned address.
 * aVector:    read-only float input buffer; read with _mm_load_ps, which
 *             requires a 16-byte aligned address.
 * num_points: number of samples.
 *
 * NOTE(review): the declarations of cPtr, a_val, res_f and zero_val, the
 * per-iteration pointer advances and the tail-loop body are not visible in
 * this excerpt.
 */
56 static inline void volk_32f_binary_slicer_32i_a_sse2(
int* cVector,
const float* aVector,
unsigned int num_points){
58 const float* aPtr = aVector;
59 unsigned int number = 0;
/* Number of complete 4-float groups handled by the vector loop. */
61 unsigned int quarter_points = num_points / 4;
63 __m128i res_i, binary_i;
/* Threshold the samples are compared against. */
65 zero_val = _mm_set1_ps (0.0f);
67 for(number = 0; number < quarter_points; number++){
/* Aligned load of four input floats. */
68 a_val = _mm_load_ps(aPtr);
/* Per-lane mask: all bits set where a_val >= 0, all bits clear otherwise. */
70 res_f = _mm_cmpge_ps (a_val, zero_val);
/*
 * An all-ones lane is a NaN bit pattern when read as a float, and converting
 * NaN to int32 yields the "integer indefinite" value 0x80000000; an all-zero
 * lane is +0.0f and converts to 0. Only the top bit therefore tells the two
 * cases apart.
 */
71 res_i = _mm_cvtps_epi32 (res_f);
/* A logical right shift by 31 moves that top bit down, leaving 1 or 0 per lane. */
72 binary_i = _mm_srli_epi32 (res_i, 31);
/* Aligned store of four int results. */
75 _mm_store_si128((__m128i*)cPtr, binary_i);
/* Scalar tail: samples left over after the last full group of four. */
82 for(number = quarter_points * 4; number < num_points; number++){
95 #include <immintrin.h>
/*
 * AVX binary slicer, aligned variant: handles the input eight floats at a
 * time and finishes the remaining (num_points % 8) samples in a scalar loop.
 *
 * cVector:    int output buffer; written with _mm256_store_si256, which
 *             requires a 32-byte aligned address.
 * aVector:    read-only float input buffer; read with _mm256_load_ps, which
 *             requires a 32-byte aligned address.
 * num_points: number of samples.
 *
 * NOTE(review): the declarations of cPtr and binary_i, the per-iteration
 * pointer advances and the tail-loop body are not visible in this excerpt.
 */
102 static inline void volk_32f_binary_slicer_32i_a_avx(
int* cVector,
const float* aVector,
unsigned int num_points){
104 const float* aPtr = aVector;
105 unsigned int number = 0;
/* Despite its name, this counts complete groups of EIGHT floats. */
107 unsigned int quarter_points = num_points / 8;
108 __m256 a_val, res_f, binary_f;
110 __m256 zero_val, one_val;
/* zero_val is the comparison threshold; one_val is the value kept for "true" lanes. */
111 zero_val = _mm256_set1_ps (0.0f);
112 one_val = _mm256_set1_ps (1.0f);
114 for(number = 0; number < quarter_points; number++){
/* Aligned load of eight input floats. */
115 a_val = _mm256_load_ps(aPtr);
/*
 * Predicate 13 (0x0D) is _CMP_GE_OS: greater-than-or-equal, ordered,
 * signaling. Lanes with a_val >= 0 become all-ones; others (including NaN)
 * become all-zeros.
 */
117 res_f = _mm256_cmp_ps (a_val, zero_val, 13);
/* Masking the bit pattern of 1.0f leaves 1.0f in "true" lanes and 0.0f elsewhere. */
118 binary_f = _mm256_and_ps (res_f, one_val);
/* 1.0f / 0.0f convert exactly to the integers 1 / 0. */
119 binary_i = _mm256_cvtps_epi32(binary_f);
/* Aligned store of eight int results. */
123 _mm256_store_si256((__m256i *)cPtr, binary_i);
/* Scalar tail: samples left over after the last full group of eight. */
130 for(number = quarter_points * 8; number < num_points; number++){
143 #include <emmintrin.h>
/*
 * SSE2 binary slicer, unaligned variant: same computation as the aligned SSE2
 * kernel, but it uses _mm_loadu_ps / _mm_storeu_si128, so neither buffer has
 * to be 16-byte aligned.
 *
 * cVector:    int output buffer.
 * aVector:    read-only float input buffer.
 * num_points: number of samples; the last (num_points % 4) are left to the
 *             scalar loop at the end.
 *
 * NOTE(review): the declarations of cPtr, a_val, res_f and zero_val, the
 * per-iteration pointer advances and the tail-loop body are not visible in
 * this excerpt.
 */
150 static inline void volk_32f_binary_slicer_32i_u_sse2(
int* cVector,
const float* aVector,
unsigned int num_points){
152 const float* aPtr = aVector;
153 unsigned int number = 0;
/* Number of complete 4-float groups handled by the vector loop. */
155 unsigned int quarter_points = num_points / 4;
157 __m128i res_i, binary_i;
/* Threshold the samples are compared against. */
159 zero_val = _mm_set1_ps (0.0f);
161 for(number = 0; number < quarter_points; number++){
/* Unaligned load of four input floats. */
162 a_val = _mm_loadu_ps(aPtr);
/* Per-lane mask: all bits set where a_val >= 0, all bits clear otherwise. */
164 res_f = _mm_cmpge_ps (a_val, zero_val);
/*
 * An all-ones lane is a NaN bit pattern when read as a float, and converting
 * NaN to int32 yields 0x80000000; an all-zero lane converts to 0.
 */
165 res_i = _mm_cvtps_epi32 (res_f);
/* A logical right shift by 31 turns 0x80000000 / 0 into 1 / 0. */
166 binary_i = _mm_srli_epi32 (res_i, 31);
/* Unaligned store of four int results. */
169 _mm_storeu_si128((__m128i*)cPtr, binary_i);
/* Scalar tail: samples left over after the last full group of four. */
176 for(number = quarter_points * 4; number < num_points; number++){
189 #include <immintrin.h>
/*
 * AVX binary slicer, unaligned variant: same computation as the aligned AVX
 * kernel, but it uses _mm256_loadu_ps / _mm256_storeu_si256, so neither
 * buffer has to be 32-byte aligned.
 *
 * cVector:    int output buffer.
 * aVector:    read-only float input buffer.
 * num_points: number of samples; the last (num_points % 8) are left to the
 *             scalar loop at the end.
 *
 * NOTE(review): the declarations of cPtr and binary_i, the per-iteration
 * pointer advances and the tail-loop body are not visible in this excerpt.
 */
196 static inline void volk_32f_binary_slicer_32i_u_avx(
int* cVector,
const float* aVector,
unsigned int num_points){
198 const float* aPtr = aVector;
199 unsigned int number = 0;
/* Despite its name, this counts complete groups of EIGHT floats. */
201 unsigned int quarter_points = num_points / 8;
202 __m256 a_val, res_f, binary_f;
204 __m256 zero_val, one_val;
/* zero_val is the comparison threshold; one_val is the value kept for "true" lanes. */
205 zero_val = _mm256_set1_ps (0.0f);
206 one_val = _mm256_set1_ps (1.0f);
208 for(number = 0; number < quarter_points; number++){
/* Unaligned load of eight input floats. */
209 a_val = _mm256_loadu_ps(aPtr);
/*
 * Predicate 13 (0x0D) is _CMP_GE_OS: greater-than-or-equal, ordered,
 * signaling. Lanes with a_val >= 0 become all-ones; others become all-zeros.
 */
211 res_f = _mm256_cmp_ps (a_val, zero_val, 13);
/* Masking the bit pattern of 1.0f leaves 1.0f in "true" lanes and 0.0f elsewhere. */
212 binary_f = _mm256_and_ps (res_f, one_val);
/* 1.0f / 0.0f convert exactly to the integers 1 / 0. */
213 binary_i = _mm256_cvtps_epi32(binary_f);
/* Unaligned store of eight int results. */
217 _mm256_storeu_si256((__m256i*)cPtr, binary_i);
/* Scalar tail: samples left over after the last full group of eight. */
224 for(number = quarter_points * 8; number < num_points; number++){