1 #ifndef INCLUDED_volk_32f_binary_slicer_8i_H
2 #define INCLUDED_volk_32f_binary_slicer_8i_H
/*
 * volk_32f_binary_slicer_8i_generic (listing excerpt; several original lines,
 * including the return type, braces and the loop body, are not visible here).
 *
 * Visible parameters:
 *   cVector    - int8_t buffer (non-const; presumably the output, confirm
 *                against the full source)
 *   aVector    - const float input buffer
 *   num_points - number of elements to process
 */
13 volk_32f_binary_slicer_8i_generic(
int8_t* cVector,
const float* aVector,
14 unsigned int num_points)
/* Read cursor over the input; starts at the first float of aVector. */
17 const float* aPtr = aVector;
18 unsigned int number = 0;
/*
 * Visits every one of the num_points elements exactly once.
 * NOTE(review): the loop body is missing from this excerpt; it presumably
 * writes 1 for samples >= 0 and 0 otherwise, like the branchless variant.
 */
20 for(number = 0; number < num_points; number++) {
32 #ifdef LV_HAVE_GENERIC
/*
 * volk_32f_binary_slicer_8i_generic_branchless (listing excerpt; the return
 * type, braces and the declaration of cPtr are not visible here).
 *
 * Slices each float sample to a single bit stored in one int8_t: the value
 * written is the result of the comparison (sample >= 0), i.e. 1 or 0.
 *
 * Visible parameters:
 *   cVector    - int8_t buffer (presumably what cPtr points into; the
 *                declaration of cPtr is not visible, confirm)
 *   aVector    - const float input buffer
 *   num_points - number of elements to process
 */
40 volk_32f_binary_slicer_8i_generic_branchless(
int8_t* cVector,
const float* aVector,
41 unsigned int num_points)
/* Read cursor over the input; starts at the first float of aVector. */
44 const float* aPtr = aVector;
45 unsigned int number = 0;
47 for(number = 0; number < num_points; number++){
/*
 * The relational expression itself yields 1 or 0, so no if/else is needed.
 * Both cursors advance by one element per iteration.
 */
48 *cPtr++ = (*aPtr++ >= 0);
55 #include <emmintrin.h>
/*
 * volk_32f_binary_slicer_8i_a_sse2 (listing excerpt; the return type, braces,
 * the declarations of cPtr and zero_val, and the loop tails are not visible).
 *
 * SSE2 variant that handles 16 floats per main-loop iteration using the
 * aligned load/store intrinsics (_mm_load_ps / _mm_store_si128), so the
 * addresses used for those accesses must be 16-byte aligned.
 *
 * Visible parameters:
 *   cVector    - int8_t buffer (presumably what cPtr points into, confirm)
 *   aVector    - const float input buffer
 *   num_points - number of elements to process
 */
63 volk_32f_binary_slicer_8i_a_sse2(
int8_t* cVector,
const float* aVector,
64 unsigned int num_points)
67 const float* aPtr = aVector;
68 unsigned int number = 0;
/* Number of full 16-element groups; the remainder goes to the tail loop. */
70 unsigned int n16points = num_points / 16;
71 __m128 a0_val, a1_val, a2_val, a3_val;
72 __m128 res0_f, res1_f, res2_f, res3_f;
73 __m128i res0_i, res1_i, res2_i, res3_i;
/* Comparison threshold: 0.0f broadcast to all four lanes. */
75 zero_val = _mm_set1_ps(0.0f);
77 for(number = 0; number < n16points; number++) {
/* Aligned loads of four vectors of four floats (16 samples in total). */
78 a0_val = _mm_load_ps(aPtr);
79 a1_val = _mm_load_ps(aPtr+4);
80 a2_val = _mm_load_ps(aPtr+8);
81 a3_val = _mm_load_ps(aPtr+12);
/* Lane-wise (sample >= 0) comparison; the result is a per-lane mask. */
84 res0_f = _mm_cmpge_ps(a0_val, zero_val);
85 res1_f = _mm_cmpge_ps(a1_val, zero_val);
86 res2_f = _mm_cmpge_ps(a2_val, zero_val);
87 res3_f = _mm_cmpge_ps(a3_val, zero_val);
/*
 * The mask is converted float->int32 and then logically shifted right by 31,
 * leaving only the former sign bit in bit 0 of each lane.
 * NOTE(review): this appears to rely on _mm_cvtps_epi32 returning 0x80000000
 * for the all-ones (NaN) "true" mask and 0 for the all-zeros "false" mask,
 * giving 1 / 0 after the shift; confirm against the Intel documentation.
 */
90 res0_i = _mm_srli_epi32(_mm_cvtps_epi32(res0_f), 31);
91 res1_i = _mm_srli_epi32(_mm_cvtps_epi32(res1_f), 31);
92 res2_i = _mm_srli_epi32(_mm_cvtps_epi32(res2_f), 31);
93 res3_i = _mm_srli_epi32(_mm_cvtps_epi32(res3_f), 31);
/* Narrow 32-bit lanes to 16-bit: two vectors of 4 become one vector of 8. */
96 res0_i = _mm_packs_epi32(res0_i, res1_i);
97 res2_i = _mm_packs_epi32(res2_i, res3_i);
/* Narrow 16-bit lanes to 8-bit: two vectors of 8 become one vector of 16. */
100 res0_i = _mm_packs_epi16(res0_i, res2_i);
/*
 * Aligned 16-byte store of the 16 results.
 * NOTE(review): the statements that advance aPtr/cPtr are not visible in
 * this excerpt; presumably both move forward by 16 per iteration.
 */
102 _mm_store_si128((__m128i*)cPtr, res0_i);
/*
 * Tail loop over the num_points % 16 elements not covered by the SIMD loop
 * (its body is not visible in this excerpt).
 */
108 for(number = n16points * 16; number < num_points; number++) {
122 #include <emmintrin.h>
/*
 * volk_32f_binary_slicer_8i_u_sse2 (listing excerpt; the return type, braces,
 * the declarations of cPtr and zero_val, and the loop tails are not visible).
 *
 * SSE2 variant that handles 16 floats per main-loop iteration using the
 * unaligned load/store intrinsics (_mm_loadu_ps / _mm_storeu_si128), so no
 * particular buffer alignment is required by those accesses.
 *
 * Visible parameters:
 *   cVector    - int8_t buffer (presumably what cPtr points into, confirm)
 *   aVector    - const float input buffer
 *   num_points - number of elements to process
 */
130 volk_32f_binary_slicer_8i_u_sse2(
int8_t* cVector,
const float* aVector,
131 unsigned int num_points)
134 const float* aPtr = aVector;
135 unsigned int number = 0;
/* Number of full 16-element groups; the remainder goes to the tail loop. */
137 unsigned int n16points = num_points / 16;
138 __m128 a0_val, a1_val, a2_val, a3_val;
139 __m128 res0_f, res1_f, res2_f, res3_f;
140 __m128i res0_i, res1_i, res2_i, res3_i;
/* Comparison threshold: 0.0f broadcast to all four lanes. */
142 zero_val = _mm_set1_ps (0.0f);
144 for(number = 0; number < n16points; number++) {
/* Unaligned loads of four vectors of four floats (16 samples in total). */
145 a0_val = _mm_loadu_ps(aPtr);
146 a1_val = _mm_loadu_ps(aPtr+4);
147 a2_val = _mm_loadu_ps(aPtr+8);
148 a3_val = _mm_loadu_ps(aPtr+12);
/* Lane-wise (sample >= 0) comparison; the result is a per-lane mask. */
151 res0_f = _mm_cmpge_ps(a0_val, zero_val);
152 res1_f = _mm_cmpge_ps(a1_val, zero_val);
153 res2_f = _mm_cmpge_ps(a2_val, zero_val);
154 res3_f = _mm_cmpge_ps(a3_val, zero_val);
/*
 * The mask is converted float->int32 and then logically shifted right by 31,
 * leaving only the former sign bit in bit 0 of each lane.
 * NOTE(review): this appears to rely on _mm_cvtps_epi32 returning 0x80000000
 * for the all-ones (NaN) "true" mask and 0 for the all-zeros "false" mask,
 * giving 1 / 0 after the shift; confirm against the Intel documentation.
 */
157 res0_i = _mm_srli_epi32(_mm_cvtps_epi32(res0_f), 31);
158 res1_i = _mm_srli_epi32(_mm_cvtps_epi32(res1_f), 31);
159 res2_i = _mm_srli_epi32(_mm_cvtps_epi32(res2_f), 31);
160 res3_i = _mm_srli_epi32(_mm_cvtps_epi32(res3_f), 31);
/* Narrow 32-bit lanes to 16-bit: two vectors of 4 become one vector of 8. */
163 res0_i = _mm_packs_epi32(res0_i, res1_i);
164 res2_i = _mm_packs_epi32(res2_i, res3_i);
/* Narrow 16-bit lanes to 8-bit: two vectors of 8 become one vector of 16. */
167 res0_i = _mm_packs_epi16(res0_i, res2_i);
/*
 * Unaligned 16-byte store of the 16 results.
 * NOTE(review): the statements that advance aPtr/cPtr are not visible in
 * this excerpt; presumably both move forward by 16 per iteration.
 */
169 _mm_storeu_si128((__m128i*)cPtr, res0_i);
/*
 * Tail loop over the num_points % 16 elements not covered by the SIMD loop
 * (its body is not visible in this excerpt).
 */
175 for(number = n16points * 16; number < num_points; number++) {
int8_t: typedef of signed char
Definition: stdint.h:75