diff options
field | value | date |
---|---|---|
author | Tom Rondeau <tom@trondeau.com> | 2014-07-29 11:19:25 -0400 |
committer | Tom Rondeau <tom@trondeau.com> | 2014-07-29 19:27:39 -0400 |
commit | 63483459092aedc5a81354dd28454416a7e176a5 (patch) | |
tree | df7ba844d4c61a8bdcd52989cfa5ddc165def3d3 | |
parent | db2c447c4295ad3327e9059545a241e3e3f9f2f5 (diff) | |
volk: cleaned up some sign-compare warnings (signed `int` loop counters compared against `unsigned int` bounds are now `unsigned int`).
-rw-r--r-- | volk/kernels/volk/volk_16i_x5_add_quad_16i_x4.h | 5 |
-rw-r--r-- | volk/kernels/volk/volk_32f_x3_sum_of_poly_32f.h | 20 |
2 files changed, 11 insertions, 14 deletions
diff --git a/volk/kernels/volk/volk_16i_x5_add_quad_16i_x4.h b/volk/kernels/volk/volk_16i_x5_add_quad_16i_x4.h index 28575b6282..cd1952bccb 100644 --- a/volk/kernels/volk/volk_16i_x5_add_quad_16i_x4.h +++ b/volk/kernels/volk/volk_16i_x5_add_quad_16i_x4.h @@ -6,9 +6,6 @@ #include<stdio.h> - - - #ifdef LV_HAVE_SSE2 #include<xmmintrin.h> #include<emmintrin.h> @@ -117,7 +114,7 @@ static inline void volk_16i_x5_add_quad_16i_x4_a_sse2(short* target0, short* ta static inline void volk_16i_x5_add_quad_16i_x4_neon(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_points) { const unsigned int eighth_points = num_points / 8; - int number = 0; + unsigned int number = 0; int16x8_t src0_vec, src1_vec, src2_vec, src3_vec, src4_vec; int16x8_t target0_vec, target1_vec, target2_vec, target3_vec; diff --git a/volk/kernels/volk/volk_32f_x3_sum_of_poly_32f.h b/volk/kernels/volk/volk_32f_x3_sum_of_poly_32f.h index 3a5c710c6f..d56623115e 100644 --- a/volk/kernels/volk/volk_32f_x3_sum_of_poly_32f.h +++ b/volk/kernels/volk/volk_32f_x3_sum_of_poly_32f.h @@ -298,7 +298,7 @@ static inline void volk_32f_x3_sum_of_poly_32f_u_avx(float* target, float* src0, static inline void volk_32f_x3_sum_of_poly_32f_a_neon(float* __restrict target, float* __restrict src0, float* __restrict center_point_array, float* __restrict cutoff, unsigned int num_points) { - int i; + unsigned int i; float zero[4] = {0.0f, 0.0f, 0.0f, 0.0f }; float32x2_t x_to_1, x_to_2, x_to_3, x_to_4; @@ -307,7 +307,7 @@ static inline void volk_32f_x3_sum_of_poly_32f_a_neon(float* __restrict target, float32x4_t x_qvector, c_qvector, cpa_qvector; float accumulator; float res_accumulators[4]; - + c_qvector = vld1q_f32( zero ); // load the cutoff in to a vector cutoff_vector = vdup_n_f32( *cutoff ); @@ -327,17 +327,17 @@ static inline void volk_32f_x3_sum_of_poly_32f_a_neon(float* __restrict target, x_low = vzip_f32(x_to_1, x_to_2); // [x^2 | 
x^1 || x^2 | x^1] x_high = vzip_f32(x_to_3, x_to_4); // [x^4 | x^3 || x^4 | x^3] // float32x4_t vcombine_f32(float32x2_t low, float32x2_t high); // VMOV d0,d0 - x_qvector = vcombine_f32(x_low.val[0], x_high.val[0]); + x_qvector = vcombine_f32(x_low.val[0], x_high.val[0]); // now we finally have [x^4 | x^3 | x^2 | x] ! - + c_qvector = vmlaq_f32(c_qvector, x_qvector, cpa_qvector); } // there should be better vector reduction techniques vst1q_f32(res_accumulators, c_qvector ); - accumulator = res_accumulators[0] + res_accumulators[1] + + accumulator = res_accumulators[0] + res_accumulators[1] + res_accumulators[2] + res_accumulators[3]; - + *target = accumulator + center_point_array[4] * (float)num_points; } @@ -348,11 +348,11 @@ static inline void volk_32f_x3_sum_of_poly_32f_a_neon(float* __restrict target, static inline void volk_32f_x3_sum_of_poly_32f_neonvert(float* __restrict target, float* __restrict src0, float* __restrict center_point_array, float* __restrict cutoff, unsigned int num_points) { - int i; + unsigned int i; float zero[4] = {0.0f, 0.0f, 0.0f, 0.0f }; float accumulator; - + float32x4_t accumulator1_vec, accumulator2_vec, accumulator3_vec, accumulator4_vec; accumulator1_vec = vld1q_f32(zero); @@ -373,7 +373,7 @@ static inline void volk_32f_x3_sum_of_poly_32f_neonvert(float* __restrict target // nathan is not sure why this is slower *and* wrong compared to neonvertfma for(i=0; i < num_points/4; ++i) { - // load x + // load x x_to_1 = vld1q_f32( src0 ); // Get a vector of max(src0, cutoff) @@ -398,7 +398,7 @@ static inline void volk_32f_x3_sum_of_poly_32f_neonvert(float* __restrict target __VOLK_ATTR_ALIGNED(32) float res_accumulators[4]; vst1q_f32(res_accumulators, accumulator1_vec ); - accumulator = res_accumulators[0] + res_accumulators[1] + + accumulator = res_accumulators[0] + res_accumulators[1] + res_accumulators[2] + res_accumulators[3]; float fst = 0.0; |