summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- volk/include/volk/volk_16i_max_star_16i_a.h | 8
-rw-r--r-- volk/include/volk/volk_16i_max_star_horizontal_16i_a.h | 8
-rw-r--r-- volk/include/volk/volk_16i_permute_and_scalar_add_a.h | 7
-rw-r--r-- volk/include/volk/volk_16i_x4_quad_max_star_16i_a.h | 9
-rw-r--r-- volk/include/volk/volk_16i_x5_add_quad_16i_x4_a.h | 8
-rw-r--r-- volk/include/volk/volk_32f_x3_sum_of_poly_32f_a.h | 7
-rw-r--r-- volk/include/volk/volk_32fc_index_max_16u_a.h | 9
-rw-r--r-- volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a.h | 12
-rw-r--r-- volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h | 8
-rw-r--r-- volk/include/volk/volk_32fc_x2_dot_prod_32fc_a.h | 20
-rw-r--r-- volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a.h | 8
-rw-r--r-- volk/include/volk/volk_32fc_x2_square_dist_32f_a.h | 8
12 files changed, 77 insertions, 35 deletions
diff --git a/volk/include/volk/volk_16i_max_star_16i_a.h b/volk/include/volk/volk_16i_max_star_16i_a.h
index edfff8a82b..ca81cf0d62 100644
--- a/volk/include/volk/volk_16i_max_star_16i_a.h
+++ b/volk/include/volk/volk_16i_max_star_16i_a.h
@@ -12,9 +12,9 @@
#include<emmintrin.h>
#include<tmmintrin.h>
-static inline void volk_16i_max_star_16i_a_ssse3(short* target, short* src0, unsigned int num_bytes) {
-
+static inline void volk_16i_max_star_16i_a_ssse3(short* target, short* src0, unsigned int num_points) {
+ const unsigned int num_bytes = num_points*2;
short candidate = src0[0];
short cands[8];
@@ -87,7 +87,9 @@ static inline void volk_16i_max_star_16i_a_ssse3(short* target, short* src0, un
#ifdef LV_HAVE_GENERIC
-static inline void volk_16i_max_star_16i_a_generic(short* target, short* src0, unsigned int num_bytes) {
+static inline void volk_16i_max_star_16i_a_generic(short* target, short* src0, unsigned int num_points) {
+
+ const unsigned int num_bytes = num_points*2;
int i = 0;
diff --git a/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h b/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h
index c1c9084256..13c235bc0b 100644
--- a/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h
+++ b/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h
@@ -13,7 +13,9 @@
#include<emmintrin.h>
#include<tmmintrin.h>
-static inline void volk_16i_max_star_horizontal_16i_a_ssse3(int16_t* target, int16_t* src0, unsigned int num_bytes) {
+static inline void volk_16i_max_star_horizontal_16i_a_ssse3(int16_t* target, int16_t* src0, unsigned int num_points) {
+
+ const unsigned int num_bytes = num_points*2;
const static uint8_t shufmask0[16] = {0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
const static uint8_t shufmask1[16] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d};
@@ -110,7 +112,9 @@ static inline void volk_16i_max_star_horizontal_16i_a_ssse3(int16_t* target, in
#ifdef LV_HAVE_GENERIC
-static inline void volk_16i_max_star_horizontal_16i_a_generic(int16_t* target, int16_t* src0, unsigned int num_bytes) {
+static inline void volk_16i_max_star_horizontal_16i_a_generic(int16_t* target, int16_t* src0, unsigned int num_points) {
+
+ const unsigned int num_bytes = num_points*2;
int i = 0;
diff --git a/volk/include/volk/volk_16i_permute_and_scalar_add_a.h b/volk/include/volk/volk_16i_permute_and_scalar_add_a.h
index 47e3cbf9cb..d91b36208a 100644
--- a/volk/include/volk/volk_16i_permute_and_scalar_add_a.h
+++ b/volk/include/volk/volk_16i_permute_and_scalar_add_a.h
@@ -13,8 +13,9 @@
#include<xmmintrin.h>
#include<emmintrin.h>
-static inline void volk_16i_permute_and_scalar_add_a_sse2(short* target, short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_bytes) {
+static inline void volk_16i_permute_and_scalar_add_a_sse2(short* target, short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_points) {
+ const unsigned int num_bytes = num_points*2;
__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
@@ -117,7 +118,9 @@ static inline void volk_16i_permute_and_scalar_add_a_sse2(short* target, short
#ifdef LV_HAVE_GENERIC
-static inline void volk_16i_permute_and_scalar_add_a_generic(short* target, short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_bytes) {
+static inline void volk_16i_permute_and_scalar_add_a_generic(short* target, short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_points) {
+
+ const unsigned int num_bytes = num_points*2;
int i = 0;
diff --git a/volk/include/volk/volk_16i_x4_quad_max_star_16i_a.h b/volk/include/volk/volk_16i_x4_quad_max_star_16i_a.h
index 0d84985530..18b2e3d845 100644
--- a/volk/include/volk/volk_16i_x4_quad_max_star_16i_a.h
+++ b/volk/include/volk/volk_16i_x4_quad_max_star_16i_a.h
@@ -13,10 +13,9 @@
#include<emmintrin.h>
-static inline void volk_16i_x4_quad_max_star_16i_a_sse2(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_bytes) {
-
-
+static inline void volk_16i_x4_quad_max_star_16i_a_sse2(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_points) {
+ const unsigned int num_bytes = num_points*2;
int i = 0;
@@ -168,7 +167,9 @@ static inline void volk_16i_x4_quad_max_star_16i_a_sse2(short* target, short* s
#ifdef LV_HAVE_GENERIC
-static inline void volk_16i_x4_quad_max_star_16i_a_generic(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_bytes) {
+static inline void volk_16i_x4_quad_max_star_16i_a_generic(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_points) {
+
+ const unsigned int num_bytes = num_points*2;
int i = 0;
diff --git a/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a.h b/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a.h
index 5560b92d92..677cb40e9f 100644
--- a/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a.h
+++ b/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a.h
@@ -13,7 +13,9 @@
#include<xmmintrin.h>
#include<emmintrin.h>
-static inline void volk_16i_x5_add_quad_16i_x4_a_sse2(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_bytes) {
+static inline void volk_16i_x5_add_quad_16i_x4_a_sse2(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_points) {
+
+ const unsigned int num_bytes = num_points*2;
__m128i xmm0, xmm1, xmm2, xmm3, xmm4;
__m128i *p_target0, *p_target1, *p_target2, *p_target3, *p_src0, *p_src1, *p_src2, *p_src3, *p_src4;
@@ -113,7 +115,9 @@ static inline void volk_16i_x5_add_quad_16i_x4_a_sse2(short* target0, short* ta
#ifdef LV_HAVE_GENERIC
-static inline void volk_16i_x5_add_quad_16i_x4_a_generic(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_bytes) {
+static inline void volk_16i_x5_add_quad_16i_x4_a_generic(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_points) {
+
+ const unsigned int num_bytes = num_points*2;
int i = 0;
diff --git a/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a.h b/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a.h
index 3c530628c8..e33e5a916a 100644
--- a/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a.h
+++ b/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a.h
@@ -13,8 +13,9 @@
#include<xmmintrin.h>
#include<pmmintrin.h>
-static inline void volk_32f_x3_sum_of_poly_32f_a_sse3(float* target, float* src0, float* center_point_array, float* cutoff, unsigned int num_bytes) {
+static inline void volk_32f_x3_sum_of_poly_32f_a_sse3(float* target, float* src0, float* center_point_array, float* cutoff, unsigned int num_points) {
+ const unsigned int num_bytes = num_points*4;
float result = 0.0;
float fst = 0.0;
@@ -100,9 +101,9 @@ static inline void volk_32f_x3_sum_of_poly_32f_a_sse3(float* target, float* src0
#ifdef LV_HAVE_GENERIC
-static inline void volk_32f_x3_sum_of_poly_32f_a_generic(float* target, float* src0, float* center_point_array, float* cutoff, unsigned int num_bytes) {
-
+static inline void volk_32f_x3_sum_of_poly_32f_a_generic(float* target, float* src0, float* center_point_array, float* cutoff, unsigned int num_points) {
+ const unsigned int num_bytes = num_points*4;
float result = 0.0;
float fst = 0.0;
diff --git a/volk/include/volk/volk_32fc_index_max_16u_a.h b/volk/include/volk/volk_32fc_index_max_16u_a.h
index 842a6a0420..0e2201152c 100644
--- a/volk/include/volk/volk_32fc_index_max_16u_a.h
+++ b/volk/include/volk/volk_32fc_index_max_16u_a.h
@@ -11,9 +11,9 @@
#include<pmmintrin.h>
-static inline void volk_32fc_index_max_16u_a_sse3(unsigned int* target, lv_32fc_t* src0, unsigned int num_bytes) {
-
+static inline void volk_32fc_index_max_16u_a_sse3(unsigned int* target, lv_32fc_t* src0, unsigned int num_points) {
+ const unsigned int num_bytes = num_points*8;
union bit128 holderf;
union bit128 holderi;
@@ -189,7 +189,10 @@ static inline void volk_32fc_index_max_16u_a_sse3(unsigned int* target, lv_32fc_
#endif /*LV_HAVE_SSE3*/
#ifdef LV_HAVE_GENERIC
-static inline void volk_32fc_index_max_16u_a_generic(unsigned int* target, lv_32fc_t* src0, unsigned int num_bytes) {
+static inline void volk_32fc_index_max_16u_a_generic(unsigned int* target, lv_32fc_t* src0, unsigned int num_points) {
+
+ const unsigned int num_bytes = num_points*8;
+
float sq_dist = 0.0;
float max = 0.0;
unsigned int index = 0;
diff --git a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a.h b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a.h
index e3dedf2fcd..0deb9c2f90 100644
--- a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a.h
+++ b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a.h
@@ -9,7 +9,9 @@
#ifdef LV_HAVE_GENERIC
-static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
+static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
+
+ const unsigned int num_bytes = num_points*8;
float * res = (float*) result;
float * in = (float*) input;
@@ -63,7 +65,9 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_generic(lv_32fc_t* res
#if LV_HAVE_SSE && LV_HAVE_64
-static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_sse(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
+static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_sse(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
+
+ const unsigned int num_bytes = num_points*8;
__VOLK_ATTR_ALIGNED(16) static const uint32_t conjugator[4]= {0x00000000, 0x80000000, 0x00000000, 0x80000000};
@@ -204,7 +208,9 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_sse(lv_32fc_t* result,
#endif
#if LV_HAVE_SSE && LV_HAVE_32
-static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
+static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
+
+ const unsigned int num_bytes = num_points*8;
__VOLK_ATTR_ALIGNED(16) static const uint32_t conjugator[4]= {0x00000000, 0x80000000, 0x00000000, 0x80000000};
diff --git a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h
index e7493413f7..5b16b8639a 100644
--- a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h
+++ b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h
@@ -8,7 +8,9 @@
#ifdef LV_HAVE_GENERIC
-static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
+static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
+
+ const unsigned int num_bytes = num_points*8;
float * res = (float*) result;
float * in = (float*) input;
@@ -64,7 +66,9 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_generic(lv_32fc_t* res
#include <mmintrin.h>
-static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
+static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
+
+ unsigned int num_bytes = num_points*8;
// Variable never used?
//__VOLK_ATTR_ALIGNED(16) static const uint32_t conjugator[4]= {0x00000000, 0x80000000, 0x00000000, 0x80000000};
diff --git a/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a.h b/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a.h
index caef3e6f0d..10ff4080ed 100644
--- a/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a.h
+++ b/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a.h
@@ -10,7 +10,9 @@
#ifdef LV_HAVE_GENERIC
-static inline void volk_32fc_x2_dot_prod_32fc_a_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
+static inline void volk_32fc_x2_dot_prod_32fc_a_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
+
+ const unsigned int num_bytes = num_points*8;
float * res = (float*) result;
float * in = (float*) input;
@@ -46,8 +48,9 @@ static inline void volk_32fc_x2_dot_prod_32fc_a_generic(lv_32fc_t* result, const
#if LV_HAVE_SSE && LV_HAVE_64
-static inline void volk_32fc_x2_dot_prod_32fc_a_sse_64(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
+static inline void volk_32fc_x2_dot_prod_32fc_a_sse_64(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
+ const unsigned int num_bytes = num_points*8;
asm
(
@@ -175,11 +178,11 @@ static inline void volk_32fc_x2_dot_prod_32fc_a_sse_64(lv_32fc_t* result, const
#if LV_HAVE_SSE && LV_HAVE_32
-static inline void volk_32fc_x2_dot_prod_32fc_a_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
-
- volk_32fc_x2_dot_prod_32fc_a_generic(result, input, taps, num_bytes);
+static inline void volk_32fc_x2_dot_prod_32fc_a_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
+ volk_32fc_x2_dot_prod_32fc_a_generic(result, input, taps, num_points);
#if 0
+ const unsigned int num_bytes = num_points*8;
asm volatile
(
" #pushl %%ebp\n\t"
@@ -299,8 +302,9 @@ static inline void volk_32fc_x2_dot_prod_32fc_a_sse_32(lv_32fc_t* result, const
#include <pmmintrin.h>
-static inline void volk_32fc_x2_dot_prod_32fc_a_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
+static inline void volk_32fc_x2_dot_prod_32fc_a_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
+ const unsigned int num_bytes = num_points*8;
lv_32fc_t dotProduct;
memset(&dotProduct, 0x0, 2*sizeof(float));
@@ -356,7 +360,9 @@ static inline void volk_32fc_x2_dot_prod_32fc_a_sse3(lv_32fc_t* result, const lv
#include <smmintrin.h>
-static inline void volk_32fc_x2_dot_prod_32fc_a_sse4_1(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
+static inline void volk_32fc_x2_dot_prod_32fc_a_sse4_1(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
+
+ const unsigned int num_bytes = num_points*8;
__m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, real0, real1, im0, im1;
float *p_input, *p_taps;
diff --git a/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a.h b/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a.h
index 75eb9173d5..d985fcd7f5 100644
--- a/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a.h
+++ b/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a.h
@@ -10,8 +10,9 @@
#include<xmmintrin.h>
#include<pmmintrin.h>
-static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_bytes) {
+static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_points) {
+ const unsigned int num_bytes = num_points*8;
__m128 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
@@ -106,7 +107,10 @@ static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_sse3(float* t
#endif /*LV_HAVE_SSE3*/
#ifdef LV_HAVE_GENERIC
-static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_bytes) {
+static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_points) {
+
+ const unsigned int num_bytes = num_points*8;
+
lv_32fc_t diff;
float sq_dist;
unsigned int i = 0;
diff --git a/volk/include/volk/volk_32fc_x2_square_dist_32f_a.h b/volk/include/volk/volk_32fc_x2_square_dist_32f_a.h
index b819eaffd4..a10b6702bb 100644
--- a/volk/include/volk/volk_32fc_x2_square_dist_32f_a.h
+++ b/volk/include/volk/volk_32fc_x2_square_dist_32f_a.h
@@ -9,8 +9,9 @@
#include<xmmintrin.h>
#include<pmmintrin.h>
-static inline void volk_32fc_x2_square_dist_32f_a_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_bytes) {
+static inline void volk_32fc_x2_square_dist_32f_a_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_points) {
+ const unsigned int num_bytes = num_points*8;
__m128 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
@@ -92,7 +93,10 @@ static inline void volk_32fc_x2_square_dist_32f_a_sse3(float* target, lv_32fc_t*
#endif /*LV_HAVE_SSE3*/
#ifdef LV_HAVE_GENERIC
-static inline void volk_32fc_x2_square_dist_32f_a_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_bytes) {
+static inline void volk_32fc_x2_square_dist_32f_a_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_points) {
+
+ const unsigned int num_bytes = num_points*8;
+
lv_32fc_t diff;
float sq_dist;
unsigned int i = 0;