summary | refs | log | tree | commit | diff
path: root/volk
diff options
context:
space:
mode:
author Nathan West <nathan.west@okstate.edu> 2014-10-28 05:36:57 -0500
committer Nathan West <nathan.west@okstate.edu> 2014-10-28 05:36:57 -0500
commit 5d1c29c4491a2fcefc69857a7f2f557f6b8eff57 (patch)
tree 8405d5d00bf6a30d517a4ea771bf304bd04c994f /volk
parent eba094a3010ca326da747390be51da8326fff0c1 (diff)
volk: fix memory overrun/corruption in neon binary_slicer_8i
Diffstat (limited to 'volk')
-rw-r--r-- volk/kernels/volk/volk_32f_binary_slicer_8i.h 10
1 files changed, 5 insertions, 5 deletions
diff --git a/volk/kernels/volk/volk_32f_binary_slicer_8i.h b/volk/kernels/volk/volk_32f_binary_slicer_8i.h
index 17b48d7510..ae4420b6e1 100644
--- a/volk/kernels/volk/volk_32f_binary_slicer_8i.h
+++ b/volk/kernels/volk/volk_32f_binary_slicer_8i.h
@@ -221,7 +221,7 @@ volk_32f_binary_slicer_8i_neon(int8_t* cVector, const float* aVector,
int8_t* cPtr = cVector;
const float* aPtr = aVector;
unsigned int number = 0;
- unsigned int n8points = num_points / 8;
+ unsigned int n16points = num_points / 16;
float32x4x2_t input_val0, input_val1;
float32x4_t zero_val;
@@ -237,7 +237,7 @@ volk_32f_binary_slicer_8i_neon(int8_t* cVector, const float* aVector,
// TODO: this is a good candidate for asm because the vcombines
// can be eliminated simply by picking dst registers that are
// adjacent.
- for(number = 0; number < n8points; number++) {
+ for(number = 0; number < n16points; number++) {
input_val0 = vld2q_f32(aPtr);
input_val1 = vld2q_f32(aPtr+8);
@@ -269,12 +269,12 @@ volk_32f_binary_slicer_8i_neon(int8_t* cVector, const float* aVector,
res_u8.val[1] = vand_u8(one, res_u8.val[1]);
vst2_u8((unsigned char*)cPtr, res_u8);
- cPtr += 8;
- aPtr += 8;
+ cPtr += 16;
+ aPtr += 16;
}
- for(number = n8points * 8; number < num_points; number++) {
+ for(number = n16points * 16; number < num_points; number++) {
if(*aPtr++ >= 0) {
*cPtr++ = 1;
}