summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- volk/kernels/volk/asm/neon/volk_32f_s32f_multiply_32f_neonasm.s 17
1 files changed, 6 insertions, 11 deletions
diff --git a/volk/kernels/volk/asm/neon/volk_32f_s32f_multiply_32f_neonasm.s b/volk/kernels/volk/asm/neon/volk_32f_s32f_multiply_32f_neonasm.s
index 8262e4cd29..235d37538f 100644
--- a/volk/kernels/volk/asm/neon/volk_32f_s32f_multiply_32f_neonasm.s
+++ b/volk/kernels/volk/asm/neon/volk_32f_s32f_multiply_32f_neonasm.s
@@ -6,7 +6,7 @@ volk_32f_s32f_multiply_32f_neonasm:
@ r2 - bVector: pointer to input array 2
@ r3 - num_points: number of items to process
- stmfd sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, sl} @ prologue - save register states
+ stmfd sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12} @ prologue - save register states
@ quarter_points = num_points / 4
@@ -21,7 +21,7 @@ volk_32f_s32f_multiply_32f_neonasm:
mov r5, r0
@ load the scalar to a quad register
- @ vmov.32 d2[0], r2
+ @ vmov.32 d2[0], r2
@ The scalar might be in s0, not totally sure
vdup.32 q2, d0[0]
@@ -30,17 +30,17 @@ volk_32f_s32f_multiply_32f_neonasm:
@ vld1.32 {q1}, [r4:128]! @ aVal
@ vmul.f32 q3, q1, q2
@ vst1.32 {q3}, [r5:128]! @ cVal
- @
+ @
@ subs r10, r10, #1
@ bne .loop1 @ first loop
-
+
@ number = quarter_points * 4
mov r10, r11, asl #2
.loop2:
@ cmp num_points, number
@ bls .done
- @
+ @
@ vld1.32 {d0[0]}, [aVector]!
@ vmul.f32 s2, s0, s4
@ vst1.32 {d1[0]}, [cVector]!
@@ -48,10 +48,5 @@ volk_32f_s32f_multiply_32f_neonasm:
@ b .loop2
.done:
- ldmfd sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, sl} @ epilogue - restore register states
+ ldmfd sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12} @ epilogue - restore register states
bx lr
-
-
-
-
-