Removed whitespace and added dtools/bin/remove-whitespace as a tool to do this in the future.

The sed script was provided by Moritz Fischer.
author: Tom Rondeau <trondeau@vt.edu> 2012-04-13 18:36:53 -0400
committer: Tom Rondeau <trondeau@vt.edu> 2012-04-13 18:36:53 -0400
commit: f919f9dcbb54a08e6e26d6c229ce92fb784fa1b2 (patch)
tree: 7e846386b9eb1676f9a93fc4a1e55916b9accc97 /volk
parent: 6a1e9783fec6ed827f49db27c171591d30f32933 (diff)
140 files changed, 1385 insertions, 1385 deletions
diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc
index bd36d6dc7d..76b9f40317 100644
--- a/volk/apps/volk_profile.cc
+++ b/volk/apps/volk_profile.cc
@@ -13,7 +13,7 @@ extern "C" {
 int main(int argc, char *argv[]) {
 
     std::vector<std::string> results;
-    
+
     //VOLK_PROFILE(volk_16i_x5_add_quad_16i_x4_a, 1e-4, 2046, 10000, &results);
     //VOLK_PROFILE(volk_16i_branch_4_state_8_a, 1e-4, 2046, 10000, &results);
     VOLK_PROFILE(volk_16ic_s32f_deinterleave_real_32f_a, 1e-5, 32768.0, 204600, 10000, &results);
@@ -143,7 +143,7 @@ int main(int argc, char *argv[]) {
 #this file is generated by volk_profile.\n\
 #the function name is followed by the preferred architecture.\n\
 ";
-    
+
     BOOST_FOREACH(std::string result, results) {
         config << result << std::endl;
     }
diff --git a/volk/cmake/FindORC.cmake b/volk/cmake/FindORC.cmake
index 1c4c417add..a5f35c465e 100644
--- a/volk/cmake/FindORC.cmake
+++ b/volk/cmake/FindORC.cmake
@@ -8,16 +8,16 @@ FIND_PROGRAM(ORCC_EXECUTABLE orcc
              HINTS ${PC_ORC_TOOLSDIR}
 	     PATHS ${ORC_ROOT}/bin ${CMAKE_INSTALL_PREFIX}/bin)
 
-FIND_PATH(ORC_INCLUDE_DIR NAMES orc/orc.h 
+FIND_PATH(ORC_INCLUDE_DIR NAMES orc/orc.h
           HINTS ${PC_ORC_INCLUDEDIR}
 	  PATHS ${ORC_ROOT}/include/orc-0.4 ${CMAKE_INSTALL_PREFIX}/include/orc-0.4)
 
 
-FIND_PATH(ORC_LIBRARY_DIR NAMES ${CMAKE_SHARED_LIBRARY_PREFIX}orc-0.4${CMAKE_SHARED_LIBRARY_SUFFIX} 
+FIND_PATH(ORC_LIBRARY_DIR NAMES ${CMAKE_SHARED_LIBRARY_PREFIX}orc-0.4${CMAKE_SHARED_LIBRARY_SUFFIX}
           HINTS ${PC_ORC_LIBDIR}
 	  PATHS ${ORC_ROOT}/lib${LIB_SUFFIX} ${CMAKE_INSTALL_PREFIX}/lib${LIB_SUFFIX})
 
-FIND_LIBRARY(ORC_LIB orc-0.4 
+FIND_LIBRARY(ORC_LIB orc-0.4
              HINTS ${PC_ORC_LIBRARY_DIRS}
 	     PATHS ${ORC_ROOT}/lib${LIB_SUFFIX} ${CMAKE_INSTALL_PREFIX}/lib${LIB_SUFFIX})
 
diff --git a/volk/gen/archs.xml b/volk/gen/archs.xml
index effd90d331..ca21184759 100644
--- a/volk/gen/archs.xml
+++ b/volk/gen/archs.xml
@@ -19,7 +19,7 @@
   <flag>m32</flag>
   <overrule>MD_SUBCPU</overrule>
   <overrule_val>x86_64</overrule_val>
-</arch> 
+</arch>
 
 <arch name="64" type="x86">
   <op>0x80000001</op>
diff --git a/volk/gen/make_c.py b/volk/gen/make_c.py
index 0f9bcde347..233cb85c27 100644
--- a/volk/gen/make_c.py
+++ b/volk/gen/make_c.py
@@ -48,7 +48,7 @@ struct volk_machine *get_machine(void) {
     extern struct volk_machine *volk_machines[];
     extern unsigned int n_volk_machines;
     static struct volk_machine *machine = NULL;
-    
+
     if(machine != NULL) return machine;
     else {
         unsigned int max_score = 0;
@@ -71,7 +71,7 @@ unsigned int volk_get_alignment(void) {
 }
 
 """
-    
+
     for i in range(len(functions)):
         tempstring += "void get_" + functions[i] + replace_arch.sub("", arched_arglist[i]) + "\n"
         tempstring += "    %s = get_machine()->%s_archs[volk_rank_archs(get_machine()->%s_indices, get_machine()->%s_arch_defs, get_machine()->%s_n_archs, get_machine()->%s_name, volk_get_lvarch())];\n" % (functions[i], functions[i], functions[i], functions[i], functions[i], functions[i])
diff --git a/volk/gen/make_config_fixed.py b/volk/gen/make_config_fixed.py
index 3fd1bdf0aa..96f20b6b25 100644
--- a/volk/gen/make_config_fixed.py
+++ b/volk/gen/make_config_fixed.py
@@ -18,4 +18,4 @@ def make_config_fixed(dom) :
     return tempstring;
 
 
-    
+
diff --git a/volk/gen/make_cpuid_c.py b/volk/gen/make_cpuid_c.py
index 2be1123a80..e30d643cb8 100644
--- a/volk/gen/make_cpuid_c.py
+++ b/volk/gen/make_cpuid_c.py
@@ -1,24 +1,24 @@
 #!/usr/bin/env python
 #
 # Copyright 2011 Free Software Foundation, Inc.
-# 
+#
 # This file is part of GNU Radio
-# 
+#
 # GNU Radio is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 3, or (at your option)
 # any later version.
-# 
+#
 # GNU Radio is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU General Public License
 # along with GNU Radio; see the file COPYING.  If not, write to
 # the Free Software Foundation, Inc., 51 Franklin Street,
 # Boston, MA 02110-1301, USA.
-# 
+#
 
 from xml.dom import minidom
 
@@ -79,7 +79,7 @@ static inline unsigned int cpuid_edx(unsigned int op) {
 
 def make_cpuid_c(dom) :
     tempstring = HEADER_TEMPL;
-    
+
     for domarch in dom:
         if str(domarch.attributes["type"].value) == "x86":
             if "no_test" in domarch.attributes.keys():
@@ -103,7 +103,7 @@ def make_cpuid_c(dom) :
             val = domarch.getElementsByTagName("val");
             if val:
                 val = str(val[0].firstChild.data);
-            
+
             if no_test:
                 tempstring = tempstring + """\
 int i_can_has_%s () {
@@ -113,9 +113,9 @@ int i_can_has_%s () {
     return 0;
 #endif
 }
-                
+
 """ % (arch)
-                
+
             elif op == "1":
                 tempstring = tempstring + """\
 int i_can_has_%s () {
@@ -144,7 +144,7 @@ int i_can_has_%s () {
 }
 
 """ % (arch, val, reg, op, shift, val)
-        
+
         elif str(domarch.attributes["type"].value) == "powerpc":
             arch = str(domarch.attributes["name"].value);
             tempstring = tempstring + """\
@@ -178,7 +178,7 @@ int i_can_has_%s () {
     unsigned int found_neon = 0;
     auxvec_f = fopen("/proc/self/auxv", "rb");
     if(!auxvec_f) return 0;
-    
+
     //so auxv is basically 32b of ID and 32b of value
     //so it goes like this
     while(!found_neon && auxvec_f) {
@@ -186,7 +186,7 @@ int i_can_has_%s () {
       if((auxvec[0] == AT_HWCAP) && (auxvec[1] & HWCAP_NEON))
         found_neon = 1;
     }
-    
+
     fclose(auxvec_f);
     return found_neon;
 
@@ -196,7 +196,7 @@ int i_can_has_%s () {
 }
 
 """ % (arch)
-        
+
         elif str(domarch.attributes["type"].value) == "all":
             arch = str(domarch.attributes["name"].value);
             tempstring = tempstring + """\
@@ -213,7 +213,7 @@ int i_can_has_%s () {
 }
 
 """ % (arch)
-    
+
     tempstring = tempstring + "void volk_cpu_init() {\n";
     for domarch in dom:
         arch = str(domarch.attributes["name"].value);
@@ -231,9 +231,9 @@ int i_can_has_%s () {
 
     return tempstring;
 
-    
-            
-        
 
-                
-                
+
+
+
+
+
diff --git a/volk/gen/make_cpuid_h.py b/volk/gen/make_cpuid_h.py
index 4fe5c4e07b..1aa7837815 100644
--- a/volk/gen/make_cpuid_h.py
+++ b/volk/gen/make_cpuid_h.py
@@ -1,24 +1,24 @@
 #!/usr/bin/env python
 #
 # Copyright 2011 Free Software Foundation, Inc.
-# 
+#
 # This file is part of GNU Radio
-# 
+#
 # GNU Radio is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 3, or (at your option)
 # any later version.
-# 
+#
 # GNU Radio is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU General Public License
 # along with GNU Radio; see the file COPYING.  If not, write to
 # the Free Software Foundation, Inc., 51 Franklin Street,
 # Boston, MA 02110-1301, USA.
-# 
+#
 
 from xml.dom import minidom
 
@@ -44,5 +44,5 @@ def make_cpuid_h(dom) :
     tempstring = tempstring + "\n";
     tempstring = tempstring + "__VOLK_DECL_END\n";
     tempstring = tempstring + "#endif /*INCLUDED_VOLK_CPU_H*/\n"
-    
+
     return tempstring;
diff --git a/volk/gen/make_each_machine_c.py b/volk/gen/make_each_machine_c.py
index a3f6203bab..535578859a 100644
--- a/volk/gen/make_each_machine_c.py
+++ b/volk/gen/make_each_machine_c.py
@@ -59,7 +59,7 @@ def make_each_machine_c(machine_name, archs, functions, fcountlist, taglist, ali
 """
     for arch in archs:
         tempstring += "#define LV_HAVE_" + arch.swapcase() + " 1\n"
-    
+
     tempstring += """
 #include <volk/volk_common.h>
 #include "volk_machines.h"
diff --git a/volk/gen/make_environment_init_c.py b/volk/gen/make_environment_init_c.py
index 263d5bcd13..6186162ee5 100644
--- a/volk/gen/make_environment_init_c.py
+++ b/volk/gen/make_environment_init_c.py
@@ -14,7 +14,7 @@ def make_environment_init_c(dom) :
             tempstring = tempstring + "#endif\n"
     tempstring = tempstring + '\n\n';
     tempstring = tempstring + "void volk_environment_init(){\n"
-    
+
     for domarch in dom:
         arch = str(domarch.attributes["name"].value);
         envs = domarch.getElementsByTagName("environment");
@@ -22,11 +22,11 @@ def make_environment_init_c(dom) :
             cmd = str(env.firstChild.data);
             tempstring = tempstring + "#ifdef LV_HAVE_" + arch.swapcase() + "\n";
             tempstring = tempstring + "        " + cmd + "\n";
-            tempstring = tempstring + "#endif\n"     
-    
+            tempstring = tempstring + "#endif\n"
+
     tempstring = tempstring + "}\n";
     return tempstring;
-            
 
 
-    
+
+
diff --git a/volk/gen/make_environment_init_h.py b/volk/gen/make_environment_init_h.py
index 655d73f54c..0b0f35d618 100644
--- a/volk/gen/make_environment_init_h.py
+++ b/volk/gen/make_environment_init_h.py
@@ -12,7 +12,7 @@ def make_environment_init_h() :
     tempstring = tempstring + "__VOLK_DECL_END\n";
     tempstring = tempstring + "#endif\n"
     return tempstring;
-            
 
 
-    
+
+
diff --git a/volk/gen/make_h.py b/volk/gen/make_h.py
index 354e572588..cf8987c398 100644
--- a/volk/gen/make_h.py
+++ b/volk/gen/make_h.py
@@ -35,4 +35,4 @@ VOLK_API unsigned int volk_get_alignment(void);
     tempstring = tempstring + "#endif /*INCLUDED_VOLK_RUNTIME*/\n";
 
     return tempstring;
-    
+
diff --git a/volk/gen/make_machines_c.py b/volk/gen/make_machines_c.py
index a7ab63d6e5..f19da4ae44 100644
--- a/volk/gen/make_machines_c.py
+++ b/volk/gen/make_machines_c.py
@@ -33,7 +33,7 @@ struct volk_machine *volk_machines[] = {
         tempstring += "&volk_machine_" + machine
         tempstring += ","
         tempstring += "\n#endif\n"
-    
+
     tempstring += r"""
 };
 unsigned int n_volk_machines = sizeof(volk_machines)/sizeof(*volk_machines);
diff --git a/volk/gen/make_machines_h.py b/volk/gen/make_machines_h.py
index a48caa89c0..d2374120b4 100644
--- a/volk/gen/make_machines_h.py
+++ b/volk/gen/make_machines_h.py
@@ -41,15 +41,15 @@ struct volk_machine {
         tempstring += "    const int %s_arch_defs[%d];\n"%(function, len(archs))
         tempstring += "    const %s %s_archs[%d];\n"%(replace_volk.sub("p", function), function, len(archs))
         tempstring += "    const int %s_n_archs;\n"%function
-    
+
     tempstring += r"""};
-    
+
 """
     for machine in machines:
         tempstring += """#if LV_MACHINE_""" + machine.swapcase() + "\n"
         tempstring += "extern struct volk_machine volk_machine_" + machine + ";\n"
         tempstring += """#endif\n"""
-    
+
     tempstring += r"""
 
 __VOLK_DECL_END
diff --git a/volk/gen/make_makefile_am.py b/volk/gen/make_makefile_am.py
index 0dc088a80b..880ce40945 100644
--- a/volk/gen/make_makefile_am.py
+++ b/volk/gen/make_makefile_am.py
@@ -21,7 +21,7 @@ def make_makefile_am(dom, machines, archflags_dict):
     tempstring = r"""
 # This file is automatically generated by make_makefile_am.py.
 # Do not edit this file.
-    
+
 include $(top_srcdir)/Makefile.common
 
 AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) \
@@ -55,17 +55,17 @@ volk_orc_CFLAGS = -DLV_HAVE_ORC=1
 volk_orc_LDFLAGS = $(ORC_LDFLAGS) -lorc-0.4
 volk_orc_LIBADD = ../orc/libvolk_orc.la
 else
-volk_orc_CFLAGS = 
+volk_orc_CFLAGS =
 volk_orc_LDFLAGS =
-volk_orc_LIBADD = 
+volk_orc_LIBADD =
 endif
 
 libvolk_la_CPPFLAGS = $(AM_CPPFLAGS) $(volk_orc_CFLAGS)
 libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS)
 libvolk_la_LIBADD = $(volk_orc_LIBADD)
 
-noinst_LTLIBRARIES = 
-    
+noinst_LTLIBRARIES =
+
 """
 
     #here be dragons
@@ -76,7 +76,7 @@ noinst_LTLIBRARIES =
         for arch in machines[machine_name]:
             if archflags_dict[arch] != "none":
                 tempstring += "-" + archflags_dict[arch] + " "
-                
+
         tempstring += "\nnoinst_LTLIBRARIES += libvolk_" + machine_name + ".la "
         tempstring += "\nlibvolk_la_LIBADD += libvolk_" + machine_name + ".la\n"
         tempstring += "libvolk_la_CPPFLAGS += -DLV_MACHINE_" + machine_name.swapcase() + " \n"
diff --git a/volk/gen/make_proccpu_sim.py b/volk/gen/make_proccpu_sim.py
index 029dacfcc0..e2c8123e5b 100644
--- a/volk/gen/make_proccpu_sim.py
+++ b/volk/gen/make_proccpu_sim.py
@@ -1,24 +1,24 @@
 #!/usr/bin/env python
 #
 # Copyright 2011 Free Software Foundation, Inc.
-# 
+#
 # This file is part of GNU Radio
-# 
+#
 # GNU Radio is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 3, or (at your option)
 # any later version.
-# 
+#
 # GNU Radio is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU General Public License
 # along with GNU Radio; see the file COPYING.  If not, write to
 # the Free Software Foundation, Inc., 51 Franklin Street,
 # Boston, MA 02110-1301, USA.
-# 
+#
 
 from xml.dom import minidom
 
@@ -35,7 +35,7 @@ def make_proccpu_sim(dom) :
     tempstring = tempstring + "    }\n";
     tempstring = tempstring + "}\n";
     tempstring = tempstring + "\n\n";
-    
+
     tempstring = tempstring + "int main() {\n";
     tempstring = tempstring + "    volk_cpu_init();\n";
     tempstring = tempstring + "    char buf[2048];\n";
diff --git a/volk/gen/make_set_simd.py b/volk/gen/make_set_simd.py
index 5a848e59ea..8334487d78 100644
--- a/volk/gen/make_set_simd.py
+++ b/volk/gen/make_set_simd.py
@@ -43,17 +43,17 @@ def make_set_simd(dom, machines) :
     tempstring = tempstring + "  indLV_ARCH=no\n";
     tempstring = tempstring + "  AC_ARG_WITH(lv_arch,\n";
     tempstring = tempstring + "    AC_HELP_STRING([--with-lv_arch=ARCH],[set volk hardware speedups as space separated string with elements from the following list(";
-    
+
     for domarch in dom:
         arch = str(domarch.attributes["name"].value);
         tempstring = tempstring + arch + ", "
     tempstring = tempstring[0:len(tempstring) - 2];
-        
+
     tempstring = tempstring + ")]),\n";
     tempstring = tempstring + "      [cf_with_lv_arch=\"$withval\"],\n";
     tempstring = tempstring + "      [cf_with_lv_arch=\"\"])\n";
     if str(domarch.attributes["type"].value) == "all":
-        arch = str(domarch.attributes["name"].value);    
+        arch = str(domarch.attributes["name"].value);
         tempstring = tempstring + "  AC_DEFINE(LV_MAKE_" + arch.swapcase() + ", 1, [always set "+ arch + "!])\n";
     tempstring = tempstring + "  ADDONS=\"\"\n";
     tempstring = tempstring + "  BUILT_ARCHS=\"\"\n";
@@ -67,7 +67,7 @@ def make_set_simd(dom, machines) :
     tempstring = tempstring[0:-1] + "\"\n";
     tempstring = tempstring + "    OVERRULE_FLAG=\"yes\"\n";
     tempstring = tempstring + "  fi\n";
-    
+
     tempstring = tempstring +'\ndnl init LV_MAKE_XXX and then try to add archs\n';
     for domarch in dom:
         if str(domarch.attributes["type"].value) != "all":
@@ -118,9 +118,9 @@ def make_set_simd(dom, machines) :
             tempstring = tempstring + "  if  test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n"
             tempstring = tempstring + "    indLV_ARCH=no\n"
             tempstring = tempstring + "  fi\n"
-            
-            tempstring = tempstring + "  if test \"$indCC\" == \"yes\" && test \"$indCXX\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n"        
-            
+
+            tempstring = tempstring + "  if test \"$indCC\" == \"yes\" && test \"$indCXX\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n"
+
             #tempstring = tempstring + "    ADDONS=\"${ADDONS} -" + flag + "\"\n";
             tempstring = tempstring + "    BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n";
             tempstring = tempstring + "    LV_MAKE_" + arch.swapcase() + "=yes\n";
@@ -138,17 +138,17 @@ def make_set_simd(dom, machines) :
             tempstring = tempstring + "  if  test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n"
             tempstring = tempstring + "    indLV_ARCH=no\n"
             tempstring = tempstring + "  fi\n"
-            tempstring = tempstring + "  if test \"$indLV_ARCH\" == \"yes\"; then\n"        
+            tempstring = tempstring + "  if test \"$indLV_ARCH\" == \"yes\"; then\n"
             tempstring = tempstring + "    LV_MAKE_" + arch.swapcase() + "=yes\n";
             tempstring = tempstring + "    BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n";
             tempstring = tempstring + "  fi\n"
             tempstring = tempstring + "  indLV_ARCH=no\n"
-            
+
 
     for domarch in dom:
         arch = str(domarch.attributes["name"].value);
         tempstring = tempstring + "  AM_CONDITIONAL(LV_MAKE_" + arch.swapcase() + ", test \"$LV_MAKE_" + arch.swapcase() + "\" == \"yes\")\n";
-    
+
     tempstring += "\n"
     #now we can define the machines we're compiling
     for machine_name in machines:
@@ -156,11 +156,11 @@ def make_set_simd(dom, machines) :
         marchlist = machines[machine_name]
         for march in marchlist:
             tempstring += "test \"$LV_MAKE_" + march.swapcase() + "\" == \"yes\" && "
-            
+
         tempstring += "test true)\n" #just so we don't have to detect the last one in the group, i know
     tempstring = tempstring + "  LV_CXXFLAGS=\"${LV_CXXFLAGS} ${ADDONS}\"\n"
     tempstring = tempstring + "])\n"
-   
+
     return tempstring;
 
-        
+
diff --git a/volk/gen/make_typedefs.py b/volk/gen/make_typedefs.py
index 8f9f2b55e0..09221d2ef1 100644
--- a/volk/gen/make_typedefs.py
+++ b/volk/gen/make_typedefs.py
@@ -15,7 +15,7 @@ def make_typedefs(funclist, retlist, my_argtypelist) :
 
     tempstring = tempstring + '\n';
 
-    for i in range(len(funclist)): 
+    for i in range(len(funclist)):
         tempstring = tempstring + "typedef " + retlist[i] +" (*" + replace_volk.sub("p", funclist[i]) + ")(" + my_argtypelist[i] + ");\n";
 
     tempstring = tempstring + "#endif /*INCLUDED_VOLK_TYPEDEFS*/\n";
diff --git a/volk/gen/volk_register.py b/volk/gen/volk_register.py
index 0774ece291..3a237c5ca0 100644
--- a/volk/gen/volk_register.py
+++ b/volk/gen/volk_register.py
@@ -62,7 +62,7 @@ for line in hdr_files:
             subdtype = re.search("[0-9]+[A-z]+", dtype);
             if subdtype:
                 datatypes.append(subdtype.group(0));
-                
+
 
 datatypes = set(datatypes);
 
@@ -71,7 +71,7 @@ for line in hdr_files:
         if dt in line:
             subline = re.search("(volk_" + dt +"_.*(a|u).*\.h)", line);
             if subline:
-                
+
                 subsubline = re.search(".+(?=\.h)", subline.group(0));
                 functions.append(subsubline.group(0));
 
@@ -80,13 +80,13 @@ afile = minidom.parse(os.path.join(srcdir, "gen/archs.xml"))
 filearchs = afile.getElementsByTagName("arch");
 for filearch in filearchs:
     archs.append(str(filearch.attributes["name"].value));
-                 
+
 for arch in archs:
     a_var = re.search("^\$", arch);
     if a_var:
         archs.remove(arch);
-        
-#strip out mutex archs        
+
+#strip out mutex archs
 
 archflags_dict = {}
 for filearch in filearchs:
@@ -177,22 +177,22 @@ for func in functions:
                 tag = re.search("\w+", tag.group(0));
                 if tag:
                     tags.append(tag.group(0));
-               
-    
+
+
         if begun_name == 0:
             retline = re.search(".+(?=" + func + ")", line);
             if retline:
                 ret = retline.group(0);
-                
-                
-                
-        
+
+
+
+
             subline = re.search(func + ".*", line);
             if subline:
                 subsubline = re.search("\(.*?\)", subline.group(0));
                 if subsubline:
                     args = subsubline.group(0);
-                    
+
                 else:
                     begun_name = 1;
                     subsubline = re.search("\(.*", subline.group(0));
@@ -214,25 +214,25 @@ for func in functions:
                 if subline:
                     args = subline.group(0);
                     begun_name = 0;
-                else: 
+                else:
                     subline = re.search("\(.*", line);
                     if subline:
                         args = subline.group(0);
                         begun_paren = 1;
-    
+
     replace = re.compile("static ");
     ret = replace.sub("", ret);
     replace = re.compile("inline ");
     ret = replace.sub("", ret);
     replace = re.compile("\)");
     arched_args = replace.sub(", const char* arch) {", args);
-    
+
     remove = re.compile('\)|\(|{');
     rargs = remove.sub("", args);
     sargs = rargs.split(',');
-    
-    
-    
+
+
+
     margs = [];
     atypes = [];
     for arg in sargs:
@@ -241,7 +241,7 @@ for func in functions:
         replace = re.compile(" " + temp[-1]);
         atypes.append(replace.sub("", arg));
 
-    
+
     my_args = ""
     arg_types = ""
     for arg in range(0, len(margs) - 1):
@@ -255,7 +255,7 @@ for func in functions:
     this_type = leading_space_remove.sub("", atypes[-1]);
     arg_types = arg_types + this_type;
     my_argtypelist.append(arg_types);
-    
+
     if(ret[-1] != ' '):
         ret = ret + ' ';
 
@@ -263,7 +263,7 @@ for func in functions:
     my_arglist.append(my_args) #!!!!!!!!!!!!!!!!!
     retlist.append(ret);
     fcountlist.append(fcount);
-    taglist.append(tags);               
+    taglist.append(tags);
 
 
 outfile_cpu_h.write(make_cpuid_h(filearchs));
diff --git a/volk/include/volk/volk_16i_branch_4_state_8_a.h b/volk/include/volk/volk_16i_branch_4_state_8_a.h
index 0424e66e99..6338fbdd17 100644
--- a/volk/include/volk/volk_16i_branch_4_state_8_a.h
+++ b/volk/include/volk/volk_16i_branch_4_state_8_a.h
@@ -3,7 +3,7 @@
 
 
 #include<inttypes.h>
-#include<stdio.h>	
+#include<stdio.h>
 
 
 
@@ -15,32 +15,32 @@
 #include<tmmintrin.h>
 
 static inline  void volk_16i_branch_4_state_8_a_ssse3(short* target,  short* src0, char** permuters, short* cntl2, short* cntl3, short* scalars) {
-	
-  
+
+
   __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11;
 
   __m128i *p_target, *p_src0, *p_cntl2, *p_cntl3, *p_scalars;
 
-  
-  
+
+
   p_target = (__m128i*)target;
   p_src0 = (__m128i*)src0;
   p_cntl2 = (__m128i*)cntl2;
   p_cntl3 = (__m128i*)cntl3;
   p_scalars = (__m128i*)scalars;
-  
+
   int i = 0;
-  
+
   int bound = 1;
-  
-  
+
+
   xmm0 = _mm_load_si128(p_scalars);
-  
+
   xmm1 = _mm_shufflelo_epi16(xmm0, 0);
   xmm2 = _mm_shufflelo_epi16(xmm0, 0x55);
   xmm3 = _mm_shufflelo_epi16(xmm0, 0xaa);
   xmm4 = _mm_shufflelo_epi16(xmm0, 0xff);
-  
+
   xmm1 = _mm_shuffle_epi32(xmm1, 0x00);
   xmm2 = _mm_shuffle_epi32(xmm2, 0x00);
   xmm3 = _mm_shuffle_epi32(xmm3, 0x00);
@@ -52,40 +52,40 @@ static inline  void volk_16i_branch_4_state_8_a_ssse3(short* target,  short* src
   xmm10 = _mm_load_si128((__m128i*)permuters[3]);
 
   for(; i < bound; ++i) {
-    
+
     xmm5 = _mm_load_si128(p_src0);
-    
-    
-    
-    
-    
-    
-    
+
+
+
+
+
+
+
 
 
     xmm0 = _mm_shuffle_epi8(xmm5, xmm0);
     xmm6 = _mm_shuffle_epi8(xmm5, xmm6);
     xmm8 = _mm_shuffle_epi8(xmm5, xmm8);
     xmm10 = _mm_shuffle_epi8(xmm5, xmm10);
-    
+
     p_src0 += 4;
-   
-    
+
+
     xmm5 = _mm_add_epi16(xmm1, xmm2);
-    
+
     xmm6 = _mm_add_epi16(xmm2, xmm6);
     xmm8 = _mm_add_epi16(xmm1, xmm8);
-   
-     
+
+
     xmm7 = _mm_load_si128(p_cntl2);
     xmm9 = _mm_load_si128(p_cntl3);
-    
+
     xmm0 = _mm_add_epi16(xmm5, xmm0);
-    
-    
+
+
     xmm7 = _mm_and_si128(xmm7, xmm3);
     xmm9 = _mm_and_si128(xmm9, xmm4);
-    
+
     xmm5 = _mm_load_si128(&p_cntl2[1]);
     xmm11 = _mm_load_si128(&p_cntl3[1]);
 
@@ -95,96 +95,96 @@ static inline  void volk_16i_branch_4_state_8_a_ssse3(short* target,  short* src
     xmm11 = _mm_and_si128(xmm11, xmm4);
 
     xmm0 = _mm_add_epi16(xmm0, xmm7);
-   
-   
- 
+
+
+
     xmm7 = _mm_load_si128(&p_cntl2[2]);
     xmm9 = _mm_load_si128(&p_cntl3[2]);
-    
+
     xmm5 = _mm_add_epi16(xmm5, xmm11);
-    
+
     xmm7 = _mm_and_si128(xmm7, xmm3);
     xmm9 = _mm_and_si128(xmm9, xmm4);
-    
+
     xmm6 = _mm_add_epi16(xmm6, xmm5);
-   
-    
+
+
     xmm5 = _mm_load_si128(&p_cntl2[3]);
     xmm11 = _mm_load_si128(&p_cntl3[3]);
-    
+
     xmm7 = _mm_add_epi16(xmm7, xmm9);
-    
+
     xmm5 = _mm_and_si128(xmm5, xmm3);
     xmm11 = _mm_and_si128(xmm11, xmm4);
-    
+
     xmm8 = _mm_add_epi16(xmm8, xmm7);
-    
+
     xmm5 = _mm_add_epi16(xmm5, xmm11);
-    
+
     _mm_store_si128(p_target, xmm0);
     _mm_store_si128(&p_target[1], xmm6);
 
     xmm10 = _mm_add_epi16(xmm5, xmm10);
-    
+
     _mm_store_si128(&p_target[2], xmm8);
-    
+
     _mm_store_si128(&p_target[3], xmm10);
-    
-    p_target += 3;   
+
+    p_target += 3;
   }
 }
-	
-	
+
+
 #endif /*LV_HAVE_SSEs*/
 
 #ifdef LV_HAVE_GENERIC
 static inline  void volk_16i_branch_4_state_8_a_generic(short* target,  short* src0, char** permuters, short* cntl2, short* cntl3, short* scalars) {
 	int i = 0;
-	
+
 	int bound = 4;
-	
+
 	for(; i < bound; ++i) {
-	  target[i* 8] = src0[((char)permuters[i][0])/2] 
+	  target[i* 8] = src0[((char)permuters[i][0])/2]
 	    + ((i + 1)%2  * scalars[0])
 	    + (((i >> 1)^1) * scalars[1])
 	    + (cntl2[i * 8] & scalars[2])
 	    + (cntl3[i * 8] & scalars[3]);
-	  target[i* 8 + 1] = src0[((char)permuters[i][1 * 2])/2] 
+	  target[i* 8 + 1] = src0[((char)permuters[i][1 * 2])/2]
 	    + ((i + 1)%2  * scalars[0])
 	    + (((i >> 1)^1) * scalars[1])
 	    + (cntl2[i * 8 + 1] & scalars[2])
 	    + (cntl3[i * 8 + 1] & scalars[3]);
-	  target[i* 8 + 2] = src0[((char)permuters[i][2 * 2])/2] 
+	  target[i* 8 + 2] = src0[((char)permuters[i][2 * 2])/2]
 	    + ((i + 1)%2  * scalars[0])
 	    + (((i >> 1)^1) * scalars[1])
 	    + (cntl2[i * 8 + 2] & scalars[2])
 	    + (cntl3[i * 8 + 2] & scalars[3]);
-	  target[i* 8 + 3] = src0[((char)permuters[i][3 * 2])/2] 
+	  target[i* 8 + 3] = src0[((char)permuters[i][3 * 2])/2]
 	    + ((i + 1)%2  * scalars[0])
 	    + (((i >> 1)^1) * scalars[1])
 	    + (cntl2[i * 8 + 3] & scalars[2])
 	    + (cntl3[i * 8 + 3] & scalars[3]);
-	  target[i* 8 + 4] = src0[((char)permuters[i][4 * 2])/2] 
+	  target[i* 8 + 4] = src0[((char)permuters[i][4 * 2])/2]
 	    + ((i + 1)%2  * scalars[0])
 	    + (((i >> 1)^1) * scalars[1])
 	    + (cntl2[i * 8 + 4] & scalars[2])
 	    + (cntl3[i * 8 + 4] & scalars[3]);
-	  target[i* 8 + 5] = src0[((char)permuters[i][5 * 2])/2] 
+	  target[i* 8 + 5] = src0[((char)permuters[i][5 * 2])/2]
 	    + ((i + 1)%2  * scalars[0])
 	    + (((i >> 1)^1) * scalars[1])
 	    + (cntl2[i * 8 + 5] & scalars[2])
 	    + (cntl3[i * 8 + 5] & scalars[3]);
-	  target[i* 8 + 6] = src0[((char)permuters[i][6 * 2])/2] 
+	  target[i* 8 + 6] = src0[((char)permuters[i][6 * 2])/2]
 	    + ((i + 1)%2  * scalars[0])
 	    + (((i >> 1)^1) * scalars[1])
 	    + (cntl2[i * 8 + 6] & scalars[2])
 	    + (cntl3[i * 8 + 6] & scalars[3]);
-	  target[i* 8 + 7] = src0[((char)permuters[i][7 * 2])/2] 
+	  target[i* 8 + 7] = src0[((char)permuters[i][7 * 2])/2]
 	    + ((i + 1)%2  * scalars[0])
 	    + (((i >> 1)^1) * scalars[1])
 	    + (cntl2[i * 8 + 7] & scalars[2])
 	    + (cntl3[i * 8 + 7] & scalars[3]);
-	  
+
 	}
 }
 
diff --git a/volk/include/volk/volk_16i_convert_8i_a.h b/volk/include/volk/volk_16i_convert_8i_a.h
index 8046035c74..84548c8c50 100644
--- a/volk/include/volk/volk_16i_convert_8i_a.h
+++ b/volk/include/volk/volk_16i_convert_8i_a.h
@@ -15,7 +15,7 @@
 static inline void volk_16i_convert_8i_a_sse2(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int sixteenthPoints = num_points / 16;
-    
+
      int8_t* outputVectorPtr = outputVector;
     int16_t* inputPtr = (int16_t*)inputVector;
     __m128i inputVal1;
@@ -30,7 +30,7 @@ static inline void volk_16i_convert_8i_a_sse2(int8_t* outputVector, const int16_
 
       inputVal1 = _mm_srai_epi16(inputVal1, 8);
       inputVal2 = _mm_srai_epi16(inputVal2, 8);
-      
+
       ret = _mm_packs_epi16(inputVal1, inputVal2);
 
       _mm_store_si128((__m128i*)outputVectorPtr, ret);
diff --git a/volk/include/volk/volk_16i_convert_8i_u.h b/volk/include/volk/volk_16i_convert_8i_u.h
index df1084fe0c..80608a1412 100644
--- a/volk/include/volk/volk_16i_convert_8i_u.h
+++ b/volk/include/volk/volk_16i_convert_8i_u.h
@@ -16,7 +16,7 @@
 static inline void volk_16i_convert_8i_u_sse2(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int sixteenthPoints = num_points / 16;
-    
+
      int8_t* outputVectorPtr = outputVector;
     int16_t* inputPtr = (int16_t*)inputVector;
     __m128i inputVal1;
@@ -31,7 +31,7 @@ static inline void volk_16i_convert_8i_u_sse2(int8_t* outputVector, const int16_
 
       inputVal1 = _mm_srai_epi16(inputVal1, 8);
       inputVal2 = _mm_srai_epi16(inputVal2, 8);
-      
+
       ret = _mm_packs_epi16(inputVal1, inputVal2);
 
       _mm_storeu_si128((__m128i*)outputVectorPtr, ret);
diff --git a/volk/include/volk/volk_16i_max_star_16i_a.h b/volk/include/volk/volk_16i_max_star_16i_a.h
index 28197ddef7..edfff8a82b 100644
--- a/volk/include/volk/volk_16i_max_star_16i_a.h
+++ b/volk/include/volk/volk_16i_max_star_16i_a.h
@@ -3,7 +3,7 @@
 
 
 #include<inttypes.h>
-#include<stdio.h>	
+#include<stdio.h>
 
 
 #ifdef LV_HAVE_SSSE3
@@ -15,82 +15,82 @@
 static inline  void volk_16i_max_star_16i_a_ssse3(short* target, short* src0, unsigned int num_bytes) {
 
 
-  
+
   short candidate = src0[0];
   short cands[8];
   __m128i xmm0, xmm1, xmm3, xmm4, xmm5, xmm6;
-  
+
 
   __m128i *p_src0;
-  
+
   p_src0 = (__m128i*)src0;
 
   int bound = num_bytes >> 4;
   int leftovers = (num_bytes >> 1) & 7;
-  
+
   int i = 0;
-  
-  
+
+
   xmm1 = _mm_setzero_si128();
   xmm0 = _mm_setzero_si128();
   //_mm_insert_epi16(xmm0, candidate, 0);
-  
-  xmm0 = _mm_shuffle_epi8(xmm0, xmm1); 
 
-  
+  xmm0 = _mm_shuffle_epi8(xmm0, xmm1);
+
+
   for(i = 0; i < bound; ++i) {
     xmm1 = _mm_load_si128(p_src0);
     p_src0 += 1;
     //xmm2 = _mm_sub_epi16(xmm1, xmm0);
-  
 
-    
-  
-  
-  
+
+
+
+
+
     xmm3 = _mm_cmpgt_epi16(xmm0, xmm1);
     xmm4 = _mm_cmpeq_epi16(xmm0, xmm1);
     xmm5 = _mm_cmpgt_epi16(xmm1, xmm0);
 
     xmm6 = _mm_xor_si128(xmm4, xmm5);
-    
+
     xmm3 = _mm_and_si128(xmm3, xmm0);
     xmm4 = _mm_and_si128(xmm6, xmm1);
-    
+
     xmm0 = _mm_add_epi16(xmm3, xmm4);
-    
-  
+
+
   }
-  
+
   _mm_store_si128((__m128i*)cands, xmm0);
-  
+
   for(i = 0; i < 8; ++i) {
     candidate = ((short)(candidate - cands[i]) > 0) ? candidate : cands[i];
   }
-  
- 
-  
+
+
+
   for(i = 0; i < leftovers; ++i) {
-  
+
     candidate = ((short)(candidate - src0[(bound << 3) + i]) > 0) ? candidate : src0[(bound << 3) + i];
   }
 
   target[0] = candidate;
-  
-    
-    
- 
 
-}   
- 
+
+
+
+
+}
+
 #endif /*LV_HAVE_SSSE3*/
 
 #ifdef LV_HAVE_GENERIC
 
 static inline void volk_16i_max_star_16i_a_generic(short* target, short* src0, unsigned int num_bytes) {
-	
+
 	int i = 0;
-	
+
 	int bound = num_bytes >> 1;
 
 	short candidate = src0[0];
@@ -98,7 +98,7 @@ static inline void volk_16i_max_star_16i_a_generic(short* target, short* src0, u
 	  candidate = ((short)(candidate - src0[i]) > 0) ? candidate : src0[i];
 	}
 	target[0] = candidate;
-	  
+
 }
 
 
diff --git a/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h b/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h
index a10a62350e..c1c9084256 100644
--- a/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h
+++ b/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h
@@ -4,7 +4,7 @@
 #include <volk/volk_common.h>
 
 #include<inttypes.h>
-#include<stdio.h>	
+#include<stdio.h>
 
 
 #ifdef LV_HAVE_SSSE3
@@ -20,107 +20,107 @@ static inline  void volk_16i_max_star_horizontal_16i_a_ssse3(int16_t* target, in
   const static uint8_t andmask0[16] = {0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
   const static uint8_t andmask1[16] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02};
 
-  
-  
+
+
   __m128i xmm0, xmm1, xmm2, xmm3, xmm4;
   __m128i  xmm5, xmm6, xmm7, xmm8;
-  
+
   xmm4 = _mm_load_si128((__m128i*)shufmask0);
   xmm5 = _mm_load_si128((__m128i*)shufmask1);
   xmm6 = _mm_load_si128((__m128i*)andmask0);
   xmm7 = _mm_load_si128((__m128i*)andmask1);
-  
+
   __m128i *p_target, *p_src0;
-  
+
   p_target = (__m128i*)target;
   p_src0 = (__m128i*)src0;
 
   int bound = num_bytes >> 5;
   int intermediate = (num_bytes >> 4) & 1;
   int leftovers = (num_bytes >> 1) & 7;
-  
+
   int i = 0;
-  
-  
+
+
   for(i = 0; i < bound; ++i) {
-     
+
     xmm0 = _mm_load_si128(p_src0);
     xmm1 = _mm_load_si128(&p_src0[1]);
-    
-    
+
+
 
     xmm2 = _mm_xor_si128(xmm2, xmm2);
     p_src0 += 2;
-    
+
     xmm3 = _mm_hsub_epi16(xmm0, xmm1);
-    
-    xmm2 = _mm_cmpgt_epi16(xmm2, xmm3);   
+
+    xmm2 = _mm_cmpgt_epi16(xmm2, xmm3);
 
     xmm8 = _mm_and_si128(xmm2, xmm6);
     xmm3 = _mm_and_si128(xmm2, xmm7);
-    
+
 
     xmm8 = _mm_add_epi8(xmm8, xmm4);
     xmm3 = _mm_add_epi8(xmm3, xmm5);
 
     xmm0 = _mm_shuffle_epi8(xmm0, xmm8);
     xmm1 = _mm_shuffle_epi8(xmm1, xmm3);
-    
-    
+
+
     xmm3 = _mm_add_epi16(xmm0, xmm1);
 
-    
+
     _mm_store_si128(p_target, xmm3);
-    
+
     p_target += 1;
-  
+
   }
 
   for(i = 0; i < intermediate; ++i) {
-    
+
     xmm0 = _mm_load_si128(p_src0);
-    
-    
+
+
     xmm2 = _mm_xor_si128(xmm2, xmm2);
     p_src0 += 1;
-    
+
     xmm3 = _mm_hsub_epi16(xmm0, xmm1);
     xmm2 = _mm_cmpgt_epi16(xmm2, xmm3);
 
     xmm8 = _mm_and_si128(xmm2, xmm6);
-    
+
     xmm3 = _mm_add_epi8(xmm8, xmm4);
-    
+
     xmm0 = _mm_shuffle_epi8(xmm0, xmm3);
-    
+
     _mm_storel_pd((double*)p_target, bit128_p(&xmm0)->double_vec);
-    
+
     p_target = (__m128i*)((int8_t*)p_target + 8);
 
   }
-    
-  for(i = (bound << 4) + (intermediate << 3); i < (bound << 4) + (intermediate << 3) + leftovers ; i += 2) { 
+
+  for(i = (bound << 4) + (intermediate << 3); i < (bound << 4) + (intermediate << 3) + leftovers ; i += 2) {
     target[i>>1] = ((int16_t)(src0[i] - src0[i + 1]) > 0) ? src0[i] : src0[i + 1];
   }
-  
 
-}   
- 
+
+}
+
 #endif /*LV_HAVE_SSSE3*/
 
 
 #ifdef LV_HAVE_GENERIC
 static inline void volk_16i_max_star_horizontal_16i_a_generic(int16_t* target, int16_t* src0, unsigned int num_bytes) {
-	
+
 	int i = 0;
-	
+
 	int bound = num_bytes >> 1;
 
-      
+
 	for(i = 0; i < bound; i += 2) {
 	  target[i >> 1] = ((int16_t) (src0[i] - src0[i + 1]) > 0) ? src0[i] : src0[i+1];
 	}
-		
+
 }
 
 
diff --git a/volk/include/volk/volk_16i_permute_and_scalar_add_a.h b/volk/include/volk/volk_16i_permute_and_scalar_add_a.h
index de36cee804..47e3cbf9cb 100644
--- a/volk/include/volk/volk_16i_permute_and_scalar_add_a.h
+++ b/volk/include/volk/volk_16i_permute_and_scalar_add_a.h
@@ -3,7 +3,7 @@
 
 
 #include<inttypes.h>
-#include<stdio.h>	
+#include<stdio.h>
 
 
 
@@ -14,33 +14,33 @@
 #include<emmintrin.h>
 
 static inline  void volk_16i_permute_and_scalar_add_a_sse2(short* target,  short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_bytes) {
-	
+
 
   __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
 
   __m128i *p_target, *p_cntl0, *p_cntl1, *p_cntl2, *p_cntl3, *p_scalars;
 
   short* p_permute_indexes = permute_indexes;
-  
+
   p_target = (__m128i*)target;
   p_cntl0 = (__m128i*)cntl0;
   p_cntl1 = (__m128i*)cntl1;
   p_cntl2 = (__m128i*)cntl2;
   p_cntl3 = (__m128i*)cntl3;
   p_scalars = (__m128i*)scalars;
-  
+
   int i = 0;
-  
+
   int bound = (num_bytes >> 4);
   int leftovers = (num_bytes >> 1) & 7;
-  
+
   xmm0 = _mm_load_si128(p_scalars);
-  
+
   xmm1 = _mm_shufflelo_epi16(xmm0, 0);
   xmm2 = _mm_shufflelo_epi16(xmm0, 0x55);
   xmm3 = _mm_shufflelo_epi16(xmm0, 0xaa);
   xmm4 = _mm_shufflelo_epi16(xmm0, 0xff);
-  
+
   xmm1 = _mm_shuffle_epi32(xmm1, 0x00);
   xmm2 = _mm_shuffle_epi32(xmm2, 0x00);
   xmm3 = _mm_shuffle_epi32(xmm3, 0x00);
@@ -64,49 +64,49 @@ static inline  void volk_16i_permute_and_scalar_add_a_sse2(short* target,  short
 
     xmm0 = _mm_add_epi16(xmm0, xmm5);
     xmm6 = _mm_add_epi16(xmm6, xmm7);
-    
+
     p_permute_indexes += 8;
-    
+
     xmm0 = _mm_add_epi16(xmm0, xmm6);
-    
+
     xmm5 = _mm_load_si128(p_cntl0);
     xmm6 = _mm_load_si128(p_cntl1);
     xmm7 = _mm_load_si128(p_cntl2);
-    
+
     xmm5 = _mm_and_si128(xmm5, xmm1);
     xmm6 = _mm_and_si128(xmm6, xmm2);
     xmm7 = _mm_and_si128(xmm7, xmm3);
-    
+
     xmm0 = _mm_add_epi16(xmm0, xmm5);
-    
+
     xmm5 = _mm_load_si128(p_cntl3);
-    
+
     xmm6 = _mm_add_epi16(xmm6, xmm7);
 
     p_cntl0 += 1;
-    
+
     xmm5 = _mm_and_si128(xmm5, xmm4);
-    
+
     xmm0 = _mm_add_epi16(xmm0, xmm6);
-    
+
     p_cntl1 += 1;
     p_cntl2 += 1;
-    
-    xmm0 = _mm_add_epi16(xmm0, xmm5); 
-    
+
+    xmm0 = _mm_add_epi16(xmm0, xmm5);
+
     p_cntl3 += 1;
 
     _mm_store_si128(p_target, xmm0);
-    
+
     p_target += 1;
   }
-	
-	
-	
-	
+
+
+
+
 
   for(i = bound * 8; i < (bound * 8) + leftovers; ++i) {
-    target[i] = src0[permute_indexes[i]] 
+    target[i] = src0[permute_indexes[i]]
       + (cntl0[i] & scalars[0])
       + (cntl1[i] & scalars[1])
       + (cntl2[i] & scalars[2])
@@ -118,18 +118,18 @@ static inline  void volk_16i_permute_and_scalar_add_a_sse2(short* target,  short
 
 #ifdef LV_HAVE_GENERIC
 static inline void volk_16i_permute_and_scalar_add_a_generic(short* target, short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_bytes) {
-	
+
 	int i = 0;
-	
+
 	int bound = num_bytes >> 1;
 
 	for(i = 0; i < bound; ++i) {
-		target[i] = src0[permute_indexes[i]] 
+		target[i] = src0[permute_indexes[i]]
 			+ (cntl0[i] & scalars[0])
 			+ (cntl1[i] & scalars[1])
 			+ (cntl2[i] & scalars[2])
 			+ (cntl3[i] & scalars[3]);
-		
+
 	}
 }
 
diff --git a/volk/include/volk/volk_16i_s32f_convert_32f_a.h b/volk/include/volk/volk_16i_s32f_convert_32f_a.h
index 0555fdf002..7108ff6590 100644
--- a/volk/include/volk/volk_16i_s32f_convert_32f_a.h
+++ b/volk/include/volk/volk_16i_s32f_convert_32f_a.h
@@ -17,7 +17,7 @@
 static inline void volk_16i_s32f_convert_32f_a_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int eighthPoints = num_points / 8;
-    
+
      float* outputVectorPtr = outputVector;
     __m128 invScalar = _mm_set_ps1(1.0/scalar);
     int16_t* inputPtr = (int16_t*)inputVector;
@@ -36,7 +36,7 @@ static inline void volk_16i_s32f_convert_32f_a_sse4_1(float* outputVector, const
       // Convert the lower 4 values into 32 bit words
       inputVal = _mm_cvtepi16_epi32(inputVal);
       inputVal2 = _mm_cvtepi16_epi32(inputVal2);
-      
+
       ret = _mm_cvtepi32_ps(inputVal);
       ret = _mm_mul_ps(ret, invScalar);
       _mm_storeu_ps(outputVectorPtr, ret);
@@ -71,7 +71,7 @@ static inline void volk_16i_s32f_convert_32f_a_sse4_1(float* outputVector, const
 static inline void volk_16i_s32f_convert_32f_a_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
-    
+
     float* outputVectorPtr = outputVector;
     __m128 invScalar = _mm_set_ps1(1.0/scalar);
     int16_t* inputPtr = (int16_t*)inputVector;
@@ -79,7 +79,7 @@ static inline void volk_16i_s32f_convert_32f_a_sse(float* outputVector, const in
 
     for(;number < quarterPoints; number++){
       ret = _mm_set_ps((float)(inputPtr[3]), (float)(inputPtr[2]), (float)(inputPtr[1]), (float)(inputPtr[0]));
-      
+
       ret = _mm_mul_ps(ret, invScalar);
       _mm_storeu_ps(outputVectorPtr, ret);
 
diff --git a/volk/include/volk/volk_16i_s32f_convert_32f_u.h b/volk/include/volk/volk_16i_s32f_convert_32f_u.h
index d34acc0913..4ce8e8f35b 100644
--- a/volk/include/volk/volk_16i_s32f_convert_32f_u.h
+++ b/volk/include/volk/volk_16i_s32f_convert_32f_u.h
@@ -18,7 +18,7 @@
 static inline void volk_16i_s32f_convert_32f_u_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int eighthPoints = num_points / 8;
-    
+
      float* outputVectorPtr = outputVector;
     __m128 invScalar = _mm_set_ps1(1.0/scalar);
     int16_t* inputPtr = (int16_t*)inputVector;
@@ -37,7 +37,7 @@ static inline void volk_16i_s32f_convert_32f_u_sse4_1(float* outputVector, const
       // Convert the lower 4 values into 32 bit words
       inputVal = _mm_cvtepi16_epi32(inputVal);
       inputVal2 = _mm_cvtepi16_epi32(inputVal2);
-      
+
       ret = _mm_cvtepi32_ps(inputVal);
       ret = _mm_mul_ps(ret, invScalar);
       _mm_storeu_ps(outputVectorPtr, ret);
@@ -73,7 +73,7 @@ static inline void volk_16i_s32f_convert_32f_u_sse4_1(float* outputVector, const
 static inline void volk_16i_s32f_convert_32f_u_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
-    
+
     float* outputVectorPtr = outputVector;
     __m128 invScalar = _mm_set_ps1(1.0/scalar);
     int16_t* inputPtr = (int16_t*)inputVector;
@@ -81,7 +81,7 @@ static inline void volk_16i_s32f_convert_32f_u_sse(float* outputVector, const in
 
     for(;number < quarterPoints; number++){
       ret = _mm_set_ps((float)(inputPtr[3]), (float)(inputPtr[2]), (float)(inputPtr[1]), (float)(inputPtr[0]));
-      
+
       ret = _mm_mul_ps(ret, invScalar);
       _mm_storeu_ps(outputVectorPtr, ret);
 
diff --git a/volk/include/volk/volk_16i_x4_quad_max_star_16i_a.h b/volk/include/volk/volk_16i_x4_quad_max_star_16i_a.h
index 2688aff04e..0d84985530 100644
--- a/volk/include/volk/volk_16i_x4_quad_max_star_16i_a.h
+++ b/volk/include/volk/volk_16i_x4_quad_max_star_16i_a.h
@@ -3,7 +3,7 @@
 
 
 #include<inttypes.h>
-#include<stdio.h>	
+#include<stdio.h>
 
 
 
@@ -14,7 +14,7 @@
 #include<emmintrin.h>
 
 static inline  void volk_16i_x4_quad_max_star_16i_a_sse2(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_bytes) {
-	
+
 
 
 
@@ -23,41 +23,41 @@ static inline  void volk_16i_x4_quad_max_star_16i_a_sse2(short* target, short* s
 	int bound = (num_bytes >> 4);
 	int bound_copy = bound;
 	int leftovers = (num_bytes >> 1) & 7;
-	
+
 	__m128i *p_target, *p_src0, *p_src1, *p_src2, *p_src3;
 	p_target = (__m128i*) target;
 	p_src0 =  (__m128i*)src0;
 	p_src1 =  (__m128i*)src1;
 	p_src2 =  (__m128i*)src2;
 	p_src3 =  (__m128i*)src3;
-	
-	
+
+
 
 	__m128i xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
 
 	while(bound_copy > 0) {
-	 
+
 	  xmm1 = _mm_load_si128(p_src0);
 	  xmm2 = _mm_load_si128(p_src1);
 	  xmm3 = _mm_load_si128(p_src2);
 	  xmm4 = _mm_load_si128(p_src3);
-	  
+
 	  xmm5 = _mm_setzero_si128();
 	  xmm6 = _mm_setzero_si128();
 	  xmm7 = xmm1;
 	  xmm8 = xmm3;
-	  
-	  
+
+
 	  xmm1 = _mm_sub_epi16(xmm2, xmm1);
 
-	  
+
 
 	  xmm3 = _mm_sub_epi16(xmm4, xmm3);
 
 	  xmm5 = _mm_cmpgt_epi16(xmm1, xmm5);
 	  xmm6 = _mm_cmpgt_epi16(xmm3, xmm6);
 
-	  
+
 
 	  xmm2 = _mm_and_si128(xmm5, xmm2);
 	  xmm4 = _mm_and_si128(xmm6, xmm4);
@@ -67,7 +67,7 @@ static inline  void volk_16i_x4_quad_max_star_16i_a_sse2(short* target, short* s
 	  xmm5 = _mm_add_epi16(xmm2, xmm5);
 	  xmm6 = _mm_add_epi16(xmm4, xmm6);
 
-	  
+
 	  xmm1 = _mm_xor_si128(xmm1, xmm1);
 	  xmm2 = xmm5;
 	  xmm5 = _mm_sub_epi16(xmm6, xmm5);
@@ -76,23 +76,23 @@ static inline  void volk_16i_x4_quad_max_star_16i_a_sse2(short* target, short* s
 
 	  xmm1 = _mm_cmpgt_epi16(xmm5, xmm1);
 	  p_src1 += 1;
-	  
+
 	  xmm6 = _mm_and_si128(xmm1, xmm6);
-	  	  
+
 	  xmm1 = _mm_andnot_si128(xmm1, xmm2);
 	  p_src2 += 1;
 
 
-	  
+
 	  xmm1 = _mm_add_epi16(xmm6, xmm1);
 	  p_src3 += 1;
 
-	  
+
 	  _mm_store_si128(p_target, xmm1);
 	  p_target += 1;
-      
+
 	}
-	
+
 
 	/*asm volatile
 		(
@@ -111,25 +111,25 @@ static inline  void volk_16i_x4_quad_max_star_16i_a_sse2(short* target, short* s
 		 "movaps %%xmm3, %%xmm8\n\t"
 		 "psubw %%xmm2, %%xmm1\n\t"
 		 "psubw %%xmm4, %%xmm3\n\t"
-		 
+
 		 "pcmpgtw %%xmm1, %%xmm5\n\t"
 		 "pcmpgtw %%xmm3, %%xmm6\n\t"
-		 
+
 		 "pand %%xmm5, %%xmm2\n\t"
 		 "pand %%xmm6, %%xmm4\n\t"
 		 "pandn %%xmm7, %%xmm5\n\t"
 		 "pandn %%xmm8, %%xmm6\n\t"
-		 
+
 		 "paddw %%xmm2, %%xmm5\n\t"
 		 "paddw %%xmm4, %%xmm6\n\t"
 
 		 "pxor %%xmm1, %%xmm1\n\t"
 		 "movaps %%xmm5, %%xmm2\n\t"
-		 
+
 		 "psubw %%xmm6, %%xmm5\n\t"
 		 "add $16, %[src0]\n\t"
 		 "add $-1, %[bound]\n\t"
-	  
+
 		 "pcmpgtw %%xmm5, %%xmm1\n\t"
 		 "add $16, %[src1]\n\t"
 
@@ -144,13 +144,13 @@ static inline  void volk_16i_x4_quad_max_star_16i_a_sse2(short* target, short* s
 		 "movaps %%xmm1, (%[target])\n\t"
 		 "addw $16, %[target]\n\t"
 		 "jmp volk_16i_x4_quad_max_star_16i_a_sse2_L1\n\t"
-		 
+
 		 "volk_16i_x4_quad_max_star_16i_a_sse2_END:\n\t"
 		 :
 		 :[bound]"r"(bound), [src0]"r"(src0), [src1]"r"(src1), [src2]"r"(src2), [src3]"r"(src3), [target]"r"(target)
 		 :
 		 );
-	*/ 
+	*/
 
 	short temp0 = 0;
 	short temp1 = 0;
@@ -169,11 +169,11 @@ static inline  void volk_16i_x4_quad_max_star_16i_a_sse2(short* target, short* s
 
 #ifdef LV_HAVE_GENERIC
 static inline void volk_16i_x4_quad_max_star_16i_a_generic(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_bytes) {
-	
+
 	int i = 0;
-	
+
 	int bound = num_bytes >> 1;
-	
+
 	short temp0 = 0;
 	short temp1 = 0;
 	for(i = 0; i < bound; ++i) {
diff --git a/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a.h b/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a.h
index e4c9f17ed3..5560b92d92 100644
--- a/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a.h
+++ b/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a.h
@@ -3,7 +3,7 @@
 
 
 #include<inttypes.h>
-#include<stdio.h>	
+#include<stdio.h>
 
 
 
@@ -14,7 +14,7 @@
 #include<emmintrin.h>
 
 static inline  void volk_16i_x5_add_quad_16i_x4_a_sse2(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_bytes) {
-  
+
   __m128i xmm0, xmm1, xmm2, xmm3, xmm4;
   __m128i *p_target0, *p_target1, *p_target2, *p_target3,  *p_src0, *p_src1, *p_src2, *p_src3, *p_src4;
   p_target0 = (__m128i*)target0;
@@ -39,16 +39,16 @@ static inline  void volk_16i_x5_add_quad_16i_x4_a_sse2(short* target0, short* ta
     xmm2 = _mm_load_si128(p_src2);
     xmm3 = _mm_load_si128(p_src3);
     xmm4 = _mm_load_si128(p_src4);
-    
+
     p_src0 += 1;
     p_src1 += 1;
-    
+
     xmm1 = _mm_add_epi16(xmm0, xmm1);
     xmm2 = _mm_add_epi16(xmm0, xmm2);
     xmm3 = _mm_add_epi16(xmm0, xmm3);
     xmm4 = _mm_add_epi16(xmm0, xmm4);
-    
-    
+
+
     p_src2 += 1;
     p_src3 += 1;
     p_src4 += 1;
@@ -57,7 +57,7 @@ static inline  void volk_16i_x5_add_quad_16i_x4_a_sse2(short* target0, short* ta
     _mm_store_si128(p_target1, xmm2);
     _mm_store_si128(p_target2, xmm3);
     _mm_store_si128(p_target3, xmm4);
-    
+
     p_target0 += 1;
     p_target1 += 1;
     p_target2 += 1;
@@ -97,9 +97,9 @@ static inline  void volk_16i_x5_add_quad_16i_x4_a_sse2(short* target0, short* ta
 		 :[bound]"r"(bound), [src0]"r"(src0), [src1]"r"(src1), [src2]"r"(src2), [src3]"r"(src3), [src4]"r"(src4), [target0]"r"(target0), [target1]"r"(target1), [target2]"r"(target2), [target3]"r"(target3)
 		 :"xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
 		 );
-		 
+
     */
-	 
+
 
   for(i = bound * 8; i < (bound * 8) + leftovers; ++i) {
     target0[i] = src0[i] + src1[i];
@@ -114,9 +114,9 @@ static inline  void volk_16i_x5_add_quad_16i_x4_a_sse2(short* target0, short* ta
 #ifdef LV_HAVE_GENERIC
 
 static inline void volk_16i_x5_add_quad_16i_x4_a_generic(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_bytes) {
-	
+
 	int i = 0;
-	
+
 	int bound = num_bytes >> 1;
 
 	for(i = 0; i < bound; ++i) {
diff --git a/volk/include/volk/volk_16ic_deinterleave_16i_x2_a.h b/volk/include/volk/volk_16ic_deinterleave_16i_x2_a.h
index cdd60235e6..f8aa30874f 100644
--- a/volk/include/volk/volk_16ic_deinterleave_16i_x2_a.h
+++ b/volk/include/volk/volk_16ic_deinterleave_16i_x2_a.h
@@ -71,7 +71,7 @@ static inline void volk_16ic_deinterleave_16i_x2_a_sse2(int16_t* iBuffer, int16_
   __m128i highMask = _mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0x0, 0x0);
 
   unsigned int eighthPoints = num_points / 8;
- 
+
   for(number = 0; number < eighthPoints; number++){
     complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr);  complexVectorPtr += 8;
     complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr);  complexVectorPtr += 8;
diff --git a/volk/include/volk/volk_16ic_deinterleave_real_16i_a.h b/volk/include/volk/volk_16ic_deinterleave_real_16i_a.h
index 2b99e068e9..bac1f2e4b0 100644
--- a/volk/include/volk/volk_16ic_deinterleave_real_16i_a.h
+++ b/volk/include/volk/volk_16ic_deinterleave_real_16i_a.h
@@ -64,7 +64,7 @@ static inline void volk_16ic_deinterleave_real_16i_a_sse2(int16_t* iBuffer, cons
   __m128i highMask = _mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0x0, 0x0);
 
   unsigned int eighthPoints = num_points / 8;
- 
+
   for(number = 0; number < eighthPoints; number++){
     complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr);  complexVectorPtr += 8;
     complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr);  complexVectorPtr += 8;
diff --git a/volk/include/volk/volk_16ic_magnitude_16i_a.h b/volk/include/volk/volk_16ic_magnitude_16i_a.h
index a6951e9679..317075e85e 100644
--- a/volk/include/volk/volk_16ic_magnitude_16i_a.h
+++ b/volk/include/volk/volk_16ic_magnitude_16i_a.h
@@ -17,7 +17,7 @@
 static inline void volk_16ic_magnitude_16i_a_sse3(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
-   
+
   const int16_t* complexVectorPtr = (const int16_t*)complexVector;
   int16_t* magnitudeVectorPtr = magnitudeVector;
 
@@ -35,7 +35,7 @@ static inline void volk_16ic_magnitude_16i_a_sse3(int16_t* magnitudeVector, cons
     inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
     inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
     inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
-      
+
     inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
     inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
     inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
@@ -106,7 +106,7 @@ static inline void volk_16ic_magnitude_16i_a_sse(int16_t* magnitudeVector, const
     inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
     inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
     inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
-      
+
     cplxValue1 = _mm_load_ps(inputFloatBuffer);
     complexVectorPtr += 4;
 
diff --git a/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a.h b/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a.h
index e73d405e02..1300395ff0 100644
--- a/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a.h
+++ b/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a.h
@@ -20,7 +20,7 @@ static inline void volk_16ic_s32f_deinterleave_32f_x2_a_sse(float* iBuffer, floa
     float* qBufferPtr = qBuffer;
 
     uint64_t number = 0;
-    const uint64_t quarterPoints = num_points / 4;    
+    const uint64_t quarterPoints = num_points / 4;
     __m128 cplxValue1, cplxValue2, iValue, qValue;
 
     __m128 invScalar = _mm_set_ps1(1.0/scalar);
@@ -29,12 +29,12 @@ static inline void volk_16ic_s32f_deinterleave_32f_x2_a_sse(float* iBuffer, floa
     __VOLK_ATTR_ALIGNED(16) float floatBuffer[8];
 
     for(;number < quarterPoints; number++){
-      
+
       floatBuffer[0] = (float)(complexVectorPtr[0]);
       floatBuffer[1] = (float)(complexVectorPtr[1]);
       floatBuffer[2] = (float)(complexVectorPtr[2]);
       floatBuffer[3] = (float)(complexVectorPtr[3]);
-      
+
       floatBuffer[4] = (float)(complexVectorPtr[4]);
       floatBuffer[5] = (float)(complexVectorPtr[5]);
       floatBuffer[6] = (float)(complexVectorPtr[6]);
diff --git a/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a.h b/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a.h
index 1630cb0ed2..5e2d82b947 100644
--- a/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a.h
+++ b/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a.h
@@ -18,7 +18,7 @@ static inline void volk_16ic_s32f_deinterleave_real_32f_a_sse4_1(float* iBuffer,
   float* iBufferPtr = iBuffer;
 
   unsigned int number = 0;
-  const unsigned int quarterPoints = num_points / 4;    
+  const unsigned int quarterPoints = num_points / 4;
 
   __m128 iFloatValue;
 
@@ -49,7 +49,7 @@ static inline void volk_16ic_s32f_deinterleave_real_32f_a_sse4_1(float* iBuffer,
     *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
     sixteenTComplexVectorPtr++;
   }
-    
+
 }
 #endif /* LV_HAVE_SSE4_1 */
 
@@ -66,7 +66,7 @@ static inline void volk_16ic_s32f_deinterleave_real_32f_a_sse(float* iBuffer, co
   float* iBufferPtr = iBuffer;
 
   unsigned int number = 0;
-  const unsigned int quarterPoints = num_points / 4;    
+  const unsigned int quarterPoints = num_points / 4;
   __m128 iValue;
 
   const float iScalar = 1.0/scalar;
@@ -77,7 +77,7 @@ static inline void volk_16ic_s32f_deinterleave_real_32f_a_sse(float* iBuffer, co
 
   for(;number < quarterPoints; number++){
     floatBuffer[0] = (float)(*complexVectorPtr); complexVectorPtr += 2;
-    floatBuffer[1] = (float)(*complexVectorPtr); complexVectorPtr += 2; 
+    floatBuffer[1] = (float)(*complexVectorPtr); complexVectorPtr += 2;
     floatBuffer[2] = (float)(*complexVectorPtr); complexVectorPtr += 2;
     floatBuffer[3] = (float)(*complexVectorPtr); complexVectorPtr += 2;
 
@@ -96,7 +96,7 @@ static inline void volk_16ic_s32f_deinterleave_real_32f_a_sse(float* iBuffer, co
     *iBufferPtr++ = ((float)(*complexVectorPtr++)) * iScalar;
     complexVectorPtr++;
   }
-    
+
 }
 #endif /* LV_HAVE_SSE */
 
diff --git a/volk/include/volk/volk_16ic_s32f_magnitude_32f_a.h b/volk/include/volk/volk_16ic_s32f_magnitude_32f_a.h
index 35406e2cbb..d20eea1a79 100644
--- a/volk/include/volk/volk_16ic_s32f_magnitude_32f_a.h
+++ b/volk/include/volk/volk_16ic_s32f_magnitude_32f_a.h
@@ -18,7 +18,7 @@
 static inline void volk_16ic_s32f_magnitude_32f_a_sse3(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
-  
+
   const int16_t* complexVectorPtr = (const int16_t*)complexVector;
   float* magnitudeVectorPtr = magnitudeVector;
 
@@ -34,7 +34,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a_sse3(float* magnitudeVector, c
     inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
     inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
     inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
-      
+
     inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
     inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
     inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
@@ -56,7 +56,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a_sse3(float* magnitudeVector, c
     result = _mm_sqrt_ps(result); // Square root the values
 
     _mm_store_ps(magnitudeVectorPtr, result);
-      
+
     magnitudeVectorPtr += 4;
   }
 
@@ -99,7 +99,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a_sse(float* magnitudeVector, co
     inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
     inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
     inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
-      
+
     inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
     inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
     inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
@@ -107,7 +107,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a_sse(float* magnitudeVector, co
 
     cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
     cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
-    
+
     re = _mm_shuffle_ps(cplxValue1, cplxValue2, 0x88);
     im = _mm_shuffle_ps(cplxValue1, cplxValue2, 0xdd);
 
@@ -124,7 +124,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a_sse(float* magnitudeVector, co
     result = _mm_sqrt_ps(result); // Square root the values
 
     _mm_store_ps(magnitudeVectorPtr, result);
-      
+
     magnitudeVectorPtr += 4;
   }
 
@@ -138,7 +138,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a_sse(float* magnitudeVector, co
   }
 }
 
- 
+
 #endif /* LV_HAVE_SSE */
 
 #ifdef LV_HAVE_GENERIC
diff --git a/volk/include/volk/volk_16u_byteswap_a.h b/volk/include/volk/volk_16u_byteswap_a.h
index 75c7ef0f3c..fc3eb5fa7a 100644
--- a/volk/include/volk/volk_16u_byteswap_a.h
+++ b/volk/include/volk/volk_16u_byteswap_a.h
@@ -31,9 +31,9 @@ static inline void volk_16u_byteswap_a_sse2(uint16_t* intsToSwap, unsigned int n
     inputPtr += 8;
   }
 
-  
+
   // Byteswap any remaining points:
-  number = eighthPoints*8;  
+  number = eighthPoints*8;
   for(; number < num_points; number++){
     uint16_t outputVal = *inputPtr;
     outputVal = (((outputVal >> 8) & 0xff) | ((outputVal << 8) & 0xff00));
diff --git a/volk/include/volk/volk_32f_accumulator_s32f_a.h b/volk/include/volk/volk_32f_accumulator_s32f_a.h
index 7ce0d1c80d..78364d0a01 100644
--- a/volk/include/volk/volk_32f_accumulator_s32f_a.h
+++ b/volk/include/volk/volk_32f_accumulator_s32f_a.h
@@ -20,13 +20,13 @@ static inline void volk_32f_accumulator_s32f_a_sse(float* result, const float* i
 
   const float* aPtr = inputBuffer;
   __VOLK_ATTR_ALIGNED(16) float tempBuffer[4];
-  
+
   __m128 accumulator = _mm_setzero_ps();
   __m128 aVal = _mm_setzero_ps();
 
   for(;number < quarterPoints; number++){
     aVal = _mm_load_ps(aPtr);
-    accumulator = _mm_add_ps(accumulator, aVal); 
+    accumulator = _mm_add_ps(accumulator, aVal);
     aPtr += 4;
   }
   _mm_store_ps(tempBuffer,accumulator); // Store the results back into the C container
@@ -34,7 +34,7 @@ static inline void volk_32f_accumulator_s32f_a_sse(float* result, const float* i
   returnValue += tempBuffer[1];
   returnValue += tempBuffer[2];
   returnValue += tempBuffer[3];
-  
+
   number = quarterPoints * 4;
   for(;number < num_points; number++){
     returnValue += (*aPtr++);
diff --git a/volk/include/volk/volk_32f_convert_64f_a.h b/volk/include/volk/volk_32f_convert_64f_a.h
index dda6464096..2c469ac421 100644
--- a/volk/include/volk/volk_32f_convert_64f_a.h
+++ b/volk/include/volk/volk_32f_convert_64f_a.h
@@ -16,7 +16,7 @@ static inline void volk_32f_convert_64f_a_sse2(double* outputVector, const float
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
-    
+
   const float* inputVectorPtr = (const float*)inputVector;
   double* outputVectorPtr = outputVector;
   __m128d ret;
@@ -24,7 +24,7 @@ static inline void volk_32f_convert_64f_a_sse2(double* outputVector, const float
 
   for(;number < quarterPoints; number++){
     inputVal = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
- 
+
     ret = _mm_cvtps_pd(inputVal);
 
     _mm_store_pd(outputVectorPtr, ret);
@@ -38,7 +38,7 @@ static inline void volk_32f_convert_64f_a_sse2(double* outputVector, const float
     outputVectorPtr += 2;
   }
 
-  number = quarterPoints * 4;    
+  number = quarterPoints * 4;
   for(; number < num_points; number++){
     outputVector[number] = (double)(inputVector[number]);
   }
diff --git a/volk/include/volk/volk_32f_convert_64f_u.h b/volk/include/volk/volk_32f_convert_64f_u.h
index 387baa3b95..10d8a4f6c0 100644
--- a/volk/include/volk/volk_32f_convert_64f_u.h
+++ b/volk/include/volk/volk_32f_convert_64f_u.h
@@ -16,7 +16,7 @@ static inline void volk_32f_convert_64f_u_sse2(double* outputVector, const float
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
-    
+
   const float* inputVectorPtr = (const float*)inputVector;
   double* outputVectorPtr = outputVector;
   __m128d ret;
@@ -24,7 +24,7 @@ static inline void volk_32f_convert_64f_u_sse2(double* outputVector, const float
 
   for(;number < quarterPoints; number++){
     inputVal = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
- 
+
     ret = _mm_cvtps_pd(inputVal);
 
     _mm_storeu_pd(outputVectorPtr, ret);
@@ -38,7 +38,7 @@ static inline void volk_32f_convert_64f_u_sse2(double* outputVector, const float
     outputVectorPtr += 2;
   }
 
-  number = quarterPoints * 4;    
+  number = quarterPoints * 4;
   for(; number < num_points; number++){
     outputVector[number] = (double)(inputVector[number]);
   }
diff --git a/volk/include/volk/volk_32f_index_max_16u_a.h b/volk/include/volk/volk_32f_index_max_16u_a.h
index 0c43a50815..b9ca1dd3e7 100644
--- a/volk/include/volk/volk_32f_index_max_16u_a.h
+++ b/volk/include/volk/volk_32f_index_max_16u_a.h
@@ -52,7 +52,7 @@ static inline void volk_32f_index_max_16u_a_sse4_1(unsigned int* target, const f
     }
 
     number = quarterPoints * 4;
-    for(;number < num_points; number++){      
+    for(;number < num_points; number++){
       if(src0[number] > max){
 	index = number;
 	max = src0[number];
@@ -111,7 +111,7 @@ static inline void volk_32f_index_max_16u_a_sse(unsigned int* target, const floa
     }
 
     number = quarterPoints * 4;
-    for(;number < num_points; number++){      
+    for(;number < num_points; number++){
       if(src0[number] > max){
 	index = number;
 	max = src0[number];
@@ -128,11 +128,11 @@ static inline void volk_32f_index_max_16u_a_generic(unsigned int* target, const
   if(num_points > 0){
     float max = src0[0];
     unsigned int index = 0;
-    
-    unsigned int i = 1; 
-    
+
+    unsigned int i = 1;
+
     for(; i < num_points; ++i) {
-      
+
       if(src0[i] > max){
 	index = i;
 	max = src0[i];
diff --git a/volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a.h b/volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a.h
index b25df75a17..43713f8b5a 100644
--- a/volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a.h
+++ b/volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a.h
@@ -46,7 +46,7 @@ static inline void volk_32f_s32f_32f_fm_detect_32f_a_sse(float* outputVector, co
     inPtr++;
     outPtr++;
   }
-    
+
   for (; number < quarterPoints; number++) {
     // Load data
     next3old1 = _mm_loadu_ps((float*) (inPtr-1));
@@ -65,7 +65,7 @@ static inline void volk_32f_s32f_32f_fm_detect_32f_a_sse(float* outputVector, co
     _mm_store_ps(outPtr,next3old1); // Store the results back into the output
     outPtr += 4;
   }
-    
+
   for (number = (4 > (quarterPoints*4) ? 4 : (4 * quarterPoints)); number < num_points; number++) {
     *outPtr = *(inPtr) - *(inPtr-1);
     if (*outPtr >  bound) *outPtr -= 2*bound;
@@ -73,7 +73,7 @@ static inline void volk_32f_s32f_32f_fm_detect_32f_a_sse(float* outputVector, co
     inPtr++;
     outPtr++;
   }
-    
+
   *saveValue = inputVector[num_points-1];
 }
 #endif /* LV_HAVE_SSE */
@@ -94,14 +94,14 @@ static inline void volk_32f_s32f_32f_fm_detect_32f_a_generic(float* outputVector
   unsigned int number = 0;
   float* outPtr = outputVector;
   const float* inPtr = inputVector;
-      
+
   // Do the first 1 by hand since we're going in from the saveValue:
   *outPtr = *inPtr - *saveValue;
   if (*outPtr >  bound) *outPtr -= 2*bound;
   if (*outPtr < -bound) *outPtr += 2*bound;
   inPtr++;
   outPtr++;
-    
+
   for (number = 1; number < num_points; number++) {
     *outPtr = *(inPtr) - *(inPtr-1);
     if (*outPtr >  bound) *outPtr -= 2*bound;
@@ -109,7 +109,7 @@ static inline void volk_32f_s32f_32f_fm_detect_32f_a_generic(float* outputVector
     inPtr++;
     outPtr++;
   }
-    
+
   *saveValue = inputVector[num_points-1];
 }
 #endif /* LV_HAVE_GENERIC */
diff --git a/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a.h b/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a.h
index b1902a8c00..db61e359d6 100644
--- a/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a.h
+++ b/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a.h
@@ -23,7 +23,7 @@ static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a_sse(float* nois
 
   const float* dataPointsPtr = realDataPoints;
   __VOLK_ATTR_ALIGNED(16) float avgPointsVector[4];
-    
+
   __m128 dataPointsVal;
   __m128 avgPointsVal = _mm_setzero_ps();
   // Calculate the sum (for mean) for all points
@@ -73,11 +73,11 @@ static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a_sse(float* nois
 
     // Mask off the items that exceed the mean amplitude and add the avg Points that do not exceed the mean amplitude
     avgPointsVal = _mm_add_ps(avgPointsVal, _mm_and_ps(compareMask, dataPointsVal));
-      
+
     // Count the number of bins which do not exceed the mean amplitude
     vValidBinCount = _mm_add_ps(vValidBinCount, _mm_and_ps(compareMask, vOnesVector));
   }
-    
+
   // Calculate the mean from the remaining data points
   _mm_store_ps(avgPointsVector, avgPointsVal);
 
@@ -104,7 +104,7 @@ static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a_sse(float* nois
       validBinCount += 1.0;
     }
   }
-    
+
   float localNoiseFloorAmplitude = 0;
   if(validBinCount > 0.0){
     localNoiseFloorAmplitude = sumMean / validBinCount;
diff --git a/volk/include/volk/volk_32f_s32f_convert_16i_a.h b/volk/include/volk/volk_32f_s32f_convert_16i_a.h
index a249596780..9df4946f24 100644
--- a/volk/include/volk/volk_32f_s32f_convert_16i_a.h
+++ b/volk/include/volk/volk_32f_s32f_convert_16i_a.h
@@ -19,7 +19,7 @@ static inline void volk_32f_s32f_convert_16i_a_sse2(int16_t* outputVector, const
   unsigned int number = 0;
 
   const unsigned int eighthPoints = num_points / 8;
-    
+
   const float* inputVectorPtr = (const float*)inputVector;
   int16_t* outputVectorPtr = outputVector;
 
@@ -51,7 +51,7 @@ static inline void volk_32f_s32f_convert_16i_a_sse2(int16_t* outputVector, const
     outputVectorPtr += 8;
   }
 
-  number = eighthPoints * 8;    
+  number = eighthPoints * 8;
   for(; number < num_points; number++){
     r = inputVector[number] * scalar;
     if(r > max_val)
@@ -76,7 +76,7 @@ static inline void volk_32f_s32f_convert_16i_a_sse(int16_t* outputVector, const
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
-    
+
   const float* inputVectorPtr = (const float*)inputVector;
   int16_t* outputVectorPtr = outputVector;
 
@@ -105,7 +105,7 @@ static inline void volk_32f_s32f_convert_16i_a_sse(int16_t* outputVector, const
     *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[3]);
   }
 
-  number = quarterPoints * 4;    
+  number = quarterPoints * 4;
   for(; number < num_points; number++){
     r = inputVector[number] * scalar;
     if(r > max_val)
diff --git a/volk/include/volk/volk_32f_s32f_convert_16i_u.h b/volk/include/volk/volk_32f_s32f_convert_16i_u.h
index f58158041d..56e42c9bd5 100644
--- a/volk/include/volk/volk_32f_s32f_convert_16i_u.h
+++ b/volk/include/volk/volk_32f_s32f_convert_16i_u.h
@@ -19,7 +19,7 @@ static inline void volk_32f_s32f_convert_16i_u_sse2(int16_t* outputVector, const
   unsigned int number = 0;
 
   const unsigned int eighthPoints = num_points / 8;
-    
+
   const float* inputVectorPtr = (const float*)inputVector;
   int16_t* outputVectorPtr = outputVector;
 
@@ -51,7 +51,7 @@ static inline void volk_32f_s32f_convert_16i_u_sse2(int16_t* outputVector, const
     outputVectorPtr += 8;
   }
 
-  number = eighthPoints * 8;    
+  number = eighthPoints * 8;
   for(; number < num_points; number++){
     r = inputVector[number] * scalar;
     if(r > max_val)
@@ -77,7 +77,7 @@ static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
-    
+
   const float* inputVectorPtr = (const float*)inputVector;
   int16_t* outputVectorPtr = outputVector;
 
@@ -106,7 +106,7 @@ static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const
     *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[3]);
   }
 
-  number = quarterPoints * 4;    
+  number = quarterPoints * 4;
   for(; number < num_points; number++){
     r = inputVector[number] * scalar;
     if(r > max_val)
diff --git a/volk/include/volk/volk_32f_s32f_convert_32i_a.h b/volk/include/volk/volk_32f_s32f_convert_32i_a.h
index 15fa282fba..38e6b2e745 100644
--- a/volk/include/volk/volk_32f_s32f_convert_32i_a.h
+++ b/volk/include/volk/volk_32f_s32f_convert_32i_a.h
@@ -18,7 +18,7 @@ static inline void volk_32f_s32f_convert_32i_a_avx(int32_t* outputVector, const
   unsigned int number = 0;
 
   const unsigned int eighthPoints = num_points / 8;
-    
+
   const float* inputVectorPtr = (const float*)inputVector;
   int32_t* outputVectorPtr = outputVector;
 
@@ -42,7 +42,7 @@ static inline void volk_32f_s32f_convert_32i_a_avx(int32_t* outputVector, const
     outputVectorPtr += 8;
   }
 
-  number = eighthPoints * 8;    
+  number = eighthPoints * 8;
   for(; number < num_points; number++){
     r = inputVector[number] * scalar;
     if(r > max_val)
@@ -67,7 +67,7 @@ static inline void volk_32f_s32f_convert_32i_a_sse2(int32_t* outputVector, const
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
-    
+
   const float* inputVectorPtr = (const float*)inputVector;
   int32_t* outputVectorPtr = outputVector;
 
@@ -91,7 +91,7 @@ static inline void volk_32f_s32f_convert_32i_a_sse2(int32_t* outputVector, const
     outputVectorPtr += 4;
   }
 
-  number = quarterPoints * 4;    
+  number = quarterPoints * 4;
   for(; number < num_points; number++){
     r = inputVector[number] * scalar;
     if(r > max_val)
@@ -116,7 +116,7 @@ static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector, const
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
-    
+
   const float* inputVectorPtr = (const float*)inputVector;
   int32_t* outputVectorPtr = outputVector;
 
@@ -144,7 +144,7 @@ static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector, const
     *outputVectorPtr++ = (int32_t)(outputFloatBuffer[3]);
   }
 
-  number = quarterPoints * 4;    
+  number = quarterPoints * 4;
   for(; number < num_points; number++){
     r = inputVector[number] * scalar;
     if(r > max_val)
diff --git a/volk/include/volk/volk_32f_s32f_convert_32i_u.h b/volk/include/volk/volk_32f_s32f_convert_32i_u.h
index d203546c62..ee15edb464 100644
--- a/volk/include/volk/volk_32f_s32f_convert_32i_u.h
+++ b/volk/include/volk/volk_32f_s32f_convert_32i_u.h
@@ -18,7 +18,7 @@ static inline void volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector, const
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
-    
+
   const float* inputVectorPtr = (const float*)inputVector;
   int32_t* outputVectorPtr = outputVector;
 
@@ -42,7 +42,7 @@ static inline void volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector, const
     outputVectorPtr += 4;
   }
 
-  number = quarterPoints * 4;    
+  number = quarterPoints * 4;
   for(; number < num_points; number++){
     r = inputVector[number] * scalar;
     if(r > max_val)
@@ -68,7 +68,7 @@ static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector, const
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
-    
+
   const float* inputVectorPtr = (const float*)inputVector;
   int32_t* outputVectorPtr = outputVector;
 
@@ -96,7 +96,7 @@ static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector, const
     *outputVectorPtr++ = (int32_t)(outputFloatBuffer[3]);
   }
 
-  number = quarterPoints * 4;    
+  number = quarterPoints * 4;
   for(; number < num_points; number++){
     r = inputVector[number] * scalar;
     if(r > max_val)
diff --git a/volk/include/volk/volk_32f_s32f_convert_8i_a.h b/volk/include/volk/volk_32f_s32f_convert_8i_a.h
index 05172171c2..800017d5da 100644
--- a/volk/include/volk/volk_32f_s32f_convert_8i_a.h
+++ b/volk/include/volk/volk_32f_s32f_convert_8i_a.h
@@ -18,7 +18,7 @@ static inline void volk_32f_s32f_convert_8i_a_sse2(int8_t* outputVector, const f
   unsigned int number = 0;
 
   const unsigned int sixteenthPoints = num_points / 16;
-    
+
   const float* inputVectorPtr = (const float*)inputVector;
   int8_t* outputVectorPtr = outputVector;
 
@@ -47,7 +47,7 @@ static inline void volk_32f_s32f_convert_8i_a_sse2(int8_t* outputVector, const f
     intInputVal2 = _mm_cvtps_epi32(inputVal2);
     intInputVal3 = _mm_cvtps_epi32(inputVal3);
     intInputVal4 = _mm_cvtps_epi32(inputVal4);
-    
+
     intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
     intInputVal3 = _mm_packs_epi32(intInputVal3, intInputVal4);
 
@@ -57,7 +57,7 @@ static inline void volk_32f_s32f_convert_8i_a_sse2(int8_t* outputVector, const f
     outputVectorPtr += 16;
   }
 
-  number = sixteenthPoints * 16;    
+  number = sixteenthPoints * 16;
   for(; number < num_points; number++){
     r = inputVector[number] * scalar;
     if(r > max_val)
@@ -82,7 +82,7 @@ static inline void volk_32f_s32f_convert_8i_a_sse(int8_t* outputVector, const fl
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
-    
+
   const float* inputVectorPtr = (const float*)inputVector;
 
   float min_val = -128;
@@ -110,7 +110,7 @@ static inline void volk_32f_s32f_convert_8i_a_sse(int8_t* outputVector, const fl
     *outputVectorPtr++ = (int8_t)(outputFloatBuffer[3]);
   }
 
-  number = quarterPoints * 4;    
+  number = quarterPoints * 4;
   for(; number < num_points; number++){
     r = inputVector[number] * scalar;
     if(r > max_val)
diff --git a/volk/include/volk/volk_32f_s32f_convert_8i_u.h b/volk/include/volk/volk_32f_s32f_convert_8i_u.h
index 12991e9c17..870e9419bb 100644
--- a/volk/include/volk/volk_32f_s32f_convert_8i_u.h
+++ b/volk/include/volk/volk_32f_s32f_convert_8i_u.h
@@ -18,7 +18,7 @@ static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector, const f
   unsigned int number = 0;
 
   const unsigned int sixteenthPoints = num_points / 16;
-    
+
   const float* inputVectorPtr = (const float*)inputVector;
   int8_t* outputVectorPtr = outputVector;
 
@@ -47,7 +47,7 @@ static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector, const f
     intInputVal2 = _mm_cvtps_epi32(inputVal2);
     intInputVal3 = _mm_cvtps_epi32(inputVal3);
     intInputVal4 = _mm_cvtps_epi32(inputVal4);
-    
+
     intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
     intInputVal3 = _mm_packs_epi32(intInputVal3, intInputVal4);
 
@@ -57,7 +57,7 @@ static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector, const f
     outputVectorPtr += 16;
   }
 
-  number = sixteenthPoints * 16;    
+  number = sixteenthPoints * 16;
   for(; number < num_points; number++){
     r = inputVector[number] * scalar;
     if(r > max_val)
@@ -83,7 +83,7 @@ static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector, const fl
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
-    
+
   const float* inputVectorPtr = (const float*)inputVector;
   int8_t* outputVectorPtr = outputVector;
 
@@ -111,7 +111,7 @@ static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector, const fl
     *outputVectorPtr++ = (int8_t)(outputFloatBuffer[3]);
   }
 
-  number = quarterPoints * 4;    
+  number = quarterPoints * 4;
   for(; number < num_points; number++){
     r = inputVector[number] * scalar;
     if(r > max_val)
diff --git a/volk/include/volk/volk_32f_s32f_multiply_32f_a.h b/volk/include/volk/volk_32f_s32f_multiply_32f_a.h
index d1c6f3f652..99b8e68c5b 100644
--- a/volk/include/volk/volk_32f_s32f_multiply_32f_a.h
+++ b/volk/include/volk/volk_32f_s32f_multiply_32f_a.h
@@ -23,11 +23,11 @@ static inline void volk_32f_s32f_multiply_32f_a_sse(float* cVector, const float*
     __m128 aVal, bVal, cVal;
     bVal = _mm_set_ps1(scalar);
     for(;number < quarterPoints; number++){
-      
-      aVal = _mm_load_ps(aPtr); 
-      
-      cVal = _mm_mul_ps(aVal, bVal); 
-      
+
+      aVal = _mm_load_ps(aPtr);
+
+      cVal = _mm_mul_ps(aVal, bVal);
+
       _mm_store_ps(cPtr,cVal); // Store the results back into the C container
 
       aPtr += 4;
@@ -60,11 +60,11 @@ static inline void volk_32f_s32f_multiply_32f_a_avx(float* cVector, const float*
     __m256 aVal, bVal, cVal;
     bVal = _mm256_set1_ps(scalar);
     for(;number < eighthPoints; number++){
-      
-      aVal = _mm256_load_ps(aPtr); 
-      
-      cVal = _mm256_mul_ps(aVal, bVal); 
-      
+
+      aVal = _mm256_load_ps(aPtr);
+
+      cVal = _mm256_mul_ps(aVal, bVal);
+
       _mm256_store_ps(cPtr,cVal); // Store the results back into the C container
 
       aPtr += 8;
diff --git a/volk/include/volk/volk_32f_s32f_multiply_32f_u.h b/volk/include/volk/volk_32f_s32f_multiply_32f_u.h
index 0e700060f7..b3fae9b053 100644
--- a/volk/include/volk/volk_32f_s32f_multiply_32f_u.h
+++ b/volk/include/volk/volk_32f_s32f_multiply_32f_u.h
@@ -23,11 +23,11 @@ static inline void volk_32f_s32f_multiply_32f_u_sse(float* cVector, const float*
     __m128 aVal, bVal, cVal;
     bVal = _mm_set_ps1(scalar);
     for(;number < quarterPoints; number++){
-      
-      aVal = _mm_loadu_ps(aPtr); 
-      
-      cVal = _mm_mul_ps(aVal, bVal); 
-      
+
+      aVal = _mm_loadu_ps(aPtr);
+
+      cVal = _mm_mul_ps(aVal, bVal);
+
       _mm_storeu_ps(cPtr,cVal); // Store the results back into the C container
 
       aPtr += 4;
@@ -60,11 +60,11 @@ static inline void volk_32f_s32f_multiply_32f_u_avx(float* cVector, const float*
     __m256 aVal, bVal, cVal;
     bVal = _mm256_set1_ps(scalar);
     for(;number < eighthPoints; number++){
-      
-      aVal = _mm256_loadu_ps(aPtr); 
-      
-      cVal = _mm256_mul_ps(aVal, bVal); 
-      
+
+      aVal = _mm256_loadu_ps(aPtr);
+
+      cVal = _mm256_mul_ps(aVal, bVal);
+
       _mm256_storeu_ps(cPtr,cVal); // Store the results back into the C container
 
       aPtr += 8;
diff --git a/volk/include/volk/volk_32f_s32f_power_32f_a.h b/volk/include/volk/volk_32f_s32f_power_32f_a.h
index 09c9059619..633ad14b09 100644
--- a/volk/include/volk/volk_32f_s32f_power_32f_a.h
+++ b/volk/include/volk/volk_32f_s32f_power_32f_a.h
@@ -21,7 +21,7 @@
 */
 static inline void volk_32f_s32f_power_32f_a_sse4_1(float* cVector, const float* aVector, const float power, unsigned int num_points){
   unsigned int number = 0;
-  
+
   float* cPtr = cVector;
   const float* aPtr = aVector;
 
@@ -33,22 +33,22 @@ static inline void volk_32f_s32f_power_32f_a_sse4_1(float* cVector, const float*
   __m128 negatedValues;
   __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
   __m128 onesMask = _mm_set_ps1(1);
-  
+
   __m128 aVal, cVal;
   for(;number < quarterPoints; number++){
-    
+
     aVal = _mm_load_ps(aPtr);
     signMask = _mm_cmplt_ps(aVal, zeroValue);
     negatedValues = _mm_sub_ps(zeroValue, aVal);
     aVal = _mm_blendv_ps(aVal, negatedValues, signMask);
-    
+
     // powf4 doesn't support negative values in the base, so we mask them off and then apply the negative after
     cVal = powf4(aVal, vPower); // Takes each input value to the specified power
 
     cVal = _mm_mul_ps( _mm_blendv_ps(onesMask, negativeOneToPower, signMask), cVal);
 
     _mm_store_ps(cPtr,cVal); // Store the results back into the C container
-    
+
     aPtr += 4;
     cPtr += 4;
   }
@@ -78,7 +78,7 @@ static inline void volk_32f_s32f_power_32f_a_sse4_1(float* cVector, const float*
 */
 static inline void volk_32f_s32f_power_32f_a_sse(float* cVector, const float* aVector, const float power, unsigned int num_points){
   unsigned int number = 0;
-  
+
   float* cPtr = cVector;
   const float* aPtr = aVector;
 
@@ -90,22 +90,22 @@ static inline void volk_32f_s32f_power_32f_a_sse(float* cVector, const float* aV
   __m128 negatedValues;
   __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
   __m128 onesMask = _mm_set_ps1(1);
-  
+
   __m128 aVal, cVal;
   for(;number < quarterPoints; number++){
-    
+
     aVal = _mm_load_ps(aPtr);
     signMask = _mm_cmplt_ps(aVal, zeroValue);
     negatedValues = _mm_sub_ps(zeroValue, aVal);
     aVal = _mm_or_ps(_mm_andnot_ps(signMask, aVal), _mm_and_ps(signMask, negatedValues) );
-    
+
     // powf4 doesn't support negative values in the base, so we mask them off and then apply the negative after
     cVal = powf4(aVal, vPower); // Takes each input value to the specified power
 
     cVal = _mm_mul_ps( _mm_or_ps( _mm_andnot_ps(signMask, onesMask), _mm_and_ps(signMask, negativeOneToPower) ), cVal);
 
     _mm_store_ps(cPtr,cVal); // Store the results back into the C container
-    
+
     aPtr += 4;
     cPtr += 4;
   }
diff --git a/volk/include/volk/volk_32f_s32f_stddev_32f_a.h b/volk/include/volk/volk_32f_s32f_stddev_32f_a.h
index 75fe0cb2ed..98401b2d42 100644
--- a/volk/include/volk/volk_32f_s32f_stddev_32f_a.h
+++ b/volk/include/volk/volk_32f_s32f_stddev_32f_a.h
@@ -29,7 +29,7 @@ static inline void volk_32f_s32f_stddev_32f_a_sse4_1(float* stddev, const float*
     __m128 aVal1, aVal2, aVal3, aVal4;
     __m128 cVal1, cVal2, cVal3, cVal4;
     for(;number < sixteenthPoints; number++) {
-      aVal1 = _mm_load_ps(aPtr); aPtr += 4;    
+      aVal1 = _mm_load_ps(aPtr); aPtr += 4;
       cVal1 = _mm_dp_ps(aVal1, aVal1, 0xF1);
 
       aVal2 = _mm_load_ps(aPtr); aPtr += 4;
@@ -47,12 +47,12 @@ static inline void volk_32f_s32f_stddev_32f_a_sse4_1(float* stddev, const float*
 
       squareAccumulator = _mm_add_ps(squareAccumulator, cVal1); // squareAccumulator += x^2
     }
-    _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container  
+    _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container
     returnValue = squareBuffer[0];
     returnValue += squareBuffer[1];
     returnValue += squareBuffer[2];
     returnValue += squareBuffer[3];
-  
+
     number = sixteenthPoints * 16;
     for(;number < num_points; number++){
       returnValue += (*aPtr) * (*aPtr);
@@ -93,12 +93,12 @@ static inline void volk_32f_s32f_stddev_32f_a_sse(float* stddev, const float* in
       squareAccumulator = _mm_add_ps(squareAccumulator, aVal);
       aPtr += 4;
     }
-    _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container  
+    _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container
     returnValue = squareBuffer[0];
     returnValue += squareBuffer[1];
     returnValue += squareBuffer[2];
     returnValue += squareBuffer[3];
-  
+
     number = quarterPoints * 4;
     for(;number < num_points; number++){
       returnValue += (*aPtr) * (*aPtr);
@@ -125,7 +125,7 @@ static inline void volk_32f_s32f_stddev_32f_a_generic(float* stddev, const float
   if(num_points > 0){
     const float* aPtr = inputBuffer;
     unsigned int number = 0;
-      
+
     for(number = 0; number < num_points; number++){
       returnValue += (*aPtr) * (*aPtr);
       aPtr++;
diff --git a/volk/include/volk/volk_32f_sqrt_32f_a.h b/volk/include/volk/volk_32f_sqrt_32f_a.h
index e44c73cfd7..d9b16fc0fb 100644
--- a/volk/include/volk/volk_32f_sqrt_32f_a.h
+++ b/volk/include/volk/volk_32f_sqrt_32f_a.h
@@ -22,11 +22,11 @@ static inline void volk_32f_sqrt_32f_a_sse(float* cVector, const float* aVector,
 
     __m128 aVal, cVal;
     for(;number < quarterPoints; number++){
-      
-      aVal = _mm_load_ps(aPtr); 
-      
-      cVal = _mm_sqrt_ps(aVal); 
-      
+
+      aVal = _mm_load_ps(aPtr);
+
+      cVal = _mm_sqrt_ps(aVal);
+
       _mm_store_ps(cPtr,cVal); // Store the results back into the C container
 
       aPtr += 4;
diff --git a/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a.h b/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a.h
index 20ff676d89..7de32f7b18 100644
--- a/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a.h
+++ b/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a.h
@@ -31,7 +31,7 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a_sse4_1(float* stddev, float
     __m128 aVal1, aVal2, aVal3, aVal4;
     __m128 cVal1, cVal2, cVal3, cVal4;
     for(;number < sixteenthPoints; number++) {
-      aVal1 = _mm_load_ps(aPtr); aPtr += 4;   
+      aVal1 = _mm_load_ps(aPtr); aPtr += 4;
       cVal1 = _mm_dp_ps(aVal1, aVal1, 0xF1);
       accumulator = _mm_add_ps(accumulator, aVal1);  // accumulator += x
 
@@ -54,7 +54,7 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a_sse4_1(float* stddev, float
       squareAccumulator = _mm_add_ps(squareAccumulator, cVal1); // squareAccumulator += x^2
     }
     _mm_store_ps(meanBuffer,accumulator); // Store the results back into the C container
-    _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container  
+    _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container
     newMean = meanBuffer[0];
     newMean += meanBuffer[1];
     newMean += meanBuffer[2];
@@ -63,7 +63,7 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a_sse4_1(float* stddev, float
     returnValue += squareBuffer[1];
     returnValue += squareBuffer[2];
     returnValue += squareBuffer[3];
-  
+
     number = sixteenthPoints * 16;
     for(;number < num_points; number++){
       returnValue += (*aPtr) * (*aPtr);
@@ -110,7 +110,7 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a_sse(float* stddev, float* m
       aPtr += 4;
     }
     _mm_store_ps(meanBuffer,accumulator); // Store the results back into the C container
-    _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container  
+    _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container
     newMean = meanBuffer[0];
     newMean += meanBuffer[1];
     newMean += meanBuffer[2];
@@ -119,7 +119,7 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a_sse(float* stddev, float* m
     returnValue += squareBuffer[1];
     returnValue += squareBuffer[2];
     returnValue += squareBuffer[3];
-  
+
     number = quarterPoints * 4;
     for(;number < num_points; number++){
       returnValue += (*aPtr) * (*aPtr);
@@ -149,7 +149,7 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a_generic(float* stddev, floa
   if(num_points > 0){
     const float* aPtr = inputBuffer;
     unsigned int number = 0;
-    
+
     for(number = 0; number < num_points; number++){
       returnValue += (*aPtr) * (*aPtr);
       newMean += *aPtr++;
diff --git a/volk/include/volk/volk_32f_x2_add_32f_a.h b/volk/include/volk/volk_32f_x2_add_32f_a.h
index 3bc83653b0..51e63e54d2 100644
--- a/volk/include/volk/volk_32f_x2_add_32f_a.h
+++ b/volk/include/volk/volk_32f_x2_add_32f_a.h
@@ -23,12 +23,12 @@ static inline void volk_32f_x2_add_32f_a_sse(float* cVector, const float* aVecto
 
     __m128 aVal, bVal, cVal;
     for(;number < quarterPoints; number++){
-      
-      aVal = _mm_load_ps(aPtr); 
+
+      aVal = _mm_load_ps(aPtr);
       bVal = _mm_load_ps(bPtr);
-      
-      cVal = _mm_add_ps(aVal, bVal); 
-      
+
+      cVal = _mm_add_ps(aVal, bVal);
+
       _mm_store_ps(cPtr,cVal); // Store the results back into the C container
 
       aPtr += 4;
diff --git a/volk/include/volk/volk_32f_x2_add_32f_u.h b/volk/include/volk/volk_32f_x2_add_32f_u.h
index e360a79580..52e8286bc2 100644
--- a/volk/include/volk/volk_32f_x2_add_32f_u.h
+++ b/volk/include/volk/volk_32f_x2_add_32f_u.h
@@ -23,12 +23,12 @@ static inline void volk_32f_x2_add_32f_u_sse(float* cVector, const float* aVecto
 
     __m128 aVal, bVal, cVal;
     for(;number < quarterPoints; number++){
-      
-      aVal = _mm_loadu_ps(aPtr); 
+
+      aVal = _mm_loadu_ps(aPtr);
       bVal = _mm_loadu_ps(bPtr);
-      
-      cVal = _mm_add_ps(aVal, bVal); 
-      
+
+      cVal = _mm_add_ps(aVal, bVal);
+
       _mm_storeu_ps(cPtr,cVal); // Store the results back into the C container
 
       aPtr += 4;
diff --git a/volk/include/volk/volk_32f_x2_divide_32f_a.h b/volk/include/volk/volk_32f_x2_divide_32f_a.h
index 52ddfae875..7b60fb22ef 100644
--- a/volk/include/volk/volk_32f_x2_divide_32f_a.h
+++ b/volk/include/volk/volk_32f_x2_divide_32f_a.h
@@ -23,12 +23,12 @@ static inline void volk_32f_x2_divide_32f_a_sse(float* cVector, const float* aVe
 
     __m128 aVal, bVal, cVal;
     for(;number < quarterPoints; number++){
-      
-      aVal = _mm_load_ps(aPtr); 
+
+      aVal = _mm_load_ps(aPtr);
       bVal = _mm_load_ps(bPtr);
-      
-      cVal = _mm_div_ps(aVal, bVal); 
-      
+
+      cVal = _mm_div_ps(aVal, bVal);
+
       _mm_store_ps(cPtr,cVal); // Store the results back into the C container
 
       aPtr += 4;
diff --git a/volk/include/volk/volk_32f_x2_dot_prod_32f_a.h b/volk/include/volk/volk_32f_x2_dot_prod_32f_a.h
index 0c58f2ecfb..448b2fdc01 100644
--- a/volk/include/volk/volk_32f_x2_dot_prod_32f_a.h
+++ b/volk/include/volk/volk_32f_x2_dot_prod_32f_a.h
@@ -18,7 +18,7 @@ static inline void volk_32f_x2_dot_prod_32f_a_generic(float * result, const floa
   for(number = 0; number < num_points; number++){
     dotProduct += ((*aPtr++) * (*bPtr++));
   }
-  
+
   *result = dotProduct;
 }
 
@@ -29,7 +29,7 @@ static inline void volk_32f_x2_dot_prod_32f_a_generic(float * result, const floa
 
 
 static inline void volk_32f_x2_dot_prod_32f_a_sse( float* result, const  float* input, const  float* taps, unsigned int num_points) {
-  
+
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
 
@@ -42,11 +42,11 @@ static inline void volk_32f_x2_dot_prod_32f_a_sse( float* result, const  float*
   __m128 dotProdVal = _mm_setzero_ps();
 
   for(;number < quarterPoints; number++){
-      
-    aVal = _mm_load_ps(aPtr); 
+
+    aVal = _mm_load_ps(aPtr);
     bVal = _mm_load_ps(bPtr);
-      
-    cVal = _mm_mul_ps(aVal, bVal); 
+
+    cVal = _mm_mul_ps(aVal, bVal);
 
     dotProdVal = _mm_add_ps(cVal, dotProdVal);
 
@@ -69,10 +69,10 @@ static inline void volk_32f_x2_dot_prod_32f_a_sse( float* result, const  float*
   }
 
   *result = dotProduct;
-  
+
 }
 
-#endif /*LV_HAVE_SSE*/  
+#endif /*LV_HAVE_SSE*/
 
 #ifdef LV_HAVE_SSE3
 
@@ -91,11 +91,11 @@ static inline void volk_32f_x2_dot_prod_32f_a_sse3(float * result, const float *
   __m128 dotProdVal = _mm_setzero_ps();
 
   for(;number < quarterPoints; number++){
-      
-    aVal = _mm_load_ps(aPtr); 
+
+    aVal = _mm_load_ps(aPtr);
     bVal = _mm_load_ps(bPtr);
-      
-    cVal = _mm_mul_ps(aVal, bVal); 
+
+    cVal = _mm_mul_ps(aVal, bVal);
 
     dotProdVal = _mm_hadd_ps(dotProdVal, cVal);
 
@@ -117,7 +117,7 @@ static inline void volk_32f_x2_dot_prod_32f_a_sse3(float * result, const float *
   }
 
   *result = dotProduct;
-}  
+}
 
 #endif /*LV_HAVE_SSE3*/
 
@@ -140,7 +140,7 @@ static inline void volk_32f_x2_dot_prod_32f_a_sse4_1(float * result, const float
 
   __m128 dotProdVal = _mm_setzero_ps();
 
-  for(;number < sixteenthPoints; number++){      
+  for(;number < sixteenthPoints; number++){
 
     aVal1 = _mm_load_ps(aPtr); aPtr += 4;
     aVal2 = _mm_load_ps(aPtr); aPtr += 4;
@@ -151,7 +151,7 @@ static inline void volk_32f_x2_dot_prod_32f_a_sse4_1(float * result, const float
     bVal2 = _mm_load_ps(bPtr); bPtr += 4;
     bVal3 = _mm_load_ps(bPtr); bPtr += 4;
     bVal4 = _mm_load_ps(bPtr); bPtr += 4;
-    
+
     cVal1 = _mm_dp_ps(aVal1, bVal1, 0xF1);
     cVal2 = _mm_dp_ps(aVal2, bVal2, 0xF2);
     cVal3 = _mm_dp_ps(aVal3, bVal3, 0xF4);
@@ -178,7 +178,7 @@ static inline void volk_32f_x2_dot_prod_32f_a_sse4_1(float * result, const float
   }
 
   *result = dotProduct;
-}  
+}
 
 #endif /*LV_HAVE_SSE4_1*/
 
diff --git a/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h b/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h
index 7f47122fff..3b7284b575 100644
--- a/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h
+++ b/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h
@@ -17,7 +17,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_generic(float * result, const floa
   for(number = 0; number < num_points; number++){
     dotProduct += ((*aPtr++) * (*bPtr++));
   }
-  
+
   *result = dotProduct;
 }
 
@@ -28,7 +28,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_generic(float * result, const floa
 
 
 static inline void volk_32f_x2_dot_prod_32f_u_sse( float* result, const  float* input, const  float* taps, unsigned int num_points) {
-  
+
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
 
@@ -41,11 +41,11 @@ static inline void volk_32f_x2_dot_prod_32f_u_sse( float* result, const  float*
   __m128 dotProdVal = _mm_setzero_ps();
 
   for(;number < quarterPoints; number++){
-      
-    aVal = _mm_loadu_ps(aPtr); 
+
+    aVal = _mm_loadu_ps(aPtr);
     bVal = _mm_loadu_ps(bPtr);
-      
-    cVal = _mm_mul_ps(aVal, bVal); 
+
+    cVal = _mm_mul_ps(aVal, bVal);
 
     dotProdVal = _mm_add_ps(cVal, dotProdVal);
 
@@ -68,10 +68,10 @@ static inline void volk_32f_x2_dot_prod_32f_u_sse( float* result, const  float*
   }
 
   *result = dotProduct;
-  
+
 }
 
-#endif /*LV_HAVE_SSE*/  
+#endif /*LV_HAVE_SSE*/
 
 #ifdef LV_HAVE_SSE3
 
@@ -90,11 +90,11 @@ static inline void volk_32f_x2_dot_prod_32f_u_sse3(float * result, const float *
   __m128 dotProdVal = _mm_setzero_ps();
 
   for(;number < quarterPoints; number++){
-      
-    aVal = _mm_loadu_ps(aPtr); 
+
+    aVal = _mm_loadu_ps(aPtr);
     bVal = _mm_loadu_ps(bPtr);
-      
-    cVal = _mm_mul_ps(aVal, bVal); 
+
+    cVal = _mm_mul_ps(aVal, bVal);
 
     dotProdVal = _mm_hadd_ps(dotProdVal, cVal);
 
@@ -116,7 +116,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_sse3(float * result, const float *
   }
 
   *result = dotProduct;
-}  
+}
 
 #endif /*LV_HAVE_SSE3*/
 
@@ -140,7 +140,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_sse4_1(float * result, const float
   __m128 dotProdVal = _mm_setzero_ps();
 
   for(;number < sixteenthPoints; number++){
-      
+
     aVal1 = _mm_loadu_ps(aPtr); aPtr += 4;
     aVal2 = _mm_loadu_ps(aPtr); aPtr += 4;
     aVal3 = _mm_loadu_ps(aPtr); aPtr += 4;
@@ -150,7 +150,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_sse4_1(float * result, const float
     bVal2 = _mm_loadu_ps(bPtr); bPtr += 4;
     bVal3 = _mm_loadu_ps(bPtr); bPtr += 4;
     bVal4 = _mm_loadu_ps(bPtr); bPtr += 4;
-    
+
     cVal1 = _mm_dp_ps(aVal1, bVal1, 0xF1);
     cVal2 = _mm_dp_ps(aVal2, bVal2, 0xF2);
     cVal3 = _mm_dp_ps(aVal3, bVal3, 0xF4);
@@ -177,7 +177,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_sse4_1(float * result, const float
   }
 
   *result = dotProduct;
-}  
+}
 
 #endif /*LV_HAVE_SSE4_1*/
 
diff --git a/volk/include/volk/volk_32f_x2_interleave_32fc_a.h b/volk/include/volk/volk_32f_x2_interleave_32fc_a.h
index 1d4d2dbbd5..52d80b6bb3 100644
--- a/volk/include/volk/volk_32f_x2_interleave_32fc_a.h
+++ b/volk/include/volk/volk_32f_x2_interleave_32fc_a.h
@@ -20,7 +20,7 @@ static inline void volk_32f_x2_interleave_32fc_a_sse(lv_32fc_t* complexVector, c
   const float* qBufferPtr = qBuffer;
 
   const uint64_t quarterPoints = num_points / 4;
-    
+
   __m128 iValue, qValue, cplxValue;
   for(;number < quarterPoints; number++){
     iValue = _mm_load_ps(iBufferPtr);
diff --git a/volk/include/volk/volk_32f_x2_max_32f_a.h b/volk/include/volk/volk_32f_x2_max_32f_a.h
index 7948c458df..79f2d04b56 100644
--- a/volk/include/volk/volk_32f_x2_max_32f_a.h
+++ b/volk/include/volk/volk_32f_x2_max_32f_a.h
@@ -23,12 +23,12 @@ static inline void volk_32f_x2_max_32f_a_sse(float* cVector, const float* aVecto
 
     __m128 aVal, bVal, cVal;
     for(;number < quarterPoints; number++){
-      
-      aVal = _mm_load_ps(aPtr); 
+
+      aVal = _mm_load_ps(aPtr);
       bVal = _mm_load_ps(bPtr);
-      
-      cVal = _mm_max_ps(aVal, bVal); 
-      
+
+      cVal = _mm_max_ps(aVal, bVal);
+
       _mm_store_ps(cPtr,cVal); // Store the results back into the C container
 
       aPtr += 4;
diff --git a/volk/include/volk/volk_32f_x2_min_32f_a.h b/volk/include/volk/volk_32f_x2_min_32f_a.h
index d771348681..42cac08339 100644
--- a/volk/include/volk/volk_32f_x2_min_32f_a.h
+++ b/volk/include/volk/volk_32f_x2_min_32f_a.h
@@ -23,12 +23,12 @@ static inline void volk_32f_x2_min_32f_a_sse(float* cVector, const float* aVecto
 
     __m128 aVal, bVal, cVal;
     for(;number < quarterPoints; number++){
-      
-      aVal = _mm_load_ps(aPtr); 
+
+      aVal = _mm_load_ps(aPtr);
       bVal = _mm_load_ps(bPtr);
-      
-      cVal = _mm_min_ps(aVal, bVal); 
-      
+
+      cVal = _mm_min_ps(aVal, bVal);
+
       _mm_store_ps(cPtr,cVal); // Store the results back into the C container
 
       aPtr += 4;
diff --git a/volk/include/volk/volk_32f_x2_multiply_32f_a.h b/volk/include/volk/volk_32f_x2_multiply_32f_a.h
index fae9a652f1..340e051657 100644
--- a/volk/include/volk/volk_32f_x2_multiply_32f_a.h
+++ b/volk/include/volk/volk_32f_x2_multiply_32f_a.h
@@ -23,12 +23,12 @@ static inline void volk_32f_x2_multiply_32f_a_sse(float* cVector, const float* a
 
     __m128 aVal, bVal, cVal;
     for(;number < quarterPoints; number++){
-      
-      aVal = _mm_load_ps(aPtr); 
+
+      aVal = _mm_load_ps(aPtr);
       bVal = _mm_load_ps(bPtr);
-      
-      cVal = _mm_mul_ps(aVal, bVal); 
-      
+
+      cVal = _mm_mul_ps(aVal, bVal);
+
       _mm_store_ps(cPtr,cVal); // Store the results back into the C container
 
       aPtr += 4;
@@ -62,12 +62,12 @@ static inline void volk_32f_x2_multiply_32f_a_avx(float* cVector, const float* a
 
     __m256 aVal, bVal, cVal;
     for(;number < eighthPoints; number++){
-      
-      aVal = _mm256_load_ps(aPtr); 
+
+      aVal = _mm256_load_ps(aPtr);
       bVal = _mm256_load_ps(bPtr);
-      
-      cVal = _mm256_mul_ps(aVal, bVal); 
-      
+
+      cVal = _mm256_mul_ps(aVal, bVal);
+
       _mm256_store_ps(cPtr,cVal); // Store the results back into the C container
 
       aPtr += 8;
diff --git a/volk/include/volk/volk_32f_x2_multiply_32f_u.h b/volk/include/volk/volk_32f_x2_multiply_32f_u.h
index 6c3ce5d83f..bfb896d602 100644
--- a/volk/include/volk/volk_32f_x2_multiply_32f_u.h
+++ b/volk/include/volk/volk_32f_x2_multiply_32f_u.h
@@ -23,12 +23,12 @@ static inline void volk_32f_x2_multiply_32f_u_sse(float* cVector, const float* a
 
     __m128 aVal, bVal, cVal;
     for(;number < quarterPoints; number++){
-      
-      aVal = _mm_loadu_ps(aPtr); 
+
+      aVal = _mm_loadu_ps(aPtr);
       bVal = _mm_loadu_ps(bPtr);
-      
-      cVal = _mm_mul_ps(aVal, bVal); 
-      
+
+      cVal = _mm_mul_ps(aVal, bVal);
+
       _mm_storeu_ps(cPtr,cVal); // Store the results back into the C container
 
       aPtr += 4;
@@ -62,12 +62,12 @@ static inline void volk_32f_x2_multiply_32f_u_avx(float* cVector, const float* a
 
     __m256 aVal, bVal, cVal;
     for(;number < eighthPoints; number++){
-      
-      aVal = _mm256_loadu_ps(aPtr); 
+
+      aVal = _mm256_loadu_ps(aPtr);
       bVal = _mm256_loadu_ps(bPtr);
-      
-      cVal = _mm256_mul_ps(aVal, bVal); 
-      
+
+      cVal = _mm256_mul_ps(aVal, bVal);
+
       _mm256_storeu_ps(cPtr,cVal); // Store the results back into the C container
 
       aPtr += 8;
diff --git a/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a.h b/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a.h
index cc02c3678d..10fc267dcd 100644
--- a/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a.h
+++ b/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a.h
@@ -23,7 +23,7 @@ static inline void volk_32f_x2_s32f_interleave_16ic_a_sse2(lv_16sc_t* complexVec
     __m128 vScalar = _mm_set_ps1(scalar);
 
     const unsigned int quarterPoints = num_points / 4;
-    
+
     __m128 iValue, qValue, cplxValue1, cplxValue2;
     __m128i intValue1, intValue2;
 
@@ -59,7 +59,7 @@ static inline void volk_32f_x2_s32f_interleave_16ic_a_sse2(lv_16sc_t* complexVec
       *complexVectorPtr++ = (int16_t)(*iBufferPtr++ * scalar);
       *complexVectorPtr++ = (int16_t)(*qBufferPtr++ * scalar);
     }
-    
+
 }
 #endif /* LV_HAVE_SSE2 */
 
@@ -81,7 +81,7 @@ static inline void volk_32f_x2_s32f_interleave_16ic_a_sse(lv_16sc_t* complexVect
     __m128 vScalar = _mm_set_ps1(scalar);
 
     const unsigned int quarterPoints = num_points / 4;
-    
+
     __m128 iValue, qValue, cplxValue;
 
     int16_t* complexVectorPtr = (int16_t*)complexVector;
@@ -106,9 +106,9 @@ static inline void volk_32f_x2_s32f_interleave_16ic_a_sse(lv_16sc_t* complexVect
       // Interleaves the upper two values in the i and q variables into one buffer
       cplxValue = _mm_unpackhi_ps(iValue, qValue);
       cplxValue = _mm_mul_ps(cplxValue, vScalar);
- 
+
       _mm_store_ps(floatBuffer, cplxValue);
-      
+
       *complexVectorPtr++ = (int16_t)(floatBuffer[0]);
       *complexVectorPtr++ = (int16_t)(floatBuffer[1]);
       *complexVectorPtr++ = (int16_t)(floatBuffer[2]);
@@ -124,7 +124,7 @@ static inline void volk_32f_x2_s32f_interleave_16ic_a_sse(lv_16sc_t* complexVect
       *complexVectorPtr++ = (int16_t)(*iBufferPtr++ * scalar);
       *complexVectorPtr++ = (int16_t)(*qBufferPtr++ * scalar);
     }
-    
+
 }
 #endif /* LV_HAVE_SSE */
 
diff --git a/volk/include/volk/volk_32f_x2_subtract_32f_a.h b/volk/include/volk/volk_32f_x2_subtract_32f_a.h
index 16cad008aa..e2b8be797f 100644
--- a/volk/include/volk/volk_32f_x2_subtract_32f_a.h
+++ b/volk/include/volk/volk_32f_x2_subtract_32f_a.h
@@ -23,12 +23,12 @@ static inline void volk_32f_x2_subtract_32f_a_sse(float* cVector, const float* a
 
     __m128 aVal, bVal, cVal;
     for(;number < quarterPoints; number++){
-      
-      aVal = _mm_load_ps(aPtr); 
+
+      aVal = _mm_load_ps(aPtr);
       bVal = _mm_load_ps(bPtr);
-      
-      cVal = _mm_sub_ps(aVal, bVal); 
-      
+
+      cVal = _mm_sub_ps(aVal, bVal);
+
       _mm_store_ps(cPtr,cVal); // Store the results back into the C container
 
       aPtr += 4;
diff --git a/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a.h b/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a.h
index 153bb3a255..3c530628c8 100644
--- a/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a.h
+++ b/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a.h
@@ -14,33 +14,33 @@
 #include<pmmintrin.h>
 
 static inline void volk_32f_x3_sum_of_poly_32f_a_sse3(float* target, float* src0, float* center_point_array, float* cutoff, unsigned int num_bytes) {
-  
-  
+
+
   float result = 0.0;
   float fst = 0.0;
   float sq = 0.0;
   float thrd = 0.0;
   float frth = 0.0;
   //float fith = 0.0;
-  
-  
-  
+
+
+
   __m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10;// xmm11, xmm12;
 
   xmm9 = _mm_setzero_ps();
   xmm1 = _mm_setzero_ps();
-  
+
   xmm0 = _mm_load1_ps(&center_point_array[0]);
   xmm6 = _mm_load1_ps(&center_point_array[1]);
   xmm7 = _mm_load1_ps(&center_point_array[2]);
   xmm8 = _mm_load1_ps(&center_point_array[3]);
   //xmm11 = _mm_load1_ps(&center_point_array[4]);
   xmm10 = _mm_load1_ps(cutoff);
-  
+
   int bound = num_bytes >> 4;
   int leftovers = (num_bytes >> 2) & 3;
   int i = 0;
-  
+
   for(; i < bound; ++i) {
     xmm2 = _mm_load_ps(src0);
     xmm2 = _mm_max_ps(xmm10, xmm2);
@@ -57,23 +57,23 @@ static inline void volk_32f_x3_sum_of_poly_32f_a_sse3(float* target, float* src0
 
     xmm2 = _mm_add_ps(xmm2, xmm3);
     xmm3 = _mm_add_ps(xmm4, xmm5);
-    
+
     src0 += 4;
-    
+
     xmm9 = _mm_add_ps(xmm2, xmm9);
-    
+
     xmm1 = _mm_add_ps(xmm3, xmm1);
 
     //xmm9 = _mm_add_ps(xmm12, xmm9);
   }
-  
+
   xmm2 = _mm_hadd_ps(xmm9, xmm1);
   xmm3 = _mm_hadd_ps(xmm2, xmm2);
   xmm4 = _mm_hadd_ps(xmm3, xmm3);
 
   _mm_store_ss(&result, xmm4);
-    
-  
+
+
 
   for(i = 0; i < leftovers; ++i) {
     fst = src0[i];
@@ -82,11 +82,11 @@ static inline void volk_32f_x3_sum_of_poly_32f_a_sse3(float* target, float* src0
     thrd = fst * sq;
     frth = sq * sq;
     //fith = sq * thrd;
-    
-    result += (center_point_array[0] * fst + 
-	       center_point_array[1] * sq + 
-	       center_point_array[2] * thrd + 
-	       center_point_array[3] * frth);// + 
+
+    result += (center_point_array[0] * fst +
+	       center_point_array[1] * sq +
+	       center_point_array[2] * thrd +
+	       center_point_array[3] * frth);// +
 	       //center_point_array[4] * fith);
   }
 
@@ -94,7 +94,7 @@ static inline void volk_32f_x3_sum_of_poly_32f_a_sse3(float* target, float* src0
 
   target[0] = result;
 }
- 
+
 
 #endif /*LV_HAVE_SSE3*/
 
@@ -103,45 +103,45 @@ static inline void volk_32f_x3_sum_of_poly_32f_a_sse3(float* target, float* src0
 static inline void volk_32f_x3_sum_of_poly_32f_a_generic(float* target, float* src0, float* center_point_array, float* cutoff, unsigned int num_bytes) {
 
 
-    
+
   float result = 0.0;
   float fst = 0.0;
   float sq = 0.0;
   float thrd = 0.0;
   float frth = 0.0;
   //float fith = 0.0;
-  
 
 
-  unsigned int i = 0; 
-  
+
+  unsigned int i = 0;
+
   for(; i < num_bytes >> 2; ++i) {
     fst = src0[i];
     fst = MAX(fst, *cutoff);
-    
+
     sq = fst * fst;
     thrd = fst * sq;
     frth = sq * sq;
     //fith = sq * thrd;
-    
-    result += (center_point_array[0] * fst + 
-	       center_point_array[1] * sq + 
-	       center_point_array[2] * thrd + 
+
+    result += (center_point_array[0] * fst +
+	       center_point_array[1] * sq +
+	       center_point_array[2] * thrd +
 	       center_point_array[3] * frth); //+
 	       //center_point_array[4] * fith);
-    /*printf("%f12...%d\n", (center_point_array[0] * fst + 
-		  center_point_array[1] * sq + 
-		  center_point_array[2] * thrd + 
+    /*printf("%f12...%d\n", (center_point_array[0] * fst +
+		  center_point_array[1] * sq +
+		  center_point_array[2] * thrd +
 			 center_point_array[3] * frth) +
-	   //center_point_array[4] * fith) + 
+	   //center_point_array[4] * fith) +
 	   (center_point_array[4]), i);
     */
   }
 
   result += ((float)(num_bytes >> 2)) * (center_point_array[4]);//(center_point_array[5]);
 
-  
-  
+
+
   *target = result;
 }
 
diff --git a/volk/include/volk/volk_32fc_32f_multiply_32fc_a.h b/volk/include/volk/volk_32fc_32f_multiply_32fc_a.h
index b7350b9fa7..28d584bf2c 100644
--- a/volk/include/volk/volk_32fc_32f_multiply_32fc_a.h
+++ b/volk/include/volk/volk_32fc_32f_multiply_32fc_a.h
@@ -23,11 +23,11 @@ static inline void volk_32fc_32f_multiply_32fc_a_sse(lv_32fc_t* cVector, const l
 
     __m128 aVal1, aVal2, bVal, bVal1, bVal2, cVal;
     for(;number < quarterPoints; number++){
-      
+
       aVal1 = _mm_load_ps((const float*)aPtr);
       aPtr += 2;
- 
-      aVal2 = _mm_load_ps((const float*)aPtr); 
+
+      aVal2 = _mm_load_ps((const float*)aPtr);
       aPtr += 2;
 
       bVal = _mm_load_ps(bPtr);
@@ -36,13 +36,13 @@ static inline void volk_32fc_32f_multiply_32fc_a_sse(lv_32fc_t* cVector, const l
       bVal1 = _mm_shuffle_ps(bVal, bVal, _MM_SHUFFLE(1,1,0,0));
       bVal2 = _mm_shuffle_ps(bVal, bVal, _MM_SHUFFLE(3,3,2,2));
 
-      cVal = _mm_mul_ps(aVal1, bVal1); 
-      
+      cVal = _mm_mul_ps(aVal1, bVal1);
+
       _mm_store_ps((float*)cPtr,cVal); // Store the results back into the C container
       cPtr += 2;
 
-      cVal = _mm_mul_ps(aVal2, bVal2); 
-      
+      cVal = _mm_mul_ps(aVal2, bVal2);
+
       _mm_store_ps((float*)cPtr,cVal); // Store the results back into the C container
 
       cPtr += 2;
@@ -69,7 +69,7 @@ static inline void volk_32fc_32f_multiply_32fc_a_generic(lv_32fc_t* cVector, con
   const lv_32fc_t* aPtr = aVector;
   const float* bPtr=  bVector;
   unsigned int number = 0;
-  
+
   for(number = 0; number < num_points; number++){
     *cPtr++ = (*aPtr++) * (*bPtr++);
   }
diff --git a/volk/include/volk/volk_32fc_conjugate_32fc_a.h b/volk/include/volk/volk_32fc_conjugate_32fc_a.h
index 1518af9be9..919280d510 100644
--- a/volk/include/volk/volk_32fc_conjugate_32fc_a.h
+++ b/volk/include/volk/volk_32fc_conjugate_32fc_a.h
@@ -25,11 +25,11 @@ static inline void volk_32fc_conjugate_32fc_a_sse3(lv_32fc_t* cVector, const lv_
     __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
 
     for(;number < halfPoints; number++){
-      
+
       x = _mm_load_ps((float*)a); // Load the complex data as ar,ai,br,bi
 
       x = _mm_xor_ps(x, conjugator); // conjugate register
-    
+
       _mm_store_ps((float*)c,x); // Store the results back into the C container
 
       a += 2;
diff --git a/volk/include/volk/volk_32fc_conjugate_32fc_u.h b/volk/include/volk/volk_32fc_conjugate_32fc_u.h
index b26fe0789a..e0d79ea7bc 100644
--- a/volk/include/volk/volk_32fc_conjugate_32fc_u.h
+++ b/volk/include/volk/volk_32fc_conjugate_32fc_u.h
@@ -21,13 +21,13 @@ static inline void volk_32fc_conjugate_32fc_u_sse3(lv_32fc_t* cVector, const lv_
     __m128 x;
     lv_32fc_t* c = cVector;
     const lv_32fc_t* a = aVector;
-  
+
     __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
 
     for(;number < halfPoints; number++){
-      
+
       x = _mm_loadu_ps((float*)a); // Load the complex data as ar,ai,br,bi
-      
+
       x = _mm_xor_ps(x, conjugator); // conjugate register
 
       _mm_storeu_ps((float*)c,x); // Store the results back into the C container
diff --git a/volk/include/volk/volk_32fc_deinterleave_32f_x2_a.h b/volk/include/volk/volk_32fc_deinterleave_32f_x2_a.h
index 9de036ef48..4106f38513 100644
--- a/volk/include/volk/volk_32fc_deinterleave_32f_x2_a.h
+++ b/volk/include/volk/volk_32fc_deinterleave_32f_x2_a.h
@@ -19,10 +19,10 @@ static inline void volk_32fc_deinterleave_32f_x2_a_sse(float* iBuffer, float* qB
   float* qBufferPtr = qBuffer;
 
   unsigned int number = 0;
-  const unsigned int quarterPoints = num_points / 4;    
+  const unsigned int quarterPoints = num_points / 4;
   __m128 cplxValue1, cplxValue2, iValue, qValue;
   for(;number < quarterPoints; number++){
-      
+
     cplxValue1 = _mm_load_ps(complexVectorPtr);
     complexVectorPtr += 4;
 
diff --git a/volk/include/volk/volk_32fc_deinterleave_64f_x2_a.h b/volk/include/volk/volk_32fc_deinterleave_64f_x2_a.h
index 29c369d9ae..77566e671d 100644
--- a/volk/include/volk/volk_32fc_deinterleave_64f_x2_a.h
+++ b/volk/include/volk/volk_32fc_deinterleave_64f_x2_a.h
@@ -20,23 +20,23 @@ static inline void volk_32fc_deinterleave_64f_x2_a_sse2(double* iBuffer, double*
     double* iBufferPtr = iBuffer;
     double* qBufferPtr = qBuffer;
 
-    const unsigned int halfPoints = num_points / 2;    
+    const unsigned int halfPoints = num_points / 2;
     __m128 cplxValue, fVal;
     __m128d dVal;
 
     for(;number < halfPoints; number++){
-      
+
       cplxValue = _mm_load_ps(complexVectorPtr);
       complexVectorPtr += 4;
 
       // Arrange in i1i2i1i2 format
       fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2,0,2,0));
-      dVal = _mm_cvtps_pd(fVal); 
+      dVal = _mm_cvtps_pd(fVal);
       _mm_store_pd(iBufferPtr, dVal);
 
       // Arrange in q1q2q1q2 format
       fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(3,1,3,1));
-      dVal = _mm_cvtps_pd(fVal); 
+      dVal = _mm_cvtps_pd(fVal);
       _mm_store_pd(qBufferPtr, dVal);
 
       iBufferPtr += 2;
diff --git a/volk/include/volk/volk_32fc_deinterleave_imag_32f_a.h b/volk/include/volk/volk_32fc_deinterleave_imag_32f_a.h
index adc4112b95..c88809bebd 100644
--- a/volk/include/volk/volk_32fc_deinterleave_imag_32f_a.h
+++ b/volk/include/volk/volk_32fc_deinterleave_imag_32f_a.h
@@ -21,7 +21,7 @@ static inline void volk_32fc_deinterleave_imag_32f_a_sse(float* qBuffer, const l
 
   __m128 cplxValue1, cplxValue2, iValue;
   for(;number < quarterPoints; number++){
-      
+
     cplxValue1 = _mm_load_ps(complexVectorPtr);
     complexVectorPtr += 4;
 
diff --git a/volk/include/volk/volk_32fc_deinterleave_real_32f_a.h b/volk/include/volk/volk_32fc_deinterleave_real_32f_a.h
index a1d0fd5d10..0d6c6b7af4 100644
--- a/volk/include/volk/volk_32fc_deinterleave_real_32f_a.h
+++ b/volk/include/volk/volk_32fc_deinterleave_real_32f_a.h
@@ -21,7 +21,7 @@ static inline void volk_32fc_deinterleave_real_32f_a_sse(float* iBuffer, const l
 
   __m128 cplxValue1, cplxValue2, iValue;
   for(;number < quarterPoints; number++){
-      
+
     cplxValue1 = _mm_load_ps(complexVectorPtr);
     complexVectorPtr += 4;
 
diff --git a/volk/include/volk/volk_32fc_deinterleave_real_64f_a.h b/volk/include/volk/volk_32fc_deinterleave_real_64f_a.h
index 70a3b19718..1e346bacaf 100644
--- a/volk/include/volk/volk_32fc_deinterleave_real_64f_a.h
+++ b/volk/include/volk/volk_32fc_deinterleave_real_64f_a.h
@@ -18,17 +18,17 @@ static inline void volk_32fc_deinterleave_real_64f_a_sse2(double* iBuffer, const
   const float* complexVectorPtr = (float*)complexVector;
   double* iBufferPtr = iBuffer;
 
-  const unsigned int halfPoints = num_points / 2;    
+  const unsigned int halfPoints = num_points / 2;
   __m128 cplxValue, fVal;
   __m128d dVal;
   for(;number < halfPoints; number++){
-      
+
     cplxValue = _mm_load_ps(complexVectorPtr);
     complexVectorPtr += 4;
 
     // Arrange in i1i2i1i2 format
     fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2,0,2,0));
-    dVal = _mm_cvtps_pd(fVal); 
+    dVal = _mm_cvtps_pd(fVal);
     _mm_store_pd(iBufferPtr, dVal);
 
     iBufferPtr += 2;
diff --git a/volk/include/volk/volk_32fc_index_max_16u_a.h b/volk/include/volk/volk_32fc_index_max_16u_a.h
index 125a345827..842a6a0420 100644
--- a/volk/include/volk/volk_32fc_index_max_16u_a.h
+++ b/volk/include/volk/volk_32fc_index_max_16u_a.h
@@ -12,16 +12,16 @@
 
 
 static inline void volk_32fc_index_max_16u_a_sse3(unsigned int* target, lv_32fc_t* src0, unsigned int num_bytes) {
-  
-  
-  
+
+
+
   union bit128 holderf;
   union bit128 holderi;
   float sq_dist = 0.0;
 
 
 
-  
+
   union bit128 xmm5, xmm4;
   __m128 xmm1, xmm2, xmm3;
   __m128i xmm8, xmm11, xmm12, xmmfive, xmmfour, xmm9, holder0, holder1, xmm10;
@@ -30,63 +30,63 @@ static inline void volk_32fc_index_max_16u_a_sse3(unsigned int* target, lv_32fc_
   xmm4.int_vec = xmmfour = _mm_setzero_si128();
   holderf.int_vec = holder0 = _mm_setzero_si128();
   holderi.int_vec = holder1 = _mm_setzero_si128();
- 
-  
+
+
   int bound = num_bytes >> 5;
   int leftovers0 = (num_bytes >> 4) & 1;
   int leftovers1 = (num_bytes >> 3) & 1;
   int i = 0;
-  
-  
+
+
   xmm8 = _mm_set_epi32(3, 2, 1, 0);//remember the crazy reverse order!
   xmm9 = xmm8 = _mm_setzero_si128();
   xmm10 = _mm_set_epi32(4, 4, 4, 4);
   xmm3 = _mm_setzero_ps();
 ;
-  
+
   //printf("%f, %f, %f, %f\n", ((float*)&xmm10)[0], ((float*)&xmm10)[1], ((float*)&xmm10)[2], ((float*)&xmm10)[3]);
-  
+
   for(; i < bound; ++i) {
-  
+
     xmm1 = _mm_load_ps((float*)src0);
     xmm2 = _mm_load_ps((float*)&src0[2]);
-    
+
 
     src0 += 4;
-  
-  
+
+
     xmm1 = _mm_mul_ps(xmm1, xmm1);
     xmm2 = _mm_mul_ps(xmm2, xmm2);
-    
-    
+
+
     xmm1 = _mm_hadd_ps(xmm1, xmm2);
 
     xmm3 = _mm_max_ps(xmm1, xmm3);
-  
+
     xmm4.float_vec = _mm_cmplt_ps(xmm1, xmm3);
     xmm5.float_vec = _mm_cmpeq_ps(xmm1, xmm3);
-    
-        
-    
+
+
+
     xmm11 = _mm_and_si128(xmm8, xmm5.int_vec);
     xmm12 = _mm_and_si128(xmm9, xmm4.int_vec);
-    
+
     xmm9 = _mm_add_epi32(xmm11,  xmm12);
 
-    xmm8 = _mm_add_epi32(xmm8, xmm10);    
+    xmm8 = _mm_add_epi32(xmm8, xmm10);
+
 
-    
     //printf("%f, %f, %f, %f\n", ((float*)&xmm3)[0], ((float*)&xmm3)[1], ((float*)&xmm3)[2], ((float*)&xmm3)[3]);
     //printf("%u, %u, %u, %u\n", ((uint32_t*)&xmm10)[0], ((uint32_t*)&xmm10)[1], ((uint32_t*)&xmm10)[2], ((uint32_t*)&xmm10)[3]);
 
   }
-  
-  
+
+
   for(i = 0; i < leftovers0; ++i) {
 
 
     xmm2 = _mm_load_ps((float*)src0);
-    
+
     xmm1 = _mm_movelh_ps(bit128_p(&xmm8)->float_vec, bit128_p(&xmm8)->float_vec);
     xmm8 = bit128_p(&xmm1)->int_vec;
 
@@ -99,63 +99,63 @@ static inline void volk_32fc_index_max_16u_a_sse3(unsigned int* target, lv_32fc_
     xmm3 = _mm_max_ps(xmm1, xmm3);
 
     xmm10 = _mm_set_epi32(2, 2, 2, 2);//load1_ps((float*)&init[2]);
-    
-    
+
+
     xmm4.float_vec = _mm_cmplt_ps(xmm1, xmm3);
     xmm5.float_vec = _mm_cmpeq_ps(xmm1, xmm3);
-    
-        
-    
+
+
+
     xmm11 = _mm_and_si128(xmm8, xmm5.int_vec);
     xmm12 = _mm_and_si128(xmm9, xmm4.int_vec);
-    
+
     xmm9 = _mm_add_epi32(xmm11, xmm12);
 
-    xmm8 = _mm_add_epi32(xmm8, xmm10);    
+    xmm8 = _mm_add_epi32(xmm8, xmm10);
     //printf("egads%u, %u, %u, %u\n", ((uint32_t*)&xmm9)[0], ((uint32_t*)&xmm9)[1], ((uint32_t*)&xmm9)[2], ((uint32_t*)&xmm9)[3]);
 
   }
-    
-  
-    
+
+
+
 
   for(i = 0; i < leftovers1; ++i) {
     //printf("%u, %u, %u, %u\n", ((uint32_t*)&xmm9)[0], ((uint32_t*)&xmm9)[1], ((uint32_t*)&xmm9)[2], ((uint32_t*)&xmm9)[3]);
-    
+
 
     sq_dist = lv_creal(src0[0]) * lv_creal(src0[0]) + lv_cimag(src0[0]) * lv_cimag(src0[0]);
-    
+
     xmm2 = _mm_load1_ps(&sq_dist);
 
     xmm1 = xmm3;
-    
+
     xmm3 = _mm_max_ss(xmm3, xmm2);
 
-    
-        
+
+
     xmm4.float_vec = _mm_cmplt_ps(xmm1, xmm3);
     xmm5.float_vec = _mm_cmpeq_ps(xmm1, xmm3);
-    
-        
-    xmm8 = _mm_shuffle_epi32(xmm8, 0x00);  
-    
+
+
+    xmm8 = _mm_shuffle_epi32(xmm8, 0x00);
+
     xmm11 = _mm_and_si128(xmm8, xmm4.int_vec);
     xmm12 = _mm_and_si128(xmm9, xmm5.int_vec);
-    
+
 
     xmm9 = _mm_add_epi32(xmm11, xmm12);
 
   }
-  
+
   //printf("%f, %f, %f, %f\n", ((float*)&xmm3)[0], ((float*)&xmm3)[1], ((float*)&xmm3)[2], ((float*)&xmm3)[3]);
 
   //printf("%u, %u, %u, %u\n", ((uint32_t*)&xmm9)[0], ((uint32_t*)&xmm9)[1], ((uint32_t*)&xmm9)[2], ((uint32_t*)&xmm9)[3]);
 
   _mm_store_ps((float*)&(holderf.f), xmm3);
   _mm_store_si128(&(holderi.int_vec), xmm9);
-  
+
   target[0] = holderi.i[0];
-  sq_dist = holderf.f[0]; 
+  sq_dist = holderf.f[0];
   target[0] = (holderf.f[1] > sq_dist) ? holderi.i[1] : target[0];
   sq_dist = (holderf.f[1] > sq_dist) ? holderf.f[1] : sq_dist;
   target[0] = (holderf.f[2] > sq_dist) ? holderi.i[2] : target[0];
@@ -163,27 +163,27 @@ static inline void volk_32fc_index_max_16u_a_sse3(unsigned int* target, lv_32fc_
   target[0] = (holderf.f[3] > sq_dist) ? holderi.i[3] : target[0];
   sq_dist = (holderf.f[3] > sq_dist) ? holderf.f[3] : sq_dist;
 
- 
-  
+
+
   /*
   float placeholder = 0.0;
-  uint32_t temp0, temp1; 
+  uint32_t temp0, temp1;
   unsigned int g0 = (((float*)&xmm3)[0] > ((float*)&xmm3)[1]);
   unsigned int l0 = g0 ^ 1;
 
   unsigned int g1 = (((float*)&xmm3)[1] > ((float*)&xmm3)[2]);
   unsigned int l1 = g1 ^ 1;
-  
+
   temp0 = g0 * ((uint32_t*)&xmm9)[0] + l0 * ((uint32_t*)&xmm9)[1];
   temp1 = g0 * ((uint32_t*)&xmm9)[2] + l0 * ((uint32_t*)&xmm9)[3];
-  sq_dist = g0 * ((float*)&xmm3)[0] + l0 * ((float*)&xmm3)[1]; 
+  sq_dist = g0 * ((float*)&xmm3)[0] + l0 * ((float*)&xmm3)[1];
   placeholder = g0 * ((float*)&xmm3)[2] + l0 * ((float*)&xmm3)[3];
-  
+
   g0 = (sq_dist > placeholder);
   l0 = g0 ^ 1;
   target[0] = g0 * temp0 + l0 * temp1;
   */
-  
+
 }
 
 #endif /*LV_HAVE_SSE3*/
@@ -193,20 +193,20 @@ static inline void volk_32fc_index_max_16u_a_generic(unsigned int* target, lv_32
   float sq_dist = 0.0;
   float max = 0.0;
   unsigned int index = 0;
-  
-  unsigned int i = 0; 
-  
+
+  unsigned int i = 0;
+
   for(; i < num_bytes >> 3; ++i) {
 
     sq_dist = lv_creal(src0[i]) * lv_creal(src0[i]) + lv_cimag(src0[i]) * lv_cimag(src0[i]);
-    
+
     index = sq_dist > max ? i : index;
     max = sq_dist > max ? sq_dist : max;
-    
-    
+
+
   }
   target[0] = index;
-  
+
 }
 
 #endif /*LV_HAVE_GENERIC*/
diff --git a/volk/include/volk/volk_32fc_magnitude_32f_a.h b/volk/include/volk/volk_32fc_magnitude_32f_a.h
index f18e9bc0b0..efb84a904b 100644
--- a/volk/include/volk/volk_32fc_magnitude_32f_a.h
+++ b/volk/include/volk/volk_32fc_magnitude_32f_a.h
@@ -59,7 +59,7 @@ static inline void volk_32fc_magnitude_32f_a_sse3(float* magnitudeVector, const
 static inline void volk_32fc_magnitude_32f_a_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
-    
+
     const float* complexVectorPtr = (float*)complexVector;
     float* magnitudeVectorPtr = magnitudeVector;
 
diff --git a/volk/include/volk/volk_32fc_magnitude_32f_u.h b/volk/include/volk/volk_32fc_magnitude_32f_u.h
index ed1cedef99..c8b3f0a088 100644
--- a/volk/include/volk/volk_32fc_magnitude_32f_u.h
+++ b/volk/include/volk/volk_32fc_magnitude_32f_u.h
@@ -59,7 +59,7 @@ static inline void volk_32fc_magnitude_32f_u_sse3(float* magnitudeVector, const
 static inline void volk_32fc_magnitude_32f_u_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
-    
+
     const float* complexVectorPtr = (float*)complexVector;
     float* magnitudeVectorPtr = magnitudeVector;
 
diff --git a/volk/include/volk/volk_32fc_magnitude_squared_32f_a.h b/volk/include/volk/volk_32fc_magnitude_squared_32f_a.h
index 00bdefbb55..d3ac9717a8 100644
--- a/volk/include/volk/volk_32fc_magnitude_squared_32f_a.h
+++ b/volk/include/volk/volk_32fc_magnitude_squared_32f_a.h
@@ -57,7 +57,7 @@ static inline void volk_32fc_magnitude_squared_32f_a_sse3(float* magnitudeVector
 static inline void volk_32fc_magnitude_squared_32f_a_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
-    
+
     const float* complexVectorPtr = (float*)complexVector;
     float* magnitudeVectorPtr = magnitudeVector;
 
diff --git a/volk/include/volk/volk_32fc_magnitude_squared_32f_u.h b/volk/include/volk/volk_32fc_magnitude_squared_32f_u.h
index 6eb4a523ad..53a4e68eb4 100644
--- a/volk/include/volk/volk_32fc_magnitude_squared_32f_u.h
+++ b/volk/include/volk/volk_32fc_magnitude_squared_32f_u.h
@@ -57,7 +57,7 @@ static inline void volk_32fc_magnitude_squared_32f_u_sse3(float* magnitudeVector
 static inline void volk_32fc_magnitude_squared_32f_u_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
-    
+
     const float* complexVectorPtr = (float*)complexVector;
     float* magnitudeVectorPtr = magnitudeVector;
 
diff --git a/volk/include/volk/volk_32fc_s32f_atan2_32f_a.h b/volk/include/volk/volk_32fc_s32f_atan2_32f_a.h
index 7bd001aa08..d86bd63c1c 100644
--- a/volk/include/volk/volk_32fc_s32f_atan2_32f_a.h
+++ b/volk/include/volk/volk_32fc_s32f_atan2_32f_a.h
@@ -27,14 +27,14 @@ static inline void volk_32fc_s32f_atan2_32f_a_sse4_1(float* outputVector,  const
   const float invNormalizeFactor = 1.0 / normalizeFactor;
 
 #ifdef LV_HAVE_LIB_SIMDMATH
-  const unsigned int quarterPoints = num_points / 4;    
+  const unsigned int quarterPoints = num_points / 4;
   __m128 testVector = _mm_set_ps1(2*M_PI);
   __m128 correctVector = _mm_set_ps1(M_PI);
   __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
   __m128 phase;
   __m128 complex1, complex2, iValue, qValue;
   __m128 keepMask;
-    
+
   for (; number < quarterPoints; number++) {
     // Load IQ data:
     complex1 = _mm_load_ps(complexVectorPtr);
@@ -42,15 +42,15 @@ static inline void volk_32fc_s32f_atan2_32f_a_sse4_1(float* outputVector,  const
     complex2 = _mm_load_ps(complexVectorPtr);
     complexVectorPtr += 4;
     // Deinterleave IQ data:
-    iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2,0,2,0)); 
-    qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3,1,3,1)); 
+    iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2,0,2,0));
+    qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3,1,3,1));
     // Arctan to get phase:
     phase = atan2f4(qValue, iValue);
     // When Q = 0 and I < 0, atan2f4 sucks and returns 2pi vice pi.
     // Compare to 2pi:
     keepMask = _mm_cmpneq_ps(phase,testVector);
     phase = _mm_blendv_ps(correctVector, phase, keepMask);
-    // done with above correction.      
+    // done with above correction.
     phase = _mm_mul_ps(phase, vNormalizeFactor);
     _mm_store_ps((float*)outPtr, phase);
     outPtr += 4;
@@ -89,7 +89,7 @@ static inline void volk_32fc_s32f_atan2_32f_a_sse(float* outputVector,  const lv
   const float invNormalizeFactor = 1.0 / normalizeFactor;
 
 #ifdef LV_HAVE_LIB_SIMDMATH
-  const unsigned int quarterPoints = num_points / 4;    
+  const unsigned int quarterPoints = num_points / 4;
   __m128 testVector = _mm_set_ps1(2*M_PI);
   __m128 correctVector = _mm_set_ps1(M_PI);
   __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
@@ -97,7 +97,7 @@ static inline void volk_32fc_s32f_atan2_32f_a_sse(float* outputVector,  const lv
   __m128 complex1, complex2, iValue, qValue;
   __m128 mask;
   __m128 keepMask;
-    
+
   for (; number < quarterPoints; number++) {
     // Load IQ data:
     complex1 = _mm_load_ps(complexVectorPtr);
@@ -105,8 +105,8 @@ static inline void volk_32fc_s32f_atan2_32f_a_sse(float* outputVector,  const lv
     complex2 = _mm_load_ps(complexVectorPtr);
     complexVectorPtr += 4;
     // Deinterleave IQ data:
-    iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2,0,2,0)); 
-    qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3,1,3,1)); 
+    iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2,0,2,0));
+    qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3,1,3,1));
     // Arctan to get phase:
     phase = atan2f4(qValue, iValue);
     // When Q = 0 and I < 0, atan2f4 sucks and returns 2pi vice pi.
@@ -115,7 +115,7 @@ static inline void volk_32fc_s32f_atan2_32f_a_sse(float* outputVector,  const lv
     phase = _mm_and_ps(phase, keepMask);
     mask = _mm_andnot_ps(keepMask, correctVector);
     phase = _mm_or_ps(phase, mask);
-    // done with above correction.      
+    // done with above correction.
     phase = _mm_mul_ps(phase, vNormalizeFactor);
     _mm_store_ps((float*)outPtr, phase);
     outPtr += 4;
diff --git a/volk/include/volk/volk_32fc_s32f_power_32fc_a.h b/volk/include/volk/volk_32fc_s32f_power_32fc_a.h
index 588b532b4e..3106edbefd 100644
--- a/volk/include/volk/volk_32fc_s32f_power_32fc_a.h
+++ b/volk/include/volk/volk_32fc_s32f_power_32fc_a.h
@@ -28,55 +28,55 @@ static inline lv_32fc_t __volk_s32fc_s32f_power_s32fc_a(const lv_32fc_t exp, con
 */
 static inline void volk_32fc_s32f_power_32fc_a_sse(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float power, unsigned int num_points){
   unsigned int number = 0;
-  
+
   lv_32fc_t* cPtr = cVector;
   const lv_32fc_t* aPtr = aVector;
 
 #ifdef LV_HAVE_LIB_SIMDMATH
   const unsigned int quarterPoints = num_points / 4;
   __m128 vPower = _mm_set_ps1(power);
-  
+
   __m128 cplxValue1, cplxValue2, magnitude, phase, iValue, qValue;
   for(;number < quarterPoints; number++){
-    
-    cplxValue1 = _mm_load_ps((float*)aPtr); 
+
+    cplxValue1 = _mm_load_ps((float*)aPtr);
     aPtr += 2;
-    
-    cplxValue2 = _mm_load_ps((float*)aPtr); 
+
+    cplxValue2 = _mm_load_ps((float*)aPtr);
     aPtr += 2;
-    
+
     // Convert to polar coordinates
-    
+
     // Arrange in i1i2i3i4 format
     iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
     // Arrange in q1q2q3q4 format
     qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
-    
+
     phase = atan2f4(qValue, iValue); // Calculate the Phase
-    
+
     magnitude = _mm_sqrt_ps(_mm_add_ps(_mm_mul_ps(iValue, iValue), _mm_mul_ps(qValue, qValue))); // Calculate the magnitude by square rooting the added I2 and Q2 values
-    
+
     // Now calculate the power of the polar coordinate data
     magnitude = powf4(magnitude, vPower); // Take the magnitude to the specified power
-    
+
     phase = _mm_mul_ps(phase, vPower); // Multiply the phase by the specified power
-    
+
     // Convert back to cartesian coordinates
     iValue = _mm_mul_ps( cosf4(phase), magnitude); // Multiply the cos of the phase by the magnitude
     qValue = _mm_mul_ps( sinf4(phase), magnitude); // Multiply the sin of the phase by the magnitude
-    
+
     cplxValue1 = _mm_unpacklo_ps(iValue, qValue); // Interleave the lower two i & q values
     cplxValue2 = _mm_unpackhi_ps(iValue, qValue); // Interleave the upper two i & q values
-    
+
     _mm_store_ps((float*)cPtr,cplxValue1); // Store the results back into the C container
-    
+
     cPtr += 2;
-    
+
     _mm_store_ps((float*)cPtr,cplxValue2); // Store the results back into the C container
-    
+
     cPtr += 2;
   }
-  
+
   number = quarterPoints * 4;
 #endif /* LV_HAVE_LIB_SIMDMATH */
 
diff --git a/volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a.h b/volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a.h
index 8d1959dae3..30a77dbc18 100644
--- a/volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a.h
+++ b/volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a.h
@@ -34,7 +34,7 @@ static inline void volk_32fc_s32f_power_spectrum_32f_a_sse3(float* logPowerOutpu
   __m128 input1, input2;
   const uint64_t quarterPoints = num_points / 4;
   for(;number < quarterPoints; number++){
-    // Load the complex values 
+    // Load the complex values
     input1 =_mm_load_ps(inputPtr);
     inputPtr += 4;
     input2 =_mm_load_ps(inputPtr);
@@ -43,30 +43,30 @@ static inline void volk_32fc_s32f_power_spectrum_32f_a_sse3(float* logPowerOutpu
     // Apply the normalization factor
     input1 = _mm_mul_ps(input1, invNormalizationFactor);
     input2 = _mm_mul_ps(input2, invNormalizationFactor);
-    
+
     // Multiply each value by itself
     // (r1*r1), (i1*i1), (r2*r2), (i2*i2)
     input1 = _mm_mul_ps(input1, input1);
     // (r3*r3), (i3*i3), (r4*r4), (i4*i4)
     input2 = _mm_mul_ps(input2, input2);
-    
+
     // Horizontal add, to add (r*r) + (i*i) for each complex value
     // (r1*r1)+(i1*i1), (r2*r2) + (i2*i2), (r3*r3)+(i3*i3), (r4*r4)+(i4*i4)
     power = _mm_hadd_ps(input1, input2);
-    
+
     // Calculate the natural log power
     power = logf4(power);
-    
+
     // Convert to log10 and multiply by 10.0
     power = _mm_mul_ps(power, magScalar);
-    
+
     // Store the floating point results
     _mm_store_ps(destPtr, power);
-    
+
     destPtr += 4;
   }
-  
-  number = quarterPoints*4;  
+
+  number = quarterPoints*4;
 #endif /* LV_HAVE_LIB_SIMDMATH */
   // Calculate the FFT for any remaining points
 
@@ -81,10 +81,10 @@ static inline void volk_32fc_s32f_power_spectrum_32f_a_sse3(float* logPowerOutpu
     const float imag = *inputPtr++ * iNormalizationFactor;
 
     *destPtr = 10.0*log10f(((real * real) + (imag * imag)) + 1e-20);
-    
+
     destPtr++;
   }
-  
+
 }
 #endif /* LV_HAVE_SSE3 */
 
@@ -114,7 +114,7 @@ static inline void volk_32fc_s32f_power_spectrum_32f_a_generic(float* logPowerOu
 
     *realFFTDataPointsPtr = 10.0*log10f(((real * real) + (imag * imag)) + 1e-20);
 
-    
+
     realFFTDataPointsPtr++;
   }
 }
diff --git a/volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a.h b/volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a.h
index fc635f1719..27f755351d 100644
--- a/volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a.h
+++ b/volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a.h
@@ -32,19 +32,19 @@ static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a_sse3(float* lo
   magScalar = _mm_div_ps(magScalar, logf4(magScalar));
 
   __m128 invRBW = _mm_set_ps1(iRBW);
-  
+
   __m128 invNormalizationFactor = _mm_set_ps1(iNormalizationFactor);
 
   __m128 power;
   __m128 input1, input2;
   const uint64_t quarterPoints = num_points / 4;
   for(;number < quarterPoints; number++){
-    // Load the complex values 
+    // Load the complex values
     input1 =_mm_load_ps(inputPtr);
     inputPtr += 4;
     input2 =_mm_load_ps(inputPtr);
     inputPtr += 4;
-    
+
     // Apply the normalization factor
     input1 = _mm_mul_ps(input1, invNormalizationFactor);
     input2 = _mm_mul_ps(input2, invNormalizationFactor);
@@ -54,7 +54,7 @@ static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a_sse3(float* lo
     input1 = _mm_mul_ps(input1, input1);
     // (r3*r3), (i3*i3), (r4*r4), (i4*i4)
     input2 = _mm_mul_ps(input2, input2);
-    
+
     // Horizontal add, to add (r*r) + (i*i) for each complex value
     // (r1*r1)+(i1*i1), (r2*r2) + (i2*i2), (r3*r3)+(i3*i3), (r4*r4)+(i4*i4)
     power = _mm_hadd_ps(input1, input2);
@@ -64,17 +64,17 @@ static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a_sse3(float* lo
 
     // Calculate the natural log power
     power = logf4(power);
-    
+
     // Convert to log10 and multiply by 10.0
     power = _mm_mul_ps(power, magScalar);
-    
+
     // Store the floating point results
     _mm_store_ps(destPtr, power);
-    
+
     destPtr += 4;
   }
-  
-  number = quarterPoints*4;  
+
+  number = quarterPoints*4;
 #endif /* LV_HAVE_LIB_SIMDMATH */
   // Calculate the FFT for any remaining points
   for(; number < num_points; number++){
@@ -83,14 +83,14 @@ static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a_sse3(float* lo
     // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
     // 75 ohm load assumption
     // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
-    
+
     const float real = *inputPtr++ * iNormalizationFactor;
     const float imag = *inputPtr++ * iNormalizationFactor;
 
     *destPtr = 10.0*log10f((((real * real) + (imag * imag)) + 1e-20) * iRBW);
     destPtr++;
   }
-  
+
 }
 #endif /* LV_HAVE_SSE3 */
 
@@ -122,7 +122,7 @@ static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a_generic(float*
     const float imag = *inputPtr++ * iNormalizationFactor;
 
     *realFFTDataPointsPtr = 10.0*log10f((((real * real) + (imag * imag)) + 1e-20) * invRBW);
-    
+
     realFFTDataPointsPtr++;
   }
 }
diff --git a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h
index 534dc2a25f..f206c5e874 100644
--- a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h
+++ b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h
@@ -28,17 +28,17 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector, cons
     yh = _mm_set_ps1(lv_cimag(scalar));
 
     for(;number < halfPoints; number++){
-      
+
       x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
-      
+
       tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-      
+
       x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-      
+
       tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-      
+
       z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-    
+
       _mm_store_ps((float*)c,z); // Store the results back into the C container
 
       a += 2;
diff --git a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h
index 218c450f82..5c7d15b02f 100644
--- a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h
+++ b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h
@@ -28,17 +28,17 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector, cons
     yh = _mm_set_ps1(lv_cimag(scalar));
 
     for(;number < halfPoints; number++){
-      
+
       x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
-      
+
       tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-      
+
       x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-      
+
       tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-      
+
       z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-    
+
       _mm_storeu_ps((float*)c,z); // Store the results back into the C container
 
       a += 2;
diff --git a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a.h b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a.h
index 655075528e..e3dedf2fcd 100644
--- a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a.h
+++ b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a.h
@@ -10,40 +10,40 @@
 
 
 static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
-  
+
   float * res = (float*) result;
   float * in = (float*) input;
   float * tp = (float*) taps;
   unsigned int n_2_ccomplex_blocks = num_bytes >> 4;
   unsigned int isodd = (num_bytes >> 3) &1;
-  
-  
-  
+
+
+
   float sum0[2] = {0,0};
   float sum1[2] = {0,0};
   unsigned int i = 0;
 
-  
+
   for(i = 0; i < n_2_ccomplex_blocks; ++i) {
-    
+
 
     sum0[0] += in[0] * tp[0] + in[1] * tp[1];
     sum0[1] += (-in[0] * tp[1]) + in[1] * tp[0];
     sum1[0] += in[2] * tp[2] + in[3] * tp[3];
     sum1[1] += (-in[2] * tp[3]) + in[3] * tp[2];
-    
-    
+
+
     in += 4;
     tp += 4;
 
   }
- 
-  
+
+
   res[0] = sum0[0] + sum1[0];
   res[1] = sum0[1] + sum1[1];
-  
-  
-  
+
+
+
   for(i = 0; i < isodd; ++i) {
 
 
@@ -64,13 +64,13 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_generic(lv_32fc_t* res
 
 
 static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_sse(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
-  
+
   __VOLK_ATTR_ALIGNED(16) static const uint32_t conjugator[4]= {0x00000000, 0x80000000, 0x00000000, 0x80000000};
-  
 
 
 
-  asm volatile 
+
+  asm volatile
     (
      "#  ccomplex_conjugate_dotprod_generic (float* result, const float *input,\n\t"
      "#                         const float *taps, unsigned num_bytes)\n\t"
@@ -187,32 +187,32 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_sse(lv_32fc_t* result,
      :[rsi] "r" (input), [rdx] "r" (taps), "c" (num_bytes), [rdi] "r" (result), [conjugator] "r" (conjugator)
      :"rax", "r8", "r9", "r10"
      );
-  
-  
+
+
   int getem = num_bytes % 16;
-  
-  
+
+
   for(; getem > 0; getem -= 8) {
-  
-    
+
+
     *result += (input[(num_bytes >> 3) - 1] * lv_conj(taps[(num_bytes >> 3) - 1]));
-  
+
   }
 
   return;
-}  
+}
 #endif
 
 #if LV_HAVE_SSE && LV_HAVE_32
 static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
-  
+
   __VOLK_ATTR_ALIGNED(16) static const uint32_t conjugator[4]= {0x00000000, 0x80000000, 0x00000000, 0x80000000};
 
   int bound = num_bytes >> 4;
   int leftovers = num_bytes % 16;
 
-  
-  asm volatile 
+
+  asm volatile
     (
      "	#pushl	%%ebp\n\t"
      "	#movl	%%esp, %%ebp\n\t"
@@ -226,7 +226,7 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_sse_32(lv_32fc_t* resu
      "	movaps	0(%[edx]), %%xmm2\n\t"
      "  movl    %[ecx], (%[out])\n\t"
      "	shrl	$5, %[ecx]		# ecx = n_2_ccomplex_blocks / 2\n\t"
-     
+
      "  xorps   %%xmm1, %%xmm2\n\t"
      "	jmp	.%=L1_test\n\t"
      "	# 4 taps / loop\n\t"
@@ -317,28 +317,28 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_sse_32(lv_32fc_t* resu
      : [eax] "r" (input), [edx] "r" (taps), [ecx] "r" (num_bytes), [out] "r" (result), [conjugator] "r" (conjugator)
      );
 
-  
-  
-  
+
+
+
   printf("%d, %d\n", leftovers, bound);
-  
+
   for(; leftovers > 0; leftovers -= 8) {
-    
-    
+
+
     *result += (input[(bound << 1)] * lv_conj(taps[(bound << 1)]));
-    
+
   }
-  
+
   return;
-  
-  
-  
 
-  
-  
+
+
+
+
+
 }
 
-#endif /*LV_HAVE_SSE*/  
+#endif /*LV_HAVE_SSE*/
 
 
 
diff --git a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h
index 3ae7208a83..e7493413f7 100644
--- a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h
+++ b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h
@@ -9,39 +9,39 @@
 
 
 static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
-  
+
   float * res = (float*) result;
   float * in = (float*) input;
   float * tp = (float*) taps;
   unsigned int n_2_ccomplex_blocks = num_bytes >> 4;
   unsigned int isodd = (num_bytes >> 3) &1;
-  
-  
-  
+
+
+
   float sum0[2] = {0,0};
   float sum1[2] = {0,0};
   unsigned int i = 0;
 
-  
+
   for(i = 0; i < n_2_ccomplex_blocks; ++i) {
-    
+
     sum0[0] += in[0] * tp[0] + in[1] * tp[1];
     sum0[1] += (-in[0] * tp[1]) + in[1] * tp[0];
     sum1[0] += in[2] * tp[2] + in[3] * tp[3];
     sum1[1] += (-in[2] * tp[3]) + in[3] * tp[2];
-    
-    
+
+
     in += 4;
     tp += 4;
 
   }
- 
-  
+
+
   res[0] = sum0[0] + sum1[0];
   res[1] = sum0[1] + sum1[1];
-  
-  
-  
+
+
+
   for(i = 0; i < isodd; ++i) {
 
 
@@ -73,7 +73,7 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_sse3(lv_32fc_t* result
     uint32_t intRep[4];
     __m128 vec;
     } halfMask;
- 
+
   union NegMask {
     int intRep[4];
     __m128 vec;
@@ -85,13 +85,13 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_sse3(lv_32fc_t* result
 
   __m128 in1, in2, Rv, fehg, Iv, Rs, Ivm, Is;
   __m128 zv = {0,0,0,0};
-  
+
   halfMask.intRep[0] = halfMask.intRep[1] = 0xFFFFFFFF;
   halfMask.intRep[2] = halfMask.intRep[3] = 0x00000000;
 
   negMask.intRep[0] = negMask.intRep[2] = 0x80000000;
   negMask.intRep[1] = negMask.intRep[3] = 0;
-  
+
   // main loop
   while(num_bytes >= 4*sizeof(float)){
 
diff --git a/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a.h b/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a.h
index cde9240cc7..cb2ac4c67e 100644
--- a/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a.h
+++ b/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a.h
@@ -7,44 +7,44 @@
 #include <string.h>
 
 
-#ifdef LV_HAVE_GENERIC 
+#ifdef LV_HAVE_GENERIC
 
 
 static inline void volk_32fc_x2_dot_prod_32fc_a_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
-  
+
   float * res = (float*) result;
   float * in = (float*) input;
   float * tp = (float*) taps;
   unsigned int n_2_ccomplex_blocks = num_bytes >> 4;
   unsigned int isodd = (num_bytes >> 3) &1;
-  
-  
-  
+
+
+
   float sum0[2] = {0,0};
   float sum1[2] = {0,0};
   unsigned int i = 0;
 
-  
+
   for(i = 0; i < n_2_ccomplex_blocks; ++i) {
-    
+
 
     sum0[0] += in[0] * tp[0] - in[1] * tp[1];
     sum0[1] += in[0] * tp[1] + in[1] * tp[0];
     sum1[0] += in[2] * tp[2] - in[3] * tp[3];
     sum1[1] += in[2] * tp[3] + in[3] * tp[2];
-    
-    
+
+
     in += 4;
     tp += 4;
 
   }
 
-  
+
   res[0] = sum0[0] + sum1[0];
   res[1] = sum0[1] + sum1[1];
-  
-  
-  
+
+
+
   for(i = 0; i < isodd; ++i) {
 
 
@@ -61,9 +61,9 @@ static inline void volk_32fc_x2_dot_prod_32fc_a_generic(lv_32fc_t* result, const
 
 
 static inline void volk_32fc_x2_dot_prod_32fc_a_sse_64(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
-  
 
-  asm 
+
+  asm
     (
      "#  ccomplex_dotprod_generic (float* result, const float *input,\n\t"
      "#                         const float *taps, unsigned num_bytes)\n\t"
@@ -175,20 +175,20 @@ static inline void volk_32fc_x2_dot_prod_32fc_a_sse_64(lv_32fc_t* result, const
      :[rsi] "r" (input), [rdx] "r" (taps), "c" (num_bytes), [rdi] "r" (result)
      :"rax", "r8", "r9", "r10"
      );
-  
-  
+
+
   int getem = num_bytes % 16;
-  
-  
+
+
   for(; getem > 0; getem -= 8) {
-  
-    
+
+
     *result += (input[(num_bytes >> 3) - 1] * taps[(num_bytes >> 3) - 1]);
-  
+
   }
 
   return;
-  
+
 }
 
 #endif
@@ -200,7 +200,7 @@ static inline void volk_32fc_x2_dot_prod_32fc_a_sse_32(lv_32fc_t* result, const
   volk_32fc_x2_dot_prod_32fc_a_generic(result, input, taps, num_bytes);
 
 #if 0
-  asm volatile 
+  asm volatile
     (
      "	#pushl	%%ebp\n\t"
      "	#movl	%%esp, %%ebp\n\t"
@@ -299,28 +299,28 @@ static inline void volk_32fc_x2_dot_prod_32fc_a_sse_32(lv_32fc_t* result, const
      : "eax", "ecx", "edx"
      );
 
-  
+
   int getem = num_bytes % 16;
-  
+
   for(; getem > 0; getem -= 8) {
-    
-    
+
+
     *result += (input[(num_bytes >> 3) - 1] * taps[(num_bytes >> 3) - 1]);
-    
+
   }
-  
+
   return;
-#endif  
+#endif
 }
 
-#endif /*LV_HAVE_SSE*/  
+#endif /*LV_HAVE_SSE*/
 
 #ifdef LV_HAVE_SSE3
 
 #include <pmmintrin.h>
 
 static inline void volk_32fc_x2_dot_prod_32fc_a_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
-  
+
 
   lv_32fc_t dotProduct;
   memset(&dotProduct, 0x0, 2*sizeof(float));
@@ -336,19 +336,19 @@ static inline void volk_32fc_x2_dot_prod_32fc_a_sse3(lv_32fc_t* result, const lv
   dotProdVal = _mm_setzero_ps();
 
   for(;number < halfPoints; number++){
-      
+
     x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
     y = _mm_load_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
-      
+
     yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
     yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-      
+
     tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-      
+
     x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-      
+
     tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-      
+
     z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
 
     dotProdVal = _mm_add_ps(dotProdVal, z); // Add the complex multiplication results together
@@ -368,7 +368,7 @@ static inline void volk_32fc_x2_dot_prod_32fc_a_sse3(lv_32fc_t* result, const lv
   }
 
   *result = dotProduct;
-}  
+}
 
 #endif /*LV_HAVE_SSE3*/
 
@@ -379,7 +379,7 @@ static inline void volk_32fc_x2_dot_prod_32fc_a_sse3(lv_32fc_t* result, const lv
 static inline void volk_32fc_x2_dot_prod_32fc_a_sse4_1(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
   volk_32fc_x2_dot_prod_32fc_a_sse3(result, input, taps, num_bytes);
   // SSE3 version runs twice as fast as the SSE4.1 version, so turning off SSE4 version for now
-   /* 
+   /*
     __m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, real0, real1, im0, im1;
     float *p_input, *p_taps;
     __m64 *p_result;
@@ -391,7 +391,7 @@ static inline void volk_32fc_x2_dot_prod_32fc_a_sse4_1(lv_32fc_t* result, const
     static const __m128i neg = {0x000000000000000080000000};
 
     int i = 0;
-  
+
     int bound = (num_bytes >> 5);
     int leftovers = (num_bytes & 24) >> 3;
 
@@ -399,27 +399,27 @@ static inline void volk_32fc_x2_dot_prod_32fc_a_sse4_1(lv_32fc_t* result, const
     real1 = _mm_sub_ps(real1, real1);
     im0 = _mm_sub_ps(im0, im0);
     im1 = _mm_sub_ps(im1, im1);
-  
+
     for(; i < bound; ++i) {
-  
-    
+
+
     xmm0 = _mm_load_ps(p_input);
     xmm1 = _mm_load_ps(p_taps);
-    
+
     p_input += 4;
     p_taps += 4;
-    
+
     xmm2 = _mm_load_ps(p_input);
     xmm3 = _mm_load_ps(p_taps);
-    
+
     p_input += 4;
     p_taps += 4;
-    
+
     xmm4 = _mm_unpackhi_ps(xmm0, xmm2);
     xmm5 = _mm_unpackhi_ps(xmm1, xmm3);
     xmm0 = _mm_unpacklo_ps(xmm0, xmm2);
     xmm2 = _mm_unpacklo_ps(xmm1, xmm3);
-    
+
     //imaginary vector from input
     xmm1 = _mm_unpackhi_ps(xmm0, xmm4);
     //real vector from input
@@ -428,39 +428,39 @@ static inline void volk_32fc_x2_dot_prod_32fc_a_sse4_1(lv_32fc_t* result, const
     xmm0 = _mm_unpackhi_ps(xmm2, xmm5);
     //real vector from taps
     xmm2 = _mm_unpacklo_ps(xmm2, xmm5);
-    
+
     xmm4 = _mm_dp_ps(xmm3, xmm2, 0xf1);
     xmm5 = _mm_dp_ps(xmm1, xmm0, 0xf1);
-    
+
     xmm6 = _mm_dp_ps(xmm3, xmm0, 0xf2);
     xmm7 = _mm_dp_ps(xmm1, xmm2, 0xf2);
-    
+
     real0 = _mm_add_ps(xmm4, real0);
     real1 = _mm_add_ps(xmm5, real1);
     im0 = _mm_add_ps(xmm6, im0);
     im1 = _mm_add_ps(xmm7, im1);
-    
+
     }
 
 
-    
-    
+
+
     real1 = _mm_xor_ps(real1, (__m128)neg);
-    
-  
+
+
     im0 = _mm_add_ps(im0, im1);
     real0 = _mm_add_ps(real0, real1);
-  
+
     im0 = _mm_add_ps(im0, real0);
-  
+
     _mm_storel_pi(p_result, im0);
-  
+
     for(i = bound * 4; i < (bound * 4) + leftovers; ++i) {
-    
+
     *result += input[i] * taps[i];
     }
   */
-}  
+}
 
 #endif /*LV_HAVE_SSE4_1*/
 
diff --git a/volk/include/volk/volk_32fc_x2_multiply_32fc_a.h b/volk/include/volk/volk_32fc_x2_multiply_32fc_a.h
index aec8bd7166..f79ddb59bf 100644
--- a/volk/include/volk/volk_32fc_x2_multiply_32fc_a.h
+++ b/volk/include/volk/volk_32fc_x2_multiply_32fc_a.h
@@ -24,21 +24,21 @@ static inline void volk_32fc_x2_multiply_32fc_a_sse3(lv_32fc_t* cVector, const l
     const lv_32fc_t* a = aVector;
     const lv_32fc_t* b = bVector;
     for(;number < halfPoints; number++){
-      
+
       x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
       y = _mm_load_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
-      
+
       yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
       yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-      
+
       tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-      
+
       x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-      
+
       tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-      
+
       z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-    
+
       _mm_store_ps((float*)c,z); // Store the results back into the C container
 
       a += 2;
diff --git a/volk/include/volk/volk_32fc_x2_multiply_32fc_u.h b/volk/include/volk/volk_32fc_x2_multiply_32fc_u.h
index 729c1a4ad7..a998d6184e 100644
--- a/volk/include/volk/volk_32fc_x2_multiply_32fc_u.h
+++ b/volk/include/volk/volk_32fc_x2_multiply_32fc_u.h
@@ -25,21 +25,21 @@ static inline void volk_32fc_x2_multiply_32fc_u_sse3(lv_32fc_t* cVector, const l
     const lv_32fc_t* b = bVector;
 
     for(;number < halfPoints; number++){
-      
+
       x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
       y = _mm_loadu_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
-      
+
       yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
       yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-      
+
       tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-      
+
       x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-      
+
       tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-      
+
       z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-   
+
       _mm_storeu_ps((float*)c,z); // Store the results back into the C container
 
       a += 2;
diff --git a/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_a.h b/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_a.h
index 2a1bcbce0e..2755192e96 100644
--- a/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_a.h
+++ b/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_a.h
@@ -27,23 +27,23 @@ static inline void volk_32fc_x2_multiply_conjugate_32fc_a_sse3(lv_32fc_t* cVecto
     __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
 
     for(;number < halfPoints; number++){
-      
+
       x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
       y = _mm_load_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
 
       y = _mm_xor_ps(y, conjugator); // conjugate y
-      
+
       yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
       yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-      
+
       tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-      
+
       x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-      
+
       tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-      
+
       z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-    
+
       _mm_store_ps((float*)c,z); // Store the results back into the C container
 
       a += 2;
diff --git a/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_u.h b/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_u.h
index 92f6a051ee..09dcd635b9 100644
--- a/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_u.h
+++ b/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_u.h
@@ -27,23 +27,23 @@ static inline void volk_32fc_x2_multiply_conjugate_32fc_u_sse3(lv_32fc_t* cVecto
     __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
 
     for(;number < halfPoints; number++){
-      
+
       x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
       y = _mm_loadu_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
 
       y = _mm_xor_ps(y, conjugator); // conjugate y
-      
+
       yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
       yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-      
+
       tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-      
+
       x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-      
+
       tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-      
+
       z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-    
+
       _mm_storeu_ps((float*)c,z); // Store the results back into the C container
 
       a += 2;
diff --git a/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a.h b/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a.h
index 2d5f36b279..75eb9173d5 100644
--- a/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a.h
+++ b/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a.h
@@ -11,7 +11,7 @@
 #include<pmmintrin.h>
 
 static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_bytes) {
-  
+
 
   __m128 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
 
@@ -23,31 +23,31 @@ static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_sse3(float* t
   int leftovers0 = (num_bytes >> 4) & 1;
   int leftovers1 = (num_bytes >> 3) & 1;
   int i = 0;
-  
-  
-  
+
+
+
   xmm1 = _mm_setzero_ps();
-  xmm1 = _mm_loadl_pi(xmm1, (__m64*)src0);  
+  xmm1 = _mm_loadl_pi(xmm1, (__m64*)src0);
   xmm2 = _mm_load_ps((float*)&points[0]);
   xmm8 = _mm_load1_ps(&scalar);
   xmm1 = _mm_movelh_ps(xmm1, xmm1);
   xmm3 = _mm_load_ps((float*)&points[2]);
-  
-  
+
+
   for(; i < bound - 1; ++i) {
-  
+
     xmm4 = _mm_sub_ps(xmm1, xmm2);
     xmm5 = _mm_sub_ps(xmm1, xmm3);
     points += 4;
     xmm6 = _mm_mul_ps(xmm4, xmm4);
     xmm7 = _mm_mul_ps(xmm5, xmm5);
-    
+
     xmm2 = _mm_load_ps((float*)&points[0]);
-    
+
     xmm4 = _mm_hadd_ps(xmm6, xmm7);
 
     xmm3 = _mm_load_ps((float*)&points[2]);
-    
+
     xmm4 = _mm_mul_ps(xmm4, xmm8);
 
     _mm_store_ps(target, xmm4);
@@ -55,46 +55,46 @@ static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_sse3(float* t
     target += 4;
 
   }
-  
+
   xmm4 = _mm_sub_ps(xmm1, xmm2);
   xmm5 = _mm_sub_ps(xmm1, xmm3);
-  
-  
+
+
 
   points += 4;
   xmm6 = _mm_mul_ps(xmm4, xmm4);
   xmm7 = _mm_mul_ps(xmm5, xmm5);
-    
+
   xmm4 = _mm_hadd_ps(xmm6, xmm7);
-  
+
   xmm4 = _mm_mul_ps(xmm4, xmm8);
-   
+
   _mm_store_ps(target, xmm4);
-  
+
   target += 4;
-  
+
 
   for(i = 0; i < leftovers0; ++i) {
-    
+
     xmm2 = _mm_load_ps((float*)&points[0]);
-    
+
     xmm4 = _mm_sub_ps(xmm1, xmm2);
-    
+
     points += 2;
-    
+
     xmm6 = _mm_mul_ps(xmm4, xmm4);
 
     xmm4 = _mm_hadd_ps(xmm6, xmm6);
 
     xmm4 = _mm_mul_ps(xmm4, xmm8);
-    
+
     _mm_storeh_pi((__m64*)target, xmm4);
 
     target += 2;
   }
 
   for(i = 0; i < leftovers1; ++i) {
-    
+
     diff = src0[0] - points[0];
 
     sq_dist = scalar * (lv_creal(diff) * lv_creal(diff) + lv_cimag(diff) * lv_cimag(diff));
@@ -109,13 +109,13 @@ static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_sse3(float* t
 static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_bytes) {
   lv_32fc_t diff;
   float sq_dist;
-  unsigned int i = 0; 
-  
+  unsigned int i = 0;
+
   for(; i < num_bytes >> 3; ++i) {
     diff = src0[0] - points[i];
 
     sq_dist = scalar * (lv_creal(diff) * lv_creal(diff) + lv_cimag(diff) * lv_cimag(diff));
-    
+
     target[i] = sq_dist;
   }
 }
diff --git a/volk/include/volk/volk_32fc_x2_square_dist_32f_a.h b/volk/include/volk/volk_32fc_x2_square_dist_32f_a.h
index 6a4a08ca54..b819eaffd4 100644
--- a/volk/include/volk/volk_32fc_x2_square_dist_32f_a.h
+++ b/volk/include/volk/volk_32fc_x2_square_dist_32f_a.h
@@ -10,7 +10,7 @@
 #include<pmmintrin.h>
 
 static inline void volk_32fc_x2_square_dist_32f_a_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_bytes) {
-  
+
 
   __m128 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
 
@@ -22,11 +22,11 @@ static inline void volk_32fc_x2_square_dist_32f_a_sse3(float* target, lv_32fc_t*
   int i = 0;
 
   xmm1 = _mm_setzero_ps();
-  xmm1 = _mm_loadl_pi(xmm1, (__m64*)src0);  
+  xmm1 = _mm_loadl_pi(xmm1, (__m64*)src0);
   xmm2 = _mm_load_ps((float*)&points[0]);
   xmm1 = _mm_movelh_ps(xmm1, xmm1);
   xmm3 = _mm_load_ps((float*)&points[2]);
-  
+
 
   for(; i < bound - 1; ++i) {
     xmm4 = _mm_sub_ps(xmm1, xmm2);
@@ -34,9 +34,9 @@ static inline void volk_32fc_x2_square_dist_32f_a_sse3(float* target, lv_32fc_t*
     points += 4;
     xmm6 = _mm_mul_ps(xmm4, xmm4);
     xmm7 = _mm_mul_ps(xmm5, xmm5);
-    
+
     xmm2 = _mm_load_ps((float*)&points[0]);
-    
+
     xmm4 = _mm_hadd_ps(xmm6, xmm7);
 
     xmm3 = _mm_load_ps((float*)&points[2]);
@@ -46,41 +46,41 @@ static inline void volk_32fc_x2_square_dist_32f_a_sse3(float* target, lv_32fc_t*
     target += 4;
 
   }
-  
+
   xmm4 = _mm_sub_ps(xmm1, xmm2);
   xmm5 = _mm_sub_ps(xmm1, xmm3);
-  
-  
+
+
 
   points += 4;
   xmm6 = _mm_mul_ps(xmm4, xmm4);
   xmm7 = _mm_mul_ps(xmm5, xmm5);
-    
+
   xmm4 = _mm_hadd_ps(xmm6, xmm7);
-   
+
   _mm_store_ps(target, xmm4);
-  
+
   target += 4;
 
   for(i = 0; i < leftovers0; ++i) {
-    
+
     xmm2 = _mm_load_ps((float*)&points[0]);
-    
+
     xmm4 = _mm_sub_ps(xmm1, xmm2);
-    
+
     points += 2;
-    
+
     xmm6 = _mm_mul_ps(xmm4, xmm4);
 
     xmm4 = _mm_hadd_ps(xmm6, xmm6);
-    
+
     _mm_storeh_pi((__m64*)target, xmm4);
 
     target += 2;
   }
 
   for(i = 0; i < leftovers1; ++i) {
-    
+
     diff = src0[0] - points[0];
 
     sq_dist = lv_creal(diff) * lv_creal(diff) + lv_cimag(diff) * lv_cimag(diff);
@@ -95,13 +95,13 @@ static inline void volk_32fc_x2_square_dist_32f_a_sse3(float* target, lv_32fc_t*
 static inline void volk_32fc_x2_square_dist_32f_a_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_bytes) {
   lv_32fc_t diff;
   float sq_dist;
-  unsigned int i = 0; 
-  
+  unsigned int i = 0;
+
   for(; i < num_bytes >> 3; ++i) {
     diff = src0[0] - points[i];
 
     sq_dist = lv_creal(diff) * lv_creal(diff) + lv_cimag(diff) * lv_cimag(diff);
-    
+
     target[i] = sq_dist;
   }
 }
diff --git a/volk/include/volk/volk_32i_s32f_convert_32f_a.h b/volk/include/volk/volk_32i_s32f_convert_32f_a.h
index 5581428693..8f4123d719 100644
--- a/volk/include/volk/volk_32i_s32f_convert_32f_a.h
+++ b/volk/include/volk/volk_32i_s32f_convert_32f_a.h
@@ -17,7 +17,7 @@
 static inline void volk_32i_s32f_convert_32f_a_sse2(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
-    
+
      float* outputVectorPtr = outputVector;
      const float iScalar = 1.0 / scalar;
     __m128 invScalar = _mm_set_ps1(iScalar);
diff --git a/volk/include/volk/volk_32i_s32f_convert_32f_u.h b/volk/include/volk/volk_32i_s32f_convert_32f_u.h
index d8afd218cf..b3a8ab2015 100644
--- a/volk/include/volk/volk_32i_s32f_convert_32f_u.h
+++ b/volk/include/volk/volk_32i_s32f_convert_32f_u.h
@@ -18,7 +18,7 @@
 static inline void volk_32i_s32f_convert_32f_u_sse2(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
-    
+
      float* outputVectorPtr = outputVector;
      const float iScalar = 1.0 / scalar;
     __m128 invScalar = _mm_set_ps1(iScalar);
diff --git a/volk/include/volk/volk_32i_x2_and_32i_a.h b/volk/include/volk/volk_32i_x2_and_32i_a.h
index dcd63d98eb..e5330847b3 100644
--- a/volk/include/volk/volk_32i_x2_and_32i_a.h
+++ b/volk/include/volk/volk_32i_x2_and_32i_a.h
@@ -23,12 +23,12 @@ static inline void volk_32i_x2_and_32i_a_sse(int32_t* cVector, const int32_t* aV
 
     __m128 aVal, bVal, cVal;
     for(;number < quarterPoints; number++){
-      
-      aVal = _mm_load_ps(aPtr); 
+
+      aVal = _mm_load_ps(aPtr);
       bVal = _mm_load_ps(bPtr);
-      
-      cVal = _mm_and_ps(aVal, bVal); 
-      
+
+      cVal = _mm_and_ps(aVal, bVal);
+
       _mm_store_ps(cPtr,cVal); // Store the results back into the C container
 
       aPtr += 4;
diff --git a/volk/include/volk/volk_32i_x2_or_32i_a.h b/volk/include/volk/volk_32i_x2_or_32i_a.h
index 243e8178cf..24045894c6 100644
--- a/volk/include/volk/volk_32i_x2_or_32i_a.h
+++ b/volk/include/volk/volk_32i_x2_or_32i_a.h
@@ -23,12 +23,12 @@ static inline void volk_32i_x2_or_32i_a_sse(int32_t* cVector, const int32_t* aVe
 
     __m128 aVal, bVal, cVal;
     for(;number < quarterPoints; number++){
-      
-      aVal = _mm_load_ps(aPtr); 
+
+      aVal = _mm_load_ps(aPtr);
       bVal = _mm_load_ps(bPtr);
-      
-      cVal = _mm_or_ps(aVal, bVal); 
-      
+
+      cVal = _mm_or_ps(aVal, bVal);
+
       _mm_store_ps(cPtr,cVal); // Store the results back into the C container
 
       aPtr += 4;
diff --git a/volk/include/volk/volk_32u_byteswap_a.h b/volk/include/volk/volk_32u_byteswap_a.h
index b88848096f..71ae027d37 100644
--- a/volk/include/volk/volk_32u_byteswap_a.h
+++ b/volk/include/volk/volk_32u_byteswap_a.h
@@ -39,9 +39,9 @@ static inline void volk_32u_byteswap_a_sse2(uint32_t* intsToSwap, unsigned int n
     _mm_store_si128((__m128i*)inputPtr, output);
     inputPtr += 4;
   }
-  
+
   // Byteswap any remaining points:
-  number = quarterPoints*4;  
+  number = quarterPoints*4;
   for(; number < num_points; number++){
     uint32_t outputVal = *inputPtr;
     outputVal = (((outputVal >> 24) & 0xff) | ((outputVal >> 8) & 0x0000ff00) | ((outputVal << 8) & 0x00ff0000) | ((outputVal << 24) & 0xff000000));
@@ -64,7 +64,7 @@ static inline void volk_32u_byteswap_a_generic(uint32_t* intsToSwap, unsigned in
   for(point = 0; point < num_points; point++){
     uint32_t output = *inputPtr;
     output = (((output >> 24) & 0xff) | ((output >> 8) & 0x0000ff00) | ((output << 8) & 0x00ff0000) | ((output << 24) & 0xff000000));
-    
+
     *inputPtr = output;
     inputPtr++;
   }
diff --git a/volk/include/volk/volk_64f_convert_32f_a.h b/volk/include/volk/volk_64f_convert_32f_a.h
index 2126e4f956..11d51702bc 100644
--- a/volk/include/volk/volk_64f_convert_32f_a.h
+++ b/volk/include/volk/volk_64f_convert_32f_a.h
@@ -16,7 +16,7 @@ static inline void volk_64f_convert_32f_a_sse2(float* outputVector, const double
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
-    
+
   const double* inputVectorPtr = (const double*)inputVector;
   float* outputVectorPtr = outputVector;
   __m128 ret, ret2;
@@ -25,7 +25,7 @@ static inline void volk_64f_convert_32f_a_sse2(float* outputVector, const double
   for(;number < quarterPoints; number++){
     inputVal1 = _mm_load_pd(inputVectorPtr); inputVectorPtr += 2;
     inputVal2 = _mm_load_pd(inputVectorPtr); inputVectorPtr += 2;
- 
+
     ret = _mm_cvtpd_ps(inputVal1);
     ret2 = _mm_cvtpd_ps(inputVal2);
 
@@ -35,7 +35,7 @@ static inline void volk_64f_convert_32f_a_sse2(float* outputVector, const double
     outputVectorPtr += 4;
   }
 
-  number = quarterPoints * 4;    
+  number = quarterPoints * 4;
   for(; number < num_points; number++){
     outputVector[number] = (float)(inputVector[number]);
   }
diff --git a/volk/include/volk/volk_64f_convert_32f_u.h b/volk/include/volk/volk_64f_convert_32f_u.h
index 5c323230af..31dc5b5fe9 100644
--- a/volk/include/volk/volk_64f_convert_32f_u.h
+++ b/volk/include/volk/volk_64f_convert_32f_u.h
@@ -16,7 +16,7 @@ static inline void volk_64f_convert_32f_u_sse2(float* outputVector, const double
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
-    
+
   const double* inputVectorPtr = (const double*)inputVector;
   float* outputVectorPtr = outputVector;
   __m128 ret, ret2;
@@ -25,7 +25,7 @@ static inline void volk_64f_convert_32f_u_sse2(float* outputVector, const double
   for(;number < quarterPoints; number++){
     inputVal1 = _mm_loadu_pd(inputVectorPtr); inputVectorPtr += 2;
     inputVal2 = _mm_loadu_pd(inputVectorPtr); inputVectorPtr += 2;
- 
+
     ret = _mm_cvtpd_ps(inputVal1);
     ret2 = _mm_cvtpd_ps(inputVal2);
 
@@ -35,7 +35,7 @@ static inline void volk_64f_convert_32f_u_sse2(float* outputVector, const double
     outputVectorPtr += 4;
   }
 
-  number = quarterPoints * 4;    
+  number = quarterPoints * 4;
   for(; number < num_points; number++){
     outputVector[number] = (float)(inputVector[number]);
   }
diff --git a/volk/include/volk/volk_64f_x2_max_64f_a.h b/volk/include/volk/volk_64f_x2_max_64f_a.h
index 61a704c528..33aae6d102 100644
--- a/volk/include/volk/volk_64f_x2_max_64f_a.h
+++ b/volk/include/volk/volk_64f_x2_max_64f_a.h
@@ -23,12 +23,12 @@ static inline void volk_64f_x2_max_64f_a_sse2(double* cVector, const double* aVe
 
     __m128d aVal, bVal, cVal;
     for(;number < halfPoints; number++){
-      
-      aVal = _mm_load_pd(aPtr); 
+
+      aVal = _mm_load_pd(aPtr);
       bVal = _mm_load_pd(bPtr);
-      
-      cVal = _mm_max_pd(aVal, bVal); 
-      
+
+      cVal = _mm_max_pd(aVal, bVal);
+
       _mm_store_pd(cPtr,cVal); // Store the results back into the C container
 
       aPtr += 2;
diff --git a/volk/include/volk/volk_64f_x2_min_64f_a.h b/volk/include/volk/volk_64f_x2_min_64f_a.h
index 148b72c595..25d8b4c982 100644
--- a/volk/include/volk/volk_64f_x2_min_64f_a.h
+++ b/volk/include/volk/volk_64f_x2_min_64f_a.h
@@ -23,12 +23,12 @@ static inline void volk_64f_x2_min_64f_a_sse2(double* cVector, const double* aVe
 
     __m128d aVal, bVal, cVal;
     for(;number < halfPoints; number++){
-      
-      aVal = _mm_load_pd(aPtr); 
+
+      aVal = _mm_load_pd(aPtr);
       bVal = _mm_load_pd(bPtr);
-      
-      cVal = _mm_min_pd(aVal, bVal); 
-      
+
+      cVal = _mm_min_pd(aVal, bVal);
+
       _mm_store_pd(cPtr,cVal); // Store the results back into the C container
 
       aPtr += 2;
diff --git a/volk/include/volk/volk_64u_byteswap_a.h b/volk/include/volk/volk_64u_byteswap_a.h
index d4fc74a6e4..3d1d87623e 100644
--- a/volk/include/volk/volk_64u_byteswap_a.h
+++ b/volk/include/volk/volk_64u_byteswap_a.h
@@ -34,7 +34,7 @@ static inline void volk_64u_byteswap_a_sse2(uint64_t* intsToSwap, unsigned int n
       output = _mm_or_si128(output, byte2);
       byte3 = _mm_and_si128(byte3, byte3mask);
       output = _mm_or_si128(output, byte3);
-      
+
       // Reorder the two words
       output = _mm_shuffle_epi32(output, _MM_SHUFFLE(2, 3, 0, 1));
 
@@ -42,17 +42,17 @@ static inline void volk_64u_byteswap_a_sse2(uint64_t* intsToSwap, unsigned int n
       _mm_store_si128((__m128i*)inputPtr, output);
       inputPtr += 4;
     }
-  
+
     // Byteswap any remaining points:
-    number = halfPoints*2;  
+    number = halfPoints*2;
     for(; number < num_points; number++){
       uint32_t output1 = *inputPtr;
       uint32_t output2 = inputPtr[1];
-      
+
       output1 = (((output1 >> 24) & 0xff) | ((output1 >> 8) & 0x0000ff00) | ((output1 << 8) & 0x00ff0000) | ((output1 << 24) & 0xff000000));
-      
+
       output2 = (((output2 >> 24) & 0xff) | ((output2 >> 8) & 0x0000ff00) | ((output2 << 8) & 0x00ff0000) | ((output2 << 24) & 0xff000000));
-      
+
       *inputPtr++ = output2;
       *inputPtr++ = output1;
     }
@@ -71,11 +71,11 @@ static inline void volk_64u_byteswap_a_generic(uint64_t* intsToSwap, unsigned in
   for(point = 0; point < num_points; point++){
     uint32_t output1 = *inputPtr;
     uint32_t output2 = inputPtr[1];
-    
+
     output1 = (((output1 >> 24) & 0xff) | ((output1 >> 8) & 0x0000ff00) | ((output1 << 8) & 0x00ff0000) | ((output1 << 24) & 0xff000000));
-    
+
     output2 = (((output2 >> 24) & 0xff) | ((output2 >> 8) & 0x0000ff00) | ((output2 << 8) & 0x00ff0000) | ((output2 << 24) & 0xff000000));
-    
+
     *inputPtr++ = output2;
     *inputPtr++ = output1;
   }
diff --git a/volk/include/volk/volk_64u_popcnt_a.h b/volk/include/volk/volk_64u_popcnt_a.h
index 4683f1e387..7d7359ccf6 100644
--- a/volk/include/volk/volk_64u_popcnt_a.h
+++ b/volk/include/volk/volk_64u_popcnt_a.h
@@ -11,7 +11,7 @@
 static inline void volk_64u_popcnt_a_generic(uint64_t* ret, const uint64_t value) {
 
   //const uint32_t* valueVector = (const uint32_t*)&value;
-  
+
   // This is faster than a lookup table
   //uint32_t retVal = valueVector[0];
   uint32_t retVal = (uint32_t)(value && 0x00000000FFFFFFFF);
diff --git a/volk/include/volk/volk_8i_s32f_convert_32f_a.h b/volk/include/volk/volk_8i_s32f_convert_32f_a.h
index 7f2623ac6e..02a7f356e0 100644
--- a/volk/include/volk/volk_8i_s32f_convert_32f_a.h
+++ b/volk/include/volk/volk_8i_s32f_convert_32f_a.h
@@ -17,7 +17,7 @@
 static inline void volk_8i_s32f_convert_32f_a_sse4_1(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int sixteenthPoints = num_points / 16;
-    
+
     float* outputVectorPtr = outputVector;
     const float iScalar = 1.0 / scalar;
     __m128 invScalar = _mm_set_ps1(iScalar);
diff --git a/volk/include/volk/volk_8i_s32f_convert_32f_u.h b/volk/include/volk/volk_8i_s32f_convert_32f_u.h
index 3cd6bb67ce..8bb2c0d1a4 100644
--- a/volk/include/volk/volk_8i_s32f_convert_32f_u.h
+++ b/volk/include/volk/volk_8i_s32f_convert_32f_u.h
@@ -18,7 +18,7 @@
 static inline void volk_8i_s32f_convert_32f_u_sse4_1(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int sixteenthPoints = num_points / 16;
-    
+
     float* outputVectorPtr = outputVector;
     const float iScalar = 1.0 / scalar;
     __m128 invScalar = _mm_set_ps1( iScalar );
diff --git a/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a.h b/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a.h
index b723c6f8ba..d82da59fb1 100644
--- a/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a.h
+++ b/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a.h
@@ -20,7 +20,7 @@ static inline void volk_8ic_s32f_deinterleave_32f_x2_a_sse4_1(float* iBuffer, fl
   float* qBufferPtr = qBuffer;
 
   unsigned int number = 0;
-  const unsigned int eighthPoints = num_points / 8;    
+  const unsigned int eighthPoints = num_points / 8;
   __m128 iFloatValue, qFloatValue;
 
   const float iScalar= 1.0 / scalar;
@@ -71,7 +71,7 @@ static inline void volk_8ic_s32f_deinterleave_32f_x2_a_sse4_1(float* iBuffer, fl
     *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
     *qBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
   }
-    
+
 }
 #endif /* LV_HAVE_SSE4_1 */
 
@@ -90,7 +90,7 @@ static inline void volk_8ic_s32f_deinterleave_32f_x2_a_sse(float* iBuffer, float
   float* qBufferPtr = qBuffer;
 
   unsigned int number = 0;
-  const unsigned int quarterPoints = num_points / 4;    
+  const unsigned int quarterPoints = num_points / 4;
   __m128 cplxValue1, cplxValue2, iValue, qValue;
 
   __m128 invScalar = _mm_set_ps1(1.0/scalar);
@@ -103,7 +103,7 @@ static inline void volk_8ic_s32f_deinterleave_32f_x2_a_sse(float* iBuffer, float
     floatBuffer[1] = (float)(complexVectorPtr[1]);
     floatBuffer[2] = (float)(complexVectorPtr[2]);
     floatBuffer[3] = (float)(complexVectorPtr[3]);
-      
+
     floatBuffer[4] = (float)(complexVectorPtr[4]);
     floatBuffer[5] = (float)(complexVectorPtr[5]);
     floatBuffer[6] = (float)(complexVectorPtr[6]);
diff --git a/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a.h b/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a.h
index 74073f5a6d..b2c15d3a30 100644
--- a/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a.h
+++ b/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a.h
@@ -18,7 +18,7 @@ static inline void volk_8ic_s32f_deinterleave_real_32f_a_sse4_1(float* iBuffer,
   float* iBufferPtr = iBuffer;
 
   unsigned int number = 0;
-  const unsigned int eighthPoints = num_points / 8;    
+  const unsigned int eighthPoints = num_points / 8;
   __m128 iFloatValue;
 
   const float iScalar= 1.0 / scalar;
@@ -57,7 +57,7 @@ static inline void volk_8ic_s32f_deinterleave_real_32f_a_sse4_1(float* iBuffer,
     *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
     complexVectorPtr++;
   }
-    
+
 }
 #endif /* LV_HAVE_SSE4_1 */
 
@@ -75,7 +75,7 @@ static inline void volk_8ic_s32f_deinterleave_real_32f_a_sse(float* iBuffer, con
   float* iBufferPtr = iBuffer;
 
   unsigned int number = 0;
-  const unsigned int quarterPoints = num_points / 4;    
+  const unsigned int quarterPoints = num_points / 4;
   __m128 iValue;
 
   const float iScalar= 1.0 / scalar;
@@ -88,7 +88,7 @@ static inline void volk_8ic_s32f_deinterleave_real_32f_a_sse(float* iBuffer, con
     floatBuffer[0] = (float)(*complexVectorPtr); complexVectorPtr += 2;
     floatBuffer[1] = (float)(*complexVectorPtr); complexVectorPtr += 2;
     floatBuffer[2] = (float)(*complexVectorPtr); complexVectorPtr += 2;
-    floatBuffer[3] = (float)(*complexVectorPtr); complexVectorPtr += 2; 
+    floatBuffer[3] = (float)(*complexVectorPtr); complexVectorPtr += 2;
 
     iValue = _mm_load_ps(floatBuffer);
 
@@ -104,7 +104,7 @@ static inline void volk_8ic_s32f_deinterleave_real_32f_a_sse(float* iBuffer, con
     *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
     complexVectorPtr++;
   }
-    
+
 }
 #endif /* LV_HAVE_SSE */
 
diff --git a/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h b/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h
index 0c280eb6e9..f85fdb9995 100644
--- a/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h
+++ b/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h
@@ -23,15 +23,15 @@ static inline void volk_8ic_x2_multiply_conjugate_16ic_a_sse4_1(lv_16sc_t* cVect
   const lv_8sc_t* a = aVector;
   const lv_8sc_t* b = bVector;
   __m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1);
-    
+
   for(;number < quarterPoints; number++){
     // Convert into 8 bit values into 16 bit values
     x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a));
     y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b));
-      
+
     // Calculate the ar*cr - ai*(-ci) portions
     realz = _mm_madd_epi16(x,y);
-      
+
     // Calculate the complex conjugate of the cr + ci j values
     y = _mm_sign_epi16(y, conjugateSign);
 
@@ -47,7 +47,7 @@ static inline void volk_8ic_x2_multiply_conjugate_16ic_a_sse4_1(lv_16sc_t* cVect
     b += 4;
     c += 4;
   }
-    
+
   number = quarterPoints * 4;
   int16_t* c16Ptr = (int16_t*)&cVector[number];
   int8_t* a8Ptr = (int8_t*)&aVector[number];
diff --git a/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h b/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h
index a2c2b04f63..4b16171cec 100644
--- a/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h
+++ b/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h
@@ -80,7 +80,7 @@ static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a_sse4_1(lv_32fc_t*
     float bImag = (float)*b8Ptr++;
     lv_32fc_t bVal = lv_cmake( bReal, -bImag );
     lv_32fc_t temp = aVal * bVal;
-    
+
     *cFloatPtr++ = lv_creal(temp) / scalar;
     *cFloatPtr++ = lv_cimag(temp) / scalar;
   }
@@ -109,7 +109,7 @@ static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a_generic(lv_32fc_t*
     float bImag = (float)*b8Ptr++;
     lv_32fc_t bVal = lv_cmake( bReal, -bImag );
     lv_32fc_t temp = aVal * bVal;
-    
+
     *cPtr++ = (lv_creal(temp) * invScalar);
     *cPtr++ = (lv_cimag(temp) * invScalar);
   }
diff --git a/volk/lib/gcc_x86_cpuid.h b/volk/lib/gcc_x86_cpuid.h
index 2d0916fb36..98eeb33a31 100644
--- a/volk/lib/gcc_x86_cpuid.h
+++ b/volk/lib/gcc_x86_cpuid.h
@@ -5,16 +5,16 @@
  * under the terms of the GNU General Public License as published by the
  * Free Software Foundation; either version 3, or (at your option) any
  * later version.
- * 
+ *
  * This file is distributed in the hope that it will be useful, but
  * WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
- * 
+ *
  * Under Section 7 of GPL version 3, you are granted additional
  * permissions described in the GCC Runtime Library Exception, version
  * 3.1, as published by the Free Software Foundation.
- * 
+ *
  * You should have received a copy of the GNU General Public License and
  * a copy of the GCC Runtime Library Exception along with this program;
  * see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
diff --git a/volk/lib/qa_16s_add_quad_aligned16.cc b/volk/lib/qa_16s_add_quad_aligned16.cc
index 5d5eb7e188..8da43b9724 100644
--- a/volk/lib/qa_16s_add_quad_aligned16.cc
+++ b/volk/lib/qa_16s_add_quad_aligned16.cc
@@ -16,7 +16,7 @@ void qa_16s_add_quad_aligned16::t1() {
 
 
 void qa_16s_add_quad_aligned16::t1() {
-  
+
   volk_environment_init();
   clock_t start, end;
   double total;
@@ -27,7 +27,7 @@ void qa_16s_add_quad_aligned16::t1() {
   __VOLK_ATTR_ALIGNED(16) short input2[vlen];
   __VOLK_ATTR_ALIGNED(16) short input3[vlen];
   __VOLK_ATTR_ALIGNED(16) short input4[vlen];
-  
+
   __VOLK_ATTR_ALIGNED(16) short output0[vlen];
   __VOLK_ATTR_ALIGNED(16) short output1[vlen];
   __VOLK_ATTR_ALIGNED(16) short output2[vlen];
@@ -48,13 +48,13 @@ void qa_16s_add_quad_aligned16::t1() {
     short minus3 = ((short) (rand() - (RAND_MAX/2))) >> 2;
     short plus4 = ((short) (rand() - (RAND_MAX/2))) >> 2;
     short minus4 = ((short) (rand() - (RAND_MAX/2))) >> 2;
-    
+
     input0[i] = plus0 - minus0;
     input1[i] = plus1 - minus1;
     input2[i] = plus2 - minus2;
     input3[i] = plus3 - minus3;
     input4[i] = plus4 - minus4;
-    
+
   }
   printf("16s_add_quad_aligned\n");
 
@@ -76,7 +76,7 @@ void qa_16s_add_quad_aligned16::t1() {
     //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
     //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
   }
-  
+
   for(int i = 0; i < vlen; ++i) {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
diff --git a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc
index 2e6e6a1a0b..5a58569a1d 100644
--- a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc
+++ b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc
@@ -22,17 +22,17 @@ void qa_16s_branch_4_state_8_aligned16::t1() {
   static char permute2[16]__attribute__((aligned(16))) = {0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d, 0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f};
   static char permute3[16]__attribute__((aligned(16))) = {0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f, 0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d};
   static char* permuters[4] = {permute0, permute1, permute2, permute3};
-  
+
   unsigned int num_bytes = vlen << 1;
 
   volk_environment_init();
   clock_t start, end;
   double total;
-  
+
   __VOLK_ATTR_ALIGNED(16) short target[vlen];
   __VOLK_ATTR_ALIGNED(16) short target2[vlen];
   __VOLK_ATTR_ALIGNED(16) short target3[vlen];
-  
+
   __VOLK_ATTR_ALIGNED(16) short src0[vlen];
   __VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen] =  {
 7, 5, 2, 0, 6, 4, 3, 1, 6, 4, 3, 1, 7, 5, 2, 0, 1, 3, 4, 6, 0, 2, 5, 7, 0, 2, 5, 7, 1, 3, 4, 6 };
@@ -45,29 +45,29 @@ void qa_16s_branch_4_state_8_aligned16::t1() {
   __VOLK_ATTR_ALIGNED(16) short cntl3[vlen] =  {
     0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff };
   __VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4};
-  
-  
+
+
 
   for(int i = 0; i < vlen; ++i) {
     src0[i] = i;
-    
+
   }
-  
+
 
   printf("16s_branch_4_state_8_aligned\n");
-  
-  
+
+
   start = clock();
   for(int i = 0; i < num_iters; ++i) {
     volk_16s_permute_and_scalar_add_aligned16_manual(target, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2");
   }
   end = clock();
-  
+
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
 
   printf("permute_and_scalar_add_time: %f\n", total);
-  
-  
+
+
 
   start = clock();
   for(int i = 0; i < num_iters; ++i) {
@@ -78,25 +78,25 @@ void qa_16s_branch_4_state_8_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
 
   printf("branch_4_state_8_time, ssse3: %f\n", total);
-  
+
   start = clock();
   for(int i = 0; i < num_iters; ++i) {
     volk_16s_branch_4_state_8_aligned16_manual(target3, src0, permuters, cntl2, cntl3, scalars, "generic");
   }
   end = clock();
-  
+
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
 
   printf("permute_and_scalar_add_time, generic: %f\n", total);
-  
-  
-  
+
+
+
   for(int i = 0; i < vlen; ++i) {
     printf("psa... %d, b4s8... %d\n", target[i], target3[i]);
   }
-  
+
   for(int i = 0; i < vlen; ++i) {
-    
+
     CPPUNIT_ASSERT(target[i] == target2[i]);
     CPPUNIT_ASSERT(target[i] == target3[i]);
   }
diff --git a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc
index 3cd4e906df..dadd2c5804 100644
--- a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc
+++ b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc
@@ -16,13 +16,13 @@ void qa_16s_permute_and_scalar_add_aligned16::t1() {
 
 void qa_16s_permute_and_scalar_add_aligned16::t1() {
   const int vlen = 64;
-  
+
   unsigned int num_bytes = vlen << 1;
 
   volk_environment_init();
   clock_t start, end;
   double total;
-  
+
   __VOLK_ATTR_ALIGNED(16) short target[vlen];
   __VOLK_ATTR_ALIGNED(16) short target2[vlen];
   __VOLK_ATTR_ALIGNED(16) short src0[vlen];
@@ -43,7 +43,7 @@ void qa_16s_permute_and_scalar_add_aligned16::t1() {
   }
 
   printf("16s_permute_and_scalar_add_aligned\n");
-  
+
   start = clock();
   for(int i = 0; i < 100000; ++i) {
     volk_16s_permute_and_scalar_add_aligned16_manual(target, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "generic");
@@ -53,24 +53,24 @@ void qa_16s_permute_and_scalar_add_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
 
   printf("generic_time: %f\n", total);
-  
+
   start = clock();
   for(int i = 0; i < 100000; ++i) {
     volk_16s_permute_and_scalar_add_aligned16_manual(target2, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2");
   }
   end = clock();
-  
+
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
 
   printf("sse2_time: %f\n", total);
-  
-  
+
+
   for(int i = 0; i < vlen; ++i) {
     //printf("generic... %d, sse2... %d\n", target[i], target2[i]);
   }
-  
+
   for(int i = 0; i < vlen; ++i) {
-    
+
     CPPUNIT_ASSERT(target[i] == target2[i]);
   }
 }
diff --git a/volk/lib/qa_16s_quad_max_star_aligned16.cc b/volk/lib/qa_16s_quad_max_star_aligned16.cc
index 192a69e350..2a5dec44ab 100644
--- a/volk/lib/qa_16s_quad_max_star_aligned16.cc
+++ b/volk/lib/qa_16s_quad_max_star_aligned16.cc
@@ -16,7 +16,7 @@ void qa_16s_quad_max_star_aligned16::t1() {
 
 void qa_16s_quad_max_star_aligned16::t1() {
   const int vlen = 34;
-  
+
   __VOLK_ATTR_ALIGNED(16) short input0[vlen];
   __VOLK_ATTR_ALIGNED(16) short input1[vlen];
   __VOLK_ATTR_ALIGNED(16) short input2[vlen];
@@ -50,9 +50,9 @@ void qa_16s_quad_max_star_aligned16::t1() {
   for(int i = 0; i < vlen; ++i) {
     printf("generic... %d, sse2... %d, inputs: %d, %d, %d, %d\n", output0[i], output1[i], input0[i], input1[i], input2[i], input3[i]);
   }
-  
+
   for(int i = 0; i < vlen; ++i) {
-    
+
     CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]);
   }
 }
diff --git a/volk/lib/qa_32f_fm_detect_aligned16.cc b/volk/lib/qa_32f_fm_detect_aligned16.cc
index a2e7a85be3..4e792ec6cb 100644
--- a/volk/lib/qa_32f_fm_detect_aligned16.cc
+++ b/volk/lib/qa_32f_fm_detect_aligned16.cc
@@ -15,18 +15,18 @@ void qa_32f_fm_detect_aligned16::t1() {
 #else
 
 void qa_32f_fm_detect_aligned16::t1() {
-  
+
   volk_environment_init();
   clock_t start, end;
   double total;
   const int vlen = 3201;
   const int ITERS = 10000;
   __VOLK_ATTR_ALIGNED(16) float input0[vlen];
-  
+
   __VOLK_ATTR_ALIGNED(16) float output0[vlen];
   __VOLK_ATTR_ALIGNED(16) float output01[vlen];
 
-  for(int i = 0; i < vlen; ++i) {   
+  for(int i = 0; i < vlen; ++i) {
     input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
   }
   printf("32f_fm_detect_aligned\n");
@@ -51,7 +51,7 @@ void qa_32f_fm_detect_aligned16::t1() {
     //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
     //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
   }
-  
+
   for(int i = 0; i < vlen; ++i) {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i]) * 1e-4);
diff --git a/volk/lib/qa_32f_index_max_aligned16.cc b/volk/lib/qa_32f_index_max_aligned16.cc
index a1c3d4cd14..2df206726d 100644
--- a/volk/lib/qa_32f_index_max_aligned16.cc
+++ b/volk/lib/qa_32f_index_max_aligned16.cc
@@ -34,12 +34,12 @@ void qa_32f_index_max_aligned16::t1(){
 
 
 void qa_32f_index_max_aligned16::t1(){
- 
+
   const int vlen = VEC_LEN;
 
-  
+
   volk_runtime_init();
-  
+
   volk_environment_init();
   int ret;
 
@@ -47,8 +47,8 @@ void qa_32f_index_max_aligned16::t1(){
   unsigned int* target_sse;
   unsigned int* target_generic;
   float* src0 ;
-  
-  
+
+
   unsigned int i_target_sse4_1;
   target_sse4_1 = &i_target_sse4_1;
   unsigned int i_target_sse;
@@ -57,20 +57,20 @@ void qa_32f_index_max_aligned16::t1(){
   target_generic = &i_target_generic;
 
   ret = posix_memalign((void**)&src0, 16, vlen *sizeof(float));
-  
+
   random_floats((float*)src0, vlen);
-  
+
   printf("32f_index_max_aligned16\n");
 
   clock_t start, end;
   double total;
-  
-  
+
+
   start = clock();
   for(int k = 0; k < NUM_ITERS; ++k) {
     volk_32f_index_max_aligned16_manual(target_generic, src0, vlen, "generic");
   }
-  end = clock();  
+  end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic time: %f\n", total);
 
@@ -78,25 +78,25 @@ void qa_32f_index_max_aligned16::t1(){
   for(int k = 0; k < NUM_ITERS; ++k) {
     volk_32f_index_max_aligned16_manual(target_sse, src0, vlen, "sse2");
   }
-  
-  end = clock();  
+
+  end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("sse time: %f\n", total);
-  
+
   start = clock();
   for(int k = 0; k < NUM_ITERS; ++k) {
     get_volk_runtime()->volk_32f_index_max_aligned16(target_sse4_1, src0, vlen);
   }
-  
-  end = clock();  
+
+  end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("sse4.1 time: %f\n", total);
-  
-  
+
+
   printf("generic: %u, sse: %u, sse4.1: %u\n", target_generic[0], target_sse[0], target_sse4_1[0]);
   CPPUNIT_ASSERT_EQUAL(target_generic[0], target_sse[0]);
   CPPUNIT_ASSERT_EQUAL(target_generic[0], target_sse4_1[0]);
-  
+
   free(src0);
 }
 
diff --git a/volk/lib/qa_32fc_index_max_aligned16.cc b/volk/lib/qa_32fc_index_max_aligned16.cc
index 4d83f16395..3859bcb522 100644
--- a/volk/lib/qa_32fc_index_max_aligned16.cc
+++ b/volk/lib/qa_32fc_index_max_aligned16.cc
@@ -33,36 +33,36 @@ void qa_32fc_index_max_aligned16::t1(){
 
 
 void qa_32fc_index_max_aligned16::t1(){
- 
+
   const int vlen = VEC_LEN;
-  
+
   volk_environment_init();
   int ret;
-  
+
   unsigned int* target;
   unsigned int* target_generic;
   std::complex<float>* src0 ;
-  
-  
+
+
   unsigned int i_target;
   target = &i_target;
   unsigned int i_target_generic;
   target_generic = &i_target_generic;
   ret = posix_memalign((void**)&src0, 16, vlen << 3);
-  
+
   random_floats((float*)src0, vlen * 2);
-  
+
   printf("32fc_index_max_aligned16\n");
 
   clock_t start, end;
   double total;
-  
-  
+
+
   start = clock();
   for(int k = 0; k < NUM_ITERS; ++k) {
     volk_32fc_index_max_aligned16_manual(target_generic, src0, vlen << 3, "generic");
   }
-  end = clock();  
+  end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic time: %f\n", total);
 
@@ -70,19 +70,19 @@ void qa_32fc_index_max_aligned16::t1(){
   for(int k = 0; k < NUM_ITERS; ++k) {
   volk_32fc_index_max_aligned16_manual(target, src0, vlen << 3, "sse3");
   }
-  
-  end = clock();  
+
+  end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("sse3 time: %f\n", total);
 
-  
-  
-  
+
+
+
   printf("generic: %u, sse3: %u\n", target_generic[0], target[0]);
   CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[0], target[0], 1.1);
-  
 
-  
+
+
   free(src0);
 }
 
diff --git a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc
index 981bb19e69..daca31d9ce 100644
--- a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc
+++ b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc
@@ -15,14 +15,14 @@ void qa_32fc_power_spectral_density_32f_aligned16::t1() {
 #else
 
 void qa_32fc_power_spectral_density_32f_aligned16::t1() {
-  
+
   volk_environment_init();
   clock_t start, end;
   double total;
   const int vlen = 3201;
   const int ITERS = 10000;
   __VOLK_ATTR_ALIGNED(16) std::complex<float> input0[vlen];
-  
+
   __VOLK_ATTR_ALIGNED(16) float output_generic[vlen];
   __VOLK_ATTR_ALIGNED(16) float output_sse3[vlen];
 
@@ -30,7 +30,7 @@ void qa_32fc_power_spectral_density_32f_aligned16::t1() {
   const float rbw = 1.7;
 
   float* inputLoad = (float*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
+  for(int i = 0; i < 2*vlen; ++i) {
     inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
   }
   printf("32fc_power_spectral_density_32f_aligned\n");
@@ -54,7 +54,7 @@ void qa_32fc_power_spectral_density_32f_aligned16::t1() {
     //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
     //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
   }
-  
+
   for(int i = 0; i < vlen; ++i) {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i]*1e-4));
diff --git a/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc b/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc
index fefdf06eeb..b825c20e4e 100644
--- a/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc
+++ b/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc
@@ -7,7 +7,7 @@
 
 #define assertcomplexEqual(expected, actual, delta)			\
   CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);	
+  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);
 
 #define	ERR_DELTA	(1e-4)
 
@@ -35,7 +35,7 @@ void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() {
 
   std::complex<float>* input;
   std::complex<float>* taps;
-  
+
   std::complex<float>* result_generic;
   std::complex<float>* result;
 
@@ -43,19 +43,19 @@ void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() {
   ret = posix_memalign((void**)&taps, 16, vlen << 3);
   ret = posix_memalign((void**)&result_generic, 16, 8);
   ret = posix_memalign((void**)&result, 16, 8);
-  
+
 
   result_generic[0] = std::complex<float>(0,0);
   result[0] = std::complex<float>(0,0);
 
   random_floats((float*)input, vlen * 2);
   random_floats((float*)taps, vlen * 2);
-  
-  
+
+
 
   volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result_generic, input, taps, vlen * 8,  "generic");
 
-  
+
   volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result, input, taps, vlen * 8, "sse");
 
   printf("32fc_x2_conjugate_dot_prod_32fc_u\n");
@@ -67,7 +67,7 @@ void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() {
   free(taps);
   free(result_generic);
   free(result);
-  
+
 }
 
 
@@ -87,13 +87,13 @@ random_floats (float *buf, unsigned n)
 
 void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() {
   const int vlen = 789743;
-  
+
   volk_environment_init();
   int ret;
 
   std::complex<float>* input;
   std::complex<float>* taps;
-  
+
   std::complex<float>* result_generic;
   std::complex<float>* result;
 
@@ -101,19 +101,19 @@ void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() {
   ret = posix_memalign((void**)&taps, 16, vlen << 3);
   ret = posix_memalign((void**)&result_generic, 16, 8);
   ret = posix_memalign((void**)&result, 16, 8);
-  
+
 
   result_generic[0] = std::complex<float>(0,0);
   result[0] = std::complex<float>(0,0);
 
   random_floats((float*)input, vlen * 2);
   random_floats((float*)taps, vlen * 2);
-  
-  
+
+
 
   volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result_generic, input, taps, vlen * 8,  "generic");
 
-  
+
   volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result, input, taps, vlen * 8, "sse_32");
 
   printf("32fc_x2_conjugate_dot_prod_32fc_u\n");
@@ -125,7 +125,7 @@ void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() {
   free(taps);
   free(result_generic);
   free(result);
-  
+
 }
 
 
diff --git a/volk/lib/qa_32u_popcnt_aligned16.cc b/volk/lib/qa_32u_popcnt_aligned16.cc
index c880260f2a..5559d933df 100644
--- a/volk/lib/qa_32u_popcnt_aligned16.cc
+++ b/volk/lib/qa_32u_popcnt_aligned16.cc
@@ -16,8 +16,8 @@ void qa_32u_popcnt_aligned16::t1() {
 #else
 
 void qa_32u_popcnt_aligned16::t1() {
-  
-  
+
+
   volk_runtime_init();
 
   volk_environment_init();
@@ -26,7 +26,7 @@ void qa_32u_popcnt_aligned16::t1() {
 
   const int ITERS = 10000000;
   __VOLK_ATTR_ALIGNED(16) uint32_t input0;
-  
+
   __VOLK_ATTR_ALIGNED(16) uint32_t output0;
   __VOLK_ATTR_ALIGNED(16) uint32_t output01;
 
@@ -55,7 +55,7 @@ void qa_32u_popcnt_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("sse4.2_time: %f\n", total);
 
-  
+
   CPPUNIT_ASSERT_EQUAL(output0, output01);
 }
 
diff --git a/volk/lib/qa_64u_popcnt_aligned16.cc b/volk/lib/qa_64u_popcnt_aligned16.cc
index 6be4e50ea0..391601f223 100644
--- a/volk/lib/qa_64u_popcnt_aligned16.cc
+++ b/volk/lib/qa_64u_popcnt_aligned16.cc
@@ -16,8 +16,8 @@ void qa_64u_popcnt_aligned16::t1() {
 #else
 
 void qa_64u_popcnt_aligned16::t1() {
-  
-  
+
+
   volk_runtime_init();
 
   volk_environment_init();
@@ -26,7 +26,7 @@ void qa_64u_popcnt_aligned16::t1() {
 
   const int ITERS = 10000000;
   __VOLK_ATTR_ALIGNED(16) uint64_t input0;
-  
+
   __VOLK_ATTR_ALIGNED(16) uint64_t output0;
   __VOLK_ATTR_ALIGNED(16) uint64_t output01;
 
@@ -55,7 +55,7 @@ void qa_64u_popcnt_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("sse4.2_time: %f\n", total);
 
-  
+
   CPPUNIT_ASSERT_EQUAL(output0, output01);
 }
 
diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index bb37801c94..c15979b3f2 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -46,7 +46,7 @@ void load_random_data(void *data, volk_type_t type, unsigned int n) {
             case 4:
                 if(type.is_signed) ((int32_t *)data)[i] = (int32_t) scaled_rand;
                 else ((uint32_t *)data)[i] = (uint32_t) scaled_rand;
-            break;           
+            break;
             case 2:
                 if(type.is_signed) ((int16_t *)data)[i] = (int16_t) scaled_rand;
                 else ((uint16_t *)data)[i] = (uint16_t) scaled_rand;
@@ -69,7 +69,7 @@ static std::vector<std::string> get_arch_list(struct volk_func_desc desc) {
         //if(!(archs[i+1] & volk_get_lvarch())) continue; //this arch isn't available on this pc
         archlist.push_back(std::string(desc.indices[i]));
     }
-    
+
     return archlist;
 }
 
@@ -81,15 +81,15 @@ volk_type_t volk_type_from_string(std::string name) {
     type.is_signed = false;
     type.size = 0;
     type.str = name;
-    
+
     if(name.size() < 2) throw std::string("name too short to be a datatype");
-    
+
     //is it a scalar?
-    if(name[0] == 's') { 
+    if(name[0] == 's') {
         type.is_scalar = true;
         name = name.substr(1, name.size()-1);
     }
-    
+
     //get the data size
     size_t last_size_pos = name.find_last_of("0123456789");
     if(last_size_pos < 0) throw std::string("no size spec in type ").append(name);
@@ -98,7 +98,7 @@ volk_type_t volk_type_from_string(std::string name) {
 
     assert(((size % 8) == 0) && (size <= 64) && (size != 0));
     type.size = size/8; //in bytes
-    
+
     for(size_t i=last_size_pos+1; i < name.size(); i++) {
         switch (name[i]) {
         case 'f':
@@ -117,19 +117,19 @@ volk_type_t volk_type_from_string(std::string name) {
             throw;
         }
     }
-    
+
     return type;
 }
 
-static void get_signatures_from_name(std::vector<volk_type_t> &inputsig, 
-                                   std::vector<volk_type_t> &outputsig, 
+static void get_signatures_from_name(std::vector<volk_type_t> &inputsig,
+                                   std::vector<volk_type_t> &outputsig,
                                    std::string name) {
     boost::char_separator<char> sep("_");
     boost::tokenizer<boost::char_separator<char> > tok(name, sep);
     std::vector<std::string> toked;
     tok.assign(name);
     toked.assign(tok.begin(), tok.end());
-    
+
     assert(toked[0] == "volk");
     toked.erase(toked.begin());
 
@@ -143,7 +143,7 @@ static void get_signatures_from_name(std::vector<volk_type_t> &inputsig,
         try {
             type = volk_type_from_string(token);
             if(side == SIDE_NAME) side = SIDE_OUTPUT; //if this is the first one after the name...
-            
+
             if(side == SIDE_INPUT) inputsig.push_back(type);
             else outputsig.push_back(type);
         } catch (...){
@@ -160,7 +160,7 @@ static void get_signatures_from_name(std::vector<volk_type_t> &inputsig,
                 side = SIDE_NAME;
                 fn_name.append("_");
                 fn_name.append(token);
-            } 
+            }
             else if(side == SIDE_OUTPUT) {
                 if(token != toked.back()) throw; //the last token in the name is the alignment
             }
@@ -223,7 +223,7 @@ bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) {
             }
         }
     }
-    
+
     return fail;
 }
 
@@ -239,7 +239,7 @@ bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) {
             }
         }
     }
-    
+
     return fail;
 }
 
@@ -264,10 +264,10 @@ bool run_volk_tests(struct volk_func_desc desc,
                     std::vector<std::string> *best_arch_vector = 0
                    ) {
     std::cout << "RUN_VOLK_TESTS: " << name << std::endl;
-    
+
     //first let's get a list of available architectures for the test
     std::vector<std::string> arch_list = get_arch_list(desc);
-    
+
     if(arch_list.size() < 2) {
         std::cout << "no architectures to test" << std::endl;
         return false;
@@ -279,7 +279,7 @@ bool run_volk_tests(struct volk_func_desc desc,
     //now we have to get a function signature by parsing the name
     std::vector<volk_type_t> inputsig, outputsig;
     get_signatures_from_name(inputsig, outputsig, name);
-    
+
     //pull the input scalars into their own vector
     std::vector<volk_type_t> inputsc;
     for(size_t i=0; i<inputsig.size(); i++) {
@@ -299,7 +299,7 @@ bool run_volk_tests(struct volk_func_desc desc,
     for(size_t i=0; i<inbuffs.size(); i++) {
         load_random_data(inbuffs[i], inputsig[i], vlen);
     }
-    
+
     //ok let's make a vector of vector of void buffers, which holds the input/output vectors for each arch
     std::vector<std::vector<void *> > test_data;
     for(size_t i=0; i<arch_list.size(); i++) {
@@ -312,7 +312,7 @@ bool run_volk_tests(struct volk_func_desc desc,
         }
         test_data.push_back(arch_buffs);
     }
-    
+
     std::vector<volk_type_t> both_sigs;
     both_sigs.insert(both_sigs.end(), outputsig.begin(), outputsig.end());
     both_sigs.insert(both_sigs.end(), inputsig.begin(), inputsig.end());
@@ -326,7 +326,7 @@ bool run_volk_tests(struct volk_func_desc desc,
         switch(both_sigs.size()) {
             case 1:
                 if(inputsc.size() == 0) {
-                    run_cast_test1((volk_fn_1arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); 
+                    run_cast_test1((volk_fn_1arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
                 } else if(inputsc.size() == 1 && inputsc[0].is_float) {
                     if(inputsc[0].is_complex) {
                         run_cast_test1_s32fc((volk_fn_1arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
@@ -364,23 +364,23 @@ bool run_volk_tests(struct volk_func_desc desc,
                 throw "no function handler for this signature";
                 break;
         }
-        
+
         end = clock();
         double arch_time = (double)(end-start)/(double)CLOCKS_PER_SEC;
         std::cout << arch_list[i] << " completed in " << arch_time << "s" << std::endl;
 
         profile_times.push_back(arch_time);
     }
-    
+
     //and now compare each output to the generic output
     //first we have to know which output is the generic one, they aren't in order...
     size_t generic_offset=0;
-    for(size_t i=0; i<arch_list.size(); i++) 
+    for(size_t i=0; i<arch_list.size(); i++)
         if(arch_list[i] == "generic") generic_offset=i;
 
     //now compare
     //if(outputsig.size() == 0) outputsig = inputsig; //a hack, i know
-    
+
     bool fail = false;
     bool fail_global = false;
     std::vector<bool> arch_results;
@@ -438,7 +438,7 @@ bool run_volk_tests(struct volk_func_desc desc,
         }
         arch_results.push_back(!fail);
     }
-        
+
     double best_time = std::numeric_limits<double>::max();
     std::string best_arch = "generic";
     for(size_t i=0; i < arch_list.size(); i++) {
@@ -447,7 +447,7 @@ bool run_volk_tests(struct volk_func_desc desc,
             best_arch = arch_list[i];
         }
     }
-        
+
     std::cout << "Best arch: " << best_arch << std::endl;
     if(best_arch_vector) {
         best_arch_vector->push_back(name + std::string(" ") + best_arch);
diff --git a/volk/lib/volk_prefs.c b/volk/lib/volk_prefs.c
index 7e705bed46..5e5c9dfff7 100644
--- a/volk/lib/volk_prefs.c
+++ b/volk/lib/volk_prefs.c
@@ -26,7 +26,7 @@ int load_preferences(struct volk_arch_pref **prefs) {
     char path[512], line[512], function[128], arch[32];
     int n_arch_prefs = 0;
     struct volk_arch_pref *t_pref;
-    
+
     //get the config path
     get_config_path(path);
     if (path == NULL) return n_arch_prefs; //no prefs found
diff --git a/volk/lib/volk_rank_archs.c b/volk/lib/volk_rank_archs.c
index 4baa078bca..865d60955c 100644
--- a/volk/lib/volk_rank_archs.c
+++ b/volk/lib/volk_rank_archs.c
@@ -26,14 +26,14 @@ unsigned int volk_rank_archs(const char *indices[], const int* arch_defs, unsign
       n_arch_prefs = load_preferences(&volk_arch_prefs);
       prefs_loaded = 1;
   }
-  
+
   //now look for the function name in the prefs list
   for(i=0; i < n_arch_prefs; i++) {
       if(!strncmp(name, volk_arch_prefs[i].name, 128)) { //found it
         return get_index(indices, n_archs, volk_arch_prefs[i].arch);
       }
   }
-  
+
   for(i=1; i < n_archs; ++i) {
     if((arch_defs[i]&(!arch)) == 0) {
       best_val = (arch_defs[i] > arch_defs[best_val + 1]) ? i-1 : best_val;
diff --git a/volk/libvector_replace.sh b/volk/libvector_replace.sh
index 4c7e33e1b4..e1940c00f3 100755
--- a/volk/libvector_replace.sh
+++ b/volk/libvector_replace.sh
@@ -2,8 +2,8 @@
 
 cd $1
 files=`ls`
-for file in $files 
-do 
+for file in $files
+do
     sed 's/libvector/volk/g' < $file > tempfile
     sed 's/LIBVECTOR/VOLK/g' < tempfile > $file
 done
diff --git a/volk/msvc/inttypes.h b/volk/msvc/inttypes.h
index 1c2baa82eb..0a1b60fc16 100644
--- a/volk/msvc/inttypes.h
+++ b/volk/msvc/inttypes.h
@@ -1,32 +1,32 @@
 // ISO C9x  compliant inttypes.h for Microsoft Visual Studio
-// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 
-// 
+// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
+//
 //  Copyright (c) 2006 Alexander Chemeris
-// 
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are met:
-// 
+//
 //   1. Redistributions of source code must retain the above copyright notice,
 //      this list of conditions and the following disclaimer.
-// 
+//
 //   2. Redistributions in binary form must reproduce the above copyright
 //      notice, this list of conditions and the following disclaimer in the
 //      documentation and/or other materials provided with the distribution.
-// 
+//
 //   3. The name of the author may be used to endorse or promote products
 //      derived from this software without specific prior written permission.
-// 
+//
 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
 // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
 // EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-// 
+//
 ///////////////////////////////////////////////////////////////////////////////
 
 #ifndef _MSC_VER // [
diff --git a/volk/msvc/stdint.h b/volk/msvc/stdint.h
index ab6d37e111..108bc89826 100644
--- a/volk/msvc/stdint.h
+++ b/volk/msvc/stdint.h
@@ -1,32 +1,32 @@
 // ISO C9x  compliant stdint.h for Microsoft Visual Studio
-// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 
-// 
+// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
+//
 //  Copyright (c) 2006-2008 Alexander Chemeris
-// 
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are met:
-// 
+//
 //   1. Redistributions of source code must retain the above copyright notice,
 //      this list of conditions and the following disclaimer.
-// 
+//
 //   2. Redistributions in binary form must reproduce the above copyright
 //      notice, this list of conditions and the following disclaimer in the
 //      documentation and/or other materials provided with the distribution.
-// 
+//
 //   3. The name of the author may be used to endorse or promote products
 //      derived from this software without specific prior written permission.
-// 
+//
 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
 // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
 // EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-// 
+//
 ///////////////////////////////////////////////////////////////////////////////
 
 #ifndef _MSC_VER // [
diff --git a/volk/python/__init__.py b/volk/python/__init__.py
index 7239e7e238..7c9c4a0c64 100644
--- a/volk/python/__init__.py
+++ b/volk/python/__init__.py
@@ -1,18 +1,18 @@
 #
 # Copyright 2010 Free Software Foundation, Inc.
-# 
+#
 # This file is part of GNU Radio
-# 
+#
 # GNU Radio is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 3, or (at your option)
 # any later version.
-# 
+#
 # GNU Radio is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
@@ -31,7 +31,7 @@ except ImportError:
 	from DLFCN import RTLD_GLOBAL as _RTLD_GLOBAL
     except ImportError:
 	pass
-    
+
 if _RTLD_GLOBAL != 0:
     _dlopenflags = sys.getdlopenflags()
     sys.setdlopenflags(_dlopenflags|_RTLD_GLOBAL)
diff --git a/volk/python/qa_square.py b/volk/python/qa_square.py
index 53f0433ca9..c74bc25ef7 100755
--- a/volk/python/qa_square.py
+++ b/volk/python/qa_square.py
@@ -1,24 +1,24 @@
 #!/usr/bin/env python
 #
 # Copyright 2010 Free Software Foundation, Inc.
-# 
+#
 # This file is part of GNU Radio
-# 
+#
 # GNU Radio is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 3, or (at your option)
 # any later version.
-# 
+#
 # GNU Radio is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU General Public License
 # along with GNU Radio; see the file COPYING.  If not, write to
 # the Free Software Foundation, Inc., 51 Franklin Street,
 # Boston, MA 02110-1301, USA.
-# 
+#
 
 from gnuradio import gr, gr_unittest
 import volk_swig as volk
@@ -42,6 +42,6 @@ class qa_volk(gr_unittest.TestCase):
         self.tb.run()
         result_data = dst.data()
         self.assertFloatTuplesAlmostEqual(expected_result, result_data, 6)
-        
+
 if __name__ == '__main__':
     gr_unittest.main()
diff --git a/volk/python/volk.i b/volk/python/volk.i
index ea3a037ba0..d678a91209 100644
--- a/volk/python/volk.i
+++ b/volk/python/volk.i
@@ -1,19 +1,19 @@
 /* -*- c++ -*- */
 /*
  * Copyright 2010 Free Software Foundation, Inc.
- * 
+ *
  * This file is part of GNU Radio
- * 
+ *
  * GNU Radio is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 3, or (at your option)
  * any later version.
- * 
+ *
  * GNU Radio is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
- * 
+ *
  * You should have received a copy of the GNU General Public License along
  * with this program; if not, write to the Free Software Foundation, Inc.,
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
diff --git a/volk/python/volk_square_ff.i b/volk/python/volk_square_ff.i
index 1d485bd7e6..5456c81fd4 100644
--- a/volk/python/volk_square_ff.i
+++ b/volk/python/volk_square_ff.i
@@ -1,19 +1,19 @@
 /* -*- c++ -*- */
 /*
  * Copyright 2010 Free Software Foundation, Inc.
- * 
+ *
  * This file is part of GNU Radio
- * 
+ *
  * GNU Radio is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 3, or (at your option)
  * any later version.
- * 
+ *
  * GNU Radio is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
- * 
+ *
  * You should have received a copy of the GNU General Public License along
  * with this program; if not, write to the Free Software Foundation, Inc.,
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
diff --git a/volk/spu_lib/gc_spu_macs.h b/volk/spu_lib/gc_spu_macs.h
index 8e3e3f2a62..e86dce3f5e 100644
--- a/volk/spu_lib/gc_spu_macs.h
+++ b/volk/spu_lib/gc_spu_macs.h
@@ -1,19 +1,19 @@
 /* -*- asm -*- */
 /*
  * Copyright 2008 Free Software Foundation, Inc.
- * 
+ *
  * This file is part of GNU Radio
- * 
+ *
  * GNU Radio is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 3, or (at your option)
  * any later version.
- * 
+ *
  * GNU Radio is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
- * 
+ *
  * You should have received a copy of the GNU General Public License along
  * with this program; if not, write to the Free Software Foundation, Inc.,
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
@@ -279,7 +279,7 @@ name:
 	iluh	_gc_t0, 4*(s)*0x0101 + 0x0001;	\
 	iohl	_gc_t0, 4*(s)*0x0101 + 0x0203;	\
 	shufb	rt, ra, ra, _gc_t0;
-	
+
 // replicate double from slot s [0,1]
 #define	VSPLTD(rt, ra, s) \
 	/* sp is always 16-byte aligned */ \
@@ -301,13 +301,13 @@ name:
 
 #define MIN_SELB(rt, ra, rb, rc)	selb	rt, ra, rb, rc;
 #define MAX_SELB(rt, ra, rb, rc)	selb	rt, rb, ra, rc;
-	
+
 	// words
 
 #define MIN(rt, ra, rb) \
 	cgt	_gc_t0, ra, rb; \
 	MIN_SELB(rt, ra, rb, _gc_t0)
-	
+
 #define	MAX(rt, ra, rb) \
 	cgt	_gc_t0, ra, rb; \
 	MAX_SELB(rt, ra, rb, _gc_t0)
@@ -315,17 +315,17 @@ name:
 #define UMIN(rt, ra, rb) \
 	clgt	_gc_t0, ra, rb; \
 	MIN_SELB(rt, ra, rb, _gc_t0)
-	
+
 #define	UMAX(rt, ra, rb) \
 	clgt	_gc_t0, ra, rb; \
 	MAX_SELB(rt, ra, rb, _gc_t0)
 
 	// bytes
-	
+
 #define MINB(rt, ra, rb) \
 	cgtb	_gc_t0, ra, rb; \
 	MIN_SELB(rt, ra, rb, _gc_t0)
-	
+
 #define	MAXB(rt, ra, rb) \
 	cgtb	_gc_t0, ra, rb; \
 	MAX_SELB(rt, ra, rb, _gc_t0)
@@ -333,17 +333,17 @@ name:
 #define UMINB(rt, ra, rb) \
 	clgtb	_gc_t0, ra, rb; \
 	MIN_SELB(rt, ra, rb, _gc_t0)
-	
+
 #define	UMAXB(rt, ra, rb) \
 	clgtb	_gc_t0, ra, rb; \
 	MAX_SELB(rt, ra, rb, _gc_t0)
 
 	// halfwords
-	
+
 #define MINH(rt, ra, rb) \
 	cgth	_gc_t0, ra, rb; \
 	MIN_SELB(rt, ra, rb, _gc_t0)
-	
+
 #define	MAXH(rt, ra, rb) \
 	cgth	_gc_t0, ra, rb; \
 	MAX_SELB(rt, ra, rb, _gc_t0)
@@ -351,17 +351,17 @@ name:
 #define UMINH(rt, ra, rb) \
 	clgth	_gc_t0, ra, rb; \
 	MIN_SELB(rt, ra, rb, _gc_t0)
-	
+
 #define	UMAXH(rt, ra, rb) \
 	clgth	_gc_t0, ra, rb; \
 	MAX_SELB(rt, ra, rb, _gc_t0)
 
 	// floats
-	
+
 #define FMIN(rt, ra, rb) \
 	fcgt	_gc_t0, ra, rb; \
 	MIN_SELB(rt, ra, rb, _gc_t0)
-	
+
 #define	FMAX(rt, ra, rb) \
 	fcgt	_gc_t0, ra, rb; \
 	MAX_SELB(rt, ra, rb, _gc_t0)
@@ -370,7 +370,7 @@ name:
 #define FMINMAG(rt, ra, rb) \
 	fcmgt	_gc_t0, ra, rb; \
 	MIN_SELB(rt, ra, rb, _gc_t0)
-	
+
 // Ignoring the sign, select the values with the maximum magnitude
 #define	FMAXMAG(rt, ra, rb) \
 	fcmgt	_gc_t0, ra, rb; \
diff --git a/volk/spu_lib/spu_16s_cmpgt_unaligned.c b/volk/spu_lib/spu_16s_cmpgt_unaligned.c
index 765cacd9a8..8811e68014 100644
--- a/volk/spu_lib/spu_16s_cmpgt_unaligned.c
+++ b/volk/spu_lib/spu_16s_cmpgt_unaligned.c
@@ -4,14 +4,14 @@ void* libvector_16s_cmpgt_unaligned(void* target, void* src, signed short val, u
 	//loop iterator i
 	int i = 0;
 	void* retval = target;
-	
+
 
 	//put the target and source addresses into qwords
 	vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0, 0};
 	vector unsigned int address_counter_src = {(unsigned int)src, 0, 0 ,0};
-	
+
 	//create shuffle masks
-	
+
 	//shuffle mask building blocks:
 	//all from the first vector
 	vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
@@ -19,9 +19,9 @@ void* libvector_16s_cmpgt_unaligned(void* target, void* src, signed short val, u
 	//all from the second vector
 	vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
 										 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f};
-	
 
-	
+
+
 	//gamma: second half of the second, first half of the first, break at (unsigned int)src%16
 	vector unsigned char src_cmp = spu_splats((unsigned char)((unsigned int)src%16));
 	vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp);
@@ -29,16 +29,16 @@ void* libvector_16s_cmpgt_unaligned(void* target, void* src, signed short val, u
 	vector unsigned char cmp_res = spu_or(gt_res, eq_res);
 	vector unsigned char sixteen_uchar = spu_splats((unsigned char)16);
 	vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res);
-	vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change, 
+	vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change,
 												 (vector unsigned int)oneup);
 	shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, (unsigned int)src%16);
 
-	
-	
+
+
 
 	vector unsigned char tgt_second = spu_rlqwbyte(second_oneup, -((unsigned int)target%16));
 	vector unsigned char tgt_first = spu_rlqwbyte(oneup, -((unsigned int)target%16));
-	
+
 	//alpha: first half of first, second half of second, break at (unsigned int)target%16
 	src_cmp = spu_splats((unsigned char)((unsigned int)target%16));
 	gt_res = spu_cmpgt(oneup, src_cmp);
@@ -47,13 +47,13 @@ void* libvector_16s_cmpgt_unaligned(void* target, void* src, signed short val, u
 	phase_change = spu_and(sixteen_uchar, cmp_res);
 	vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned int)phase_change,
 													 (vector unsigned int)oneup);
-	
+
 	//delta: first half of first, first half of second, break at (unsigned int)target%16
 	vector unsigned char shuffle_mask_delta = spu_shuffle(oneup, tgt_second, (vector unsigned char)shuffle_mask_alpha);
 	//epsilon: second half of second, second half of first, break at (unsigned int)target%16
 	vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second, oneup, (vector unsigned char)shuffle_mask_alpha);
 	//zeta: second half of second, first half of first, break at 16 - (unsigned int)target%16
-	vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16); 
+	vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16);
 
 	//beta: first half of first, second half of second, break at num_bytes%16
 	src_cmp = spu_splats((unsigned char)(num_bytes%16));
@@ -63,17 +63,17 @@ void* libvector_16s_cmpgt_unaligned(void* target, void* src, signed short val, u
 	phase_change = spu_and(sixteen_uchar, cmp_res);
 	vector unsigned int shuffle_mask_beta = spu_add((vector unsigned int)phase_change,
 													 (vector unsigned int)oneup);
-	
-	
-	
 
 
-	
+
+
+
+
 	qword src_past;
 	qword src_present;
 	qword tgt_past;
 	qword tgt_present;
-	
+
 	qword in_temp;
 	qword out_temp0;
 	qword out_temp1;
@@ -85,53 +85,53 @@ void* libvector_16s_cmpgt_unaligned(void* target, void* src, signed short val, u
 	vector unsigned short compare;
 	vector unsigned short ones = {1, 1, 1, 1, 1, 1, 1, 1};
 	vector unsigned short after_and;
-	
+
 	for(i = 0; i < num_bytes/16; ++i) {
-		
+
 		src_present = si_lqd((qword)address_counter_src, 16);
 		tgt_present = si_lqd((qword)address_counter_tgt, 16);
-		
+
 		in_temp = spu_shuffle(src_present, src_past, (vector unsigned char)shuffle_mask_gamma);
 
 		compare = spu_cmpgt((vector signed short) in_temp, vec_val);
 		after_and = spu_and(compare, ones);
-		
-		
+
+
 		out_temp0 = spu_shuffle(tgt_past, (qword)after_and, shuffle_mask_delta);
 		out_temp1 = spu_shuffle(tgt_present, (qword)after_and, shuffle_mask_epsilon);
 
 		si_stqd(out_temp0, (qword)address_counter_tgt, 0);
 		si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-		
+
 		tgt_past = out_temp1;
 		src_past = src_present;
 		address_counter_src = spu_add(address_counter_src, 16);
 		address_counter_tgt = spu_add(address_counter_tgt, 16);
-	
+
 
 	}
-	
+
 	src_present = si_lqd((qword)address_counter_src, 16);
 	tgt_present = si_lqd((qword)address_counter_tgt, 16);
-		
-	
+
+
 	in_temp = spu_shuffle(src_present, src_past,(vector unsigned char) shuffle_mask_gamma);
-	
+
 	compare = spu_cmpgt((vector signed short) in_temp, vec_val);
 	after_and = spu_and(compare, ones);
-		
+
 
 	qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned char) shuffle_mask_zeta);
 	qword meld = spu_shuffle((qword)after_and, target_temp, (vector unsigned char)shuffle_mask_beta);
 
-	
-	
+
+
 	out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta);
 	out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon);
-	
+
 	si_stqd(out_temp0, (qword)address_counter_tgt, 0);
 	si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-	
+
 	return retval;
 }
 
@@ -156,5 +156,5 @@ int main(){
 	}
 	printf("\n");
 }
-*/	
+*/
 
diff --git a/volk/spu_lib/spu_16s_vector_subtract_unaligned.c b/volk/spu_lib/spu_16s_vector_subtract_unaligned.c
index a3ce6c2fea..ea110c8d21 100644
--- a/volk/spu_lib/spu_16s_vector_subtract_unaligned.c
+++ b/volk/spu_lib/spu_16s_vector_subtract_unaligned.c
@@ -4,15 +4,15 @@ void* libvector_16s_vector_subtract_unaligned(void* target,  void* src0, void* s
 	//loop iterator i
 	int i = 0;
 	void* retval = target;
-	
+
 
 	//put the target and source addresses into qwords
 	vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0, 0};
 	vector unsigned int address_counter_src0 = {(unsigned int)src0, 0, 0 ,0};
 	vector unsigned int address_counter_src1 = {(unsigned int)src1, 0, 0, 0};
-	
+
 	//create shuffle masks
-	
+
 	//shuffle mask building blocks:
 	//all from the first vector
 	vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
@@ -20,9 +20,9 @@ void* libvector_16s_vector_subtract_unaligned(void* target,  void* src0, void* s
 	//all from the second vector
 	vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
 										 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f};
-	
 
-	
+
+
 	//gamma: second half of the second, first half of the first, break at (unsigned int)src0%16
 	vector unsigned char src_cmp = spu_splats((unsigned char)((unsigned int)src0%16));
 	vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp);
@@ -30,7 +30,7 @@ void* libvector_16s_vector_subtract_unaligned(void* target,  void* src0, void* s
 	vector unsigned char cmp_res = spu_or(gt_res, eq_res);
 	vector unsigned char sixteen_uchar = spu_splats((unsigned char)16);
 	vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res);
-	vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change, 
+	vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change,
 												 (vector unsigned int)oneup);
 	shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, (unsigned int)src0%16);
 
@@ -41,17 +41,17 @@ void* libvector_16s_vector_subtract_unaligned(void* target,  void* src0, void* s
 	cmp_res = spu_or(gt_res, eq_res);
 	sixteen_uchar = spu_splats((unsigned char)16);
 	phase_change = spu_and(sixteen_uchar, cmp_res);
-	vector unsigned int shuffle_mask_eta = spu_add((vector unsigned int)phase_change, 
+	vector unsigned int shuffle_mask_eta = spu_add((vector unsigned int)phase_change,
 												 (vector unsigned int)oneup);
 	shuffle_mask_eta = spu_rlqwbyte(shuffle_mask_eta, (unsigned int)src1%16);
-	
 
-	
-	
+
+
+
 
 	vector unsigned char tgt_second = spu_rlqwbyte(second_oneup, -((unsigned int)target%16));
 	vector unsigned char tgt_first = spu_rlqwbyte(oneup, -((unsigned int)target%16));
-	
+
 	//alpha: first half of first, second half of second, break at (unsigned int)target%16
 	src_cmp = spu_splats((unsigned char)((unsigned int)target%16));
 	gt_res = spu_cmpgt(oneup, src_cmp);
@@ -60,13 +60,13 @@ void* libvector_16s_vector_subtract_unaligned(void* target,  void* src0, void* s
 	phase_change = spu_and(sixteen_uchar, cmp_res);
 	vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned int)phase_change,
 													 (vector unsigned int)oneup);
-	
+
 	//delta: first half of first, first half of second, break at (unsigned int)target%16
 	vector unsigned char shuffle_mask_delta = spu_shuffle(oneup, tgt_second, (vector unsigned char)shuffle_mask_alpha);
 	//epsilon: second half of second, second half of first, break at (unsigned int)target%16
 	vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second, oneup, (vector unsigned char)shuffle_mask_alpha);
 	//zeta: second half of second, first half of first, break at 16 - (unsigned int)target%16
-	vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16); 
+	vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16);
 
 	//beta: first half of first, second half of second, break at num_bytes%16
 	src_cmp = spu_splats((unsigned char)(num_bytes%16));
@@ -76,19 +76,19 @@ void* libvector_16s_vector_subtract_unaligned(void* target,  void* src0, void* s
 	phase_change = spu_and(sixteen_uchar, cmp_res);
 	vector unsigned int shuffle_mask_beta = spu_add((vector unsigned int)phase_change,
 													 (vector unsigned int)oneup);
-	
-	
-	
 
 
-	
+
+
+
+
 	qword src0_past;
 	qword src0_present;
 	qword src1_past;
 	qword src1_present;
 	qword tgt_past;
 	qword tgt_present;
-	
+
 	qword in_temp0;
 	qword in_temp1;
 	qword out_temp0;
@@ -99,54 +99,54 @@ void* libvector_16s_vector_subtract_unaligned(void* target,  void* src0, void* s
 	src0_past = si_lqd((qword)address_counter_src0, 0);
 	src1_past = si_lqd((qword)address_counter_src1, 0);
 	tgt_past = si_lqd((qword)address_counter_tgt, 0);
-	
+
 	for(i = 0; i < num_bytes/16; ++i) {
-		
+
 		src0_present = si_lqd((qword)address_counter_src0, 16);
 		src1_present = si_lqd((qword)address_counter_src1, 16);
 		tgt_present = si_lqd((qword)address_counter_tgt, 16);
-		
+
 		in_temp0 = spu_shuffle(src0_present, src0_past, (vector unsigned char)shuffle_mask_gamma);
 		in_temp1 = spu_shuffle(src1_present, src1_past, (vector unsigned char)shuffle_mask_eta);
-		
+
 		sum = spu_sub((vector signed short)in_temp0, (vector signed short)in_temp1);
-	   
+
 
 		out_temp0 = spu_shuffle(tgt_past, (qword)sum, shuffle_mask_delta);
 		out_temp1 = spu_shuffle(tgt_present, (qword)sum, shuffle_mask_epsilon);
-		
+
 		si_stqd(out_temp0, (qword)address_counter_tgt, 0);
 		si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-		
+
 		tgt_past = out_temp1;
 		src0_past = src0_present;
 		src1_past = src1_present;
 		address_counter_src0 = spu_add(address_counter_src0, 16);
 		address_counter_src1 = spu_add(address_counter_src1, 16);
 		address_counter_tgt = spu_add(address_counter_tgt, 16);
-		
-		
+
+
 	}
-	
+
 	src0_present = si_lqd((qword)address_counter_src0, 16);
 	src1_present = si_lqd((qword)address_counter_src1, 16);
 	tgt_present = si_lqd((qword)address_counter_tgt, 16);
-	
-	
+
+
 	in_temp0 = spu_shuffle(src0_present, src0_past, (vector unsigned char) shuffle_mask_gamma);
 	in_temp1 = spu_shuffle(src1_present, src1_past, (vector unsigned char) shuffle_mask_eta);
 	sum = spu_sub((vector signed short)in_temp0, (vector signed short)in_temp1);
 	qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned char) shuffle_mask_zeta);
 	qword meld = spu_shuffle((qword)sum, target_temp, (vector unsigned char)shuffle_mask_beta);
 
-	
-	
+
+
 	out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta);
 	out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon);
-	
+
 	si_stqd(out_temp0, (qword)address_counter_tgt, 0);
 	si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-	
+
 	return retval;
 }
 
diff --git a/volk/spu_lib/spu_16s_vector_sum_unaligned.c b/volk/spu_lib/spu_16s_vector_sum_unaligned.c
index 5a1cb9aaf8..0097b4f56a 100644
--- a/volk/spu_lib/spu_16s_vector_sum_unaligned.c
+++ b/volk/spu_lib/spu_16s_vector_sum_unaligned.c
@@ -4,15 +4,15 @@ void* libvector_16s_vector_sum_unaligned(void* target,  void* src0, void* src1,
 	//loop iterator i
 	int i = 0;
 	void* retval = target;
-	
+
 
 	//put the target and source addresses into qwords
 	vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0, 0};
 	vector unsigned int address_counter_src0 = {(unsigned int)src0, 0, 0 ,0};
 	vector unsigned int address_counter_src1 = {(unsigned int)src1, 0, 0, 0};
-	
+
 	//create shuffle masks
-	
+
 	//shuffle mask building blocks:
 	//all from the first vector
 	vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
@@ -20,9 +20,9 @@ void* libvector_16s_vector_sum_unaligned(void* target,  void* src0, void* src1,
 	//all from the second vector
 	vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
 										 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f};
-	
 
-	
+
+
 	//gamma: second half of the second, first half of the first, break at (unsigned int)src0%16
 	vector unsigned char src_cmp = spu_splats((unsigned char)((unsigned int)src0%16));
 	vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp);
@@ -30,7 +30,7 @@ void* libvector_16s_vector_sum_unaligned(void* target,  void* src0, void* src1,
 	vector unsigned char cmp_res = spu_or(gt_res, eq_res);
 	vector unsigned char sixteen_uchar = spu_splats((unsigned char)16);
 	vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res);
-	vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change, 
+	vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change,
 												 (vector unsigned int)oneup);
 	shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, (unsigned int)src0%16);
 
@@ -41,17 +41,17 @@ void* libvector_16s_vector_sum_unaligned(void* target,  void* src0, void* src1,
 	cmp_res = spu_or(gt_res, eq_res);
 	sixteen_uchar = spu_splats((unsigned char)16);
 	phase_change = spu_and(sixteen_uchar, cmp_res);
-	vector unsigned int shuffle_mask_eta = spu_add((vector unsigned int)phase_change, 
+	vector unsigned int shuffle_mask_eta = spu_add((vector unsigned int)phase_change,
 												 (vector unsigned int)oneup);
 	shuffle_mask_eta = spu_rlqwbyte(shuffle_mask_eta, (unsigned int)src1%16);
-	
 
-	
-	
+
+
+
 
 	vector unsigned char tgt_second = spu_rlqwbyte(second_oneup, -((unsigned int)target%16));
 	vector unsigned char tgt_first = spu_rlqwbyte(oneup, -((unsigned int)target%16));
-	
+
 	//alpha: first half of first, second half of second, break at (unsigned int)target%16
 	src_cmp = spu_splats((unsigned char)((unsigned int)target%16));
 	gt_res = spu_cmpgt(oneup, src_cmp);
@@ -60,13 +60,13 @@ void* libvector_16s_vector_sum_unaligned(void* target,  void* src0, void* src1,
 	phase_change = spu_and(sixteen_uchar, cmp_res);
 	vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned int)phase_change,
 													 (vector unsigned int)oneup);
-	
+
 	//delta: first half of first, first half of second, break at (unsigned int)target%16
 	vector unsigned char shuffle_mask_delta = spu_shuffle(oneup, tgt_second, (vector unsigned char)shuffle_mask_alpha);
 	//epsilon: second half of second, second half of first, break at (unsigned int)target%16
 	vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second, oneup, (vector unsigned char)shuffle_mask_alpha);
 	//zeta: second half of second, first half of first, break at 16 - (unsigned int)target%16
-	vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16); 
+	vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16);
 
 	//beta: first half of first, second half of second, break at num_bytes%16
 	src_cmp = spu_splats((unsigned char)(num_bytes%16));
@@ -76,19 +76,19 @@ void* libvector_16s_vector_sum_unaligned(void* target,  void* src0, void* src1,
 	phase_change = spu_and(sixteen_uchar, cmp_res);
 	vector unsigned int shuffle_mask_beta = spu_add((vector unsigned int)phase_change,
 													 (vector unsigned int)oneup);
-	
-	
-	
 
 
-	
+
+
+
+
 	qword src0_past;
 	qword src0_present;
 	qword src1_past;
 	qword src1_present;
 	qword tgt_past;
 	qword tgt_present;
-	
+
 	qword in_temp0;
 	qword in_temp1;
 	qword out_temp0;
@@ -99,54 +99,54 @@ void* libvector_16s_vector_sum_unaligned(void* target,  void* src0, void* src1,
 	src0_past = si_lqd((qword)address_counter_src0, 0);
 	src1_past = si_lqd((qword)address_counter_src1, 0);
 	tgt_past = si_lqd((qword)address_counter_tgt, 0);
-	
+
 	for(i = 0; i < num_bytes/16; ++i) {
-		
+
 		src0_present = si_lqd((qword)address_counter_src0, 16);
 		src1_present = si_lqd((qword)address_counter_src1, 16);
 		tgt_present = si_lqd((qword)address_counter_tgt, 16);
-		
+
 		in_temp0 = spu_shuffle(src0_present, src0_past, (vector unsigned char)shuffle_mask_gamma);
 		in_temp1 = spu_shuffle(src1_present, src1_past, (vector unsigned char)shuffle_mask_eta);
-		
+
 		sum = spu_add((vector signed int)in_temp0, (vector signed int)in_temp1);
-	   
+
 
 		out_temp0 = spu_shuffle(tgt_past, (qword)sum, shuffle_mask_delta);
 		out_temp1 = spu_shuffle(tgt_present, (qword)sum, shuffle_mask_epsilon);
-		
+
 		si_stqd(out_temp0, (qword)address_counter_tgt, 0);
 		si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-		
+
 		tgt_past = out_temp1;
 		src0_past = src0_present;
 		src1_past = src1_present;
 		address_counter_src0 = spu_add(address_counter_src0, 16);
 		address_counter_src1 = spu_add(address_counter_src1, 16);
 		address_counter_tgt = spu_add(address_counter_tgt, 16);
-		
-		
+
+
 	}
-	
+
 	src0_present = si_lqd((qword)address_counter_src0, 16);
 	src1_present = si_lqd((qword)address_counter_src1, 16);
 	tgt_present = si_lqd((qword)address_counter_tgt, 16);
-	
-	
+
+
 	in_temp0 = spu_shuffle(src0_present, src0_past, (vector unsigned char) shuffle_mask_gamma);
 	in_temp1 = spu_shuffle(src1_present, src1_past, (vector unsigned char) shuffle_mask_eta);
 	sum = spu_add((vector signed int)in_temp0, (vector signed int)in_temp1);
 	qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned char) shuffle_mask_zeta);
 	qword meld = spu_shuffle((qword)sum, target_temp, (vector unsigned char)shuffle_mask_beta);
 
-	
-	
+
+
 	out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta);
 	out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon);
-	
+
 	si_stqd(out_temp0, (qword)address_counter_tgt, 0);
 	si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-	
+
 	return retval;
 }
 
diff --git a/volk/spu_lib/spu_32fc_pointwise_multiply_unaligned.c b/volk/spu_lib/spu_32fc_pointwise_multiply_unaligned.c
index 58fd4aa0cf..d1c9604889 100644
--- a/volk/spu_lib/spu_32fc_pointwise_multiply_unaligned.c
+++ b/volk/spu_lib/spu_32fc_pointwise_multiply_unaligned.c
@@ -7,15 +7,15 @@ void* libvector_pointwise_multiply_32fc_unaligned(void* target,  void* src0, voi
 	//loop iterator i
 	int i = 0;
 	void* retval = target;
-	
+
 
 	//put the target and source addresses into qwords
 	vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0, 0};
 	vector unsigned int address_counter_src0 = {(unsigned int)src0, 0, 0 ,0};
 	vector unsigned int address_counter_src1 = {(unsigned int)src1, 0, 0, 0};
-	
+
 	//create shuffle masks
-	
+
 	//shuffle mask building blocks:
 	//all from the first vector
 	vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
@@ -23,9 +23,9 @@ void* libvector_pointwise_multiply_32fc_unaligned(void* target,  void* src0, voi
 	//all from the second vector
 	vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
 										 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f};
-	
 
-	
+
+
 	//gamma: second half of the second, first half of the first, break at (unsigned int)src0%16
 	vector unsigned char src_cmp = spu_splats((unsigned char)((unsigned int)src0%16));
 	vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp);
@@ -33,7 +33,7 @@ void* libvector_pointwise_multiply_32fc_unaligned(void* target,  void* src0, voi
 	vector unsigned char cmp_res = spu_or(gt_res, eq_res);
 	vector unsigned char sixteen_uchar = spu_splats((unsigned char)16);
 	vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res);
-	vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change, 
+	vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change,
 												 (vector unsigned int)oneup);
 	shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, (unsigned int)src0%16);
 
@@ -44,17 +44,17 @@ void* libvector_pointwise_multiply_32fc_unaligned(void* target,  void* src0, voi
 	cmp_res = spu_or(gt_res, eq_res);
 	sixteen_uchar = spu_splats((unsigned char)16);
 	phase_change = spu_and(sixteen_uchar, cmp_res);
-	vector unsigned int shuffle_mask_eta = spu_add((vector unsigned int)phase_change, 
+	vector unsigned int shuffle_mask_eta = spu_add((vector unsigned int)phase_change,
 												 (vector unsigned int)oneup);
 	shuffle_mask_eta = spu_rlqwbyte(shuffle_mask_eta, (unsigned int)src1%16);
-	
 
-	
-	
+
+
+
 
 	vector unsigned char tgt_second = spu_rlqwbyte(second_oneup, -((unsigned int)target%16));
 	vector unsigned char tgt_first = spu_rlqwbyte(oneup, -((unsigned int)target%16));
-	
+
 	//alpha: first half of first, second half of second, break at (unsigned int)target%16
 	src_cmp = spu_splats((unsigned char)((unsigned int)target%16));
 	gt_res = spu_cmpgt(oneup, src_cmp);
@@ -63,13 +63,13 @@ void* libvector_pointwise_multiply_32fc_unaligned(void* target,  void* src0, voi
 	phase_change = spu_and(sixteen_uchar, cmp_res);
 	vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned int)phase_change,
 													 (vector unsigned int)oneup);
-	
+
 	//delta: first half of first, first half of second, break at (unsigned int)target%16
 	vector unsigned char shuffle_mask_delta = spu_shuffle(oneup, tgt_second, (vector unsigned char)shuffle_mask_alpha);
 	//epsilon: second half of second, second half of first, break at (unsigned int)target%16
 	vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second, oneup, (vector unsigned char)shuffle_mask_alpha);
 	//zeta: second half of second, first half of first, break at 16 - (unsigned int)target%16
-	vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16); 
+	vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16);
 
 	//beta: first half of first, second half of second, break at num_bytes%16
 	src_cmp = spu_splats((unsigned char)(num_bytes%16));
@@ -79,19 +79,19 @@ void* libvector_pointwise_multiply_32fc_unaligned(void* target,  void* src0, voi
 	phase_change = spu_and(sixteen_uchar, cmp_res);
 	vector unsigned int shuffle_mask_beta = spu_add((vector unsigned int)phase_change,
 													 (vector unsigned int)oneup);
-	
-	
-	
 
 
-	
+
+
+
+
 	qword src0_past;
 	qword src0_present;
 	qword src1_past;
 	qword src1_present;
 	qword tgt_past;
 	qword tgt_present;
-	
+
 	qword in_temp0;
 	qword in_temp1;
 	qword out_temp0;
@@ -101,7 +101,7 @@ void* libvector_pointwise_multiply_32fc_unaligned(void* target,  void* src0, voi
 	src0_past = si_lqd((qword)address_counter_src0, 0);
 	src1_past = si_lqd((qword)address_counter_src1, 0);
 	tgt_past = si_lqd((qword)address_counter_tgt, 0);
-	
+
 	vector unsigned char shuffle_mask_complexprod0 = {0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
 													  0x0c, 0x0d, 0x0e, 0x0f, 0x08, 0x09, 0x0a, 0x0b};
 	vector unsigned char shuffle_mask_complexprod1 = {0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
@@ -110,7 +110,7 @@ void* libvector_pointwise_multiply_32fc_unaligned(void* target,  void* src0, voi
 													  0x0c, 0x0d, 0x0e, 0x0f, 0x1c, 0x1d, 0x1e, 0x1f};
 	vector unsigned char sign_changer = {0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
 										 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00};
-	
+
 	vector float prod0;
 	qword shuf0;
 	vector float prod1;
@@ -118,54 +118,54 @@ void* libvector_pointwise_multiply_32fc_unaligned(void* target,  void* src0, voi
 	qword summand0;
 	qword summand1;
 	vector float sum;
-	
+
 
 	for(i = 0; i < num_bytes/16; ++i) {
-		
+
 		src0_present = si_lqd((qword)address_counter_src0, 16);
 		src1_present = si_lqd((qword)address_counter_src1, 16);
 		tgt_present = si_lqd((qword)address_counter_tgt, 16);
-		
+
 		in_temp0 = spu_shuffle(src0_present, src0_past, (vector unsigned char)shuffle_mask_gamma);
 		in_temp1 = spu_shuffle(src1_present, src1_past, (vector unsigned char)shuffle_mask_eta);
-		
+
 		prod0 = spu_mul((vector float)in_temp0, (vector float)in_temp1);
 		shuf0 = spu_shuffle((qword)in_temp1, (qword)in_temp1, shuffle_mask_complexprod0);
 		prod1 = spu_mul((vector float)in_temp0, (vector float)shuf0);
 		sign_change = spu_xor(prod0, (vector float)sign_changer);
-		
+
 		summand0 = spu_shuffle((qword)sign_change, (qword)prod1, shuffle_mask_complexprod1);
-			   
+
 		summand1 = spu_shuffle((qword)sign_change, (qword)prod1, shuffle_mask_complexprod2);
-		
+
 		sum = spu_add((vector float)summand0, (vector float)summand1);
-	   
+
 
 		out_temp0 = spu_shuffle(tgt_past, (qword)sum, shuffle_mask_delta);
 		out_temp1 = spu_shuffle(tgt_present, (qword)sum, shuffle_mask_epsilon);
-		
+
 		si_stqd(out_temp0, (qword)address_counter_tgt, 0);
 		si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-		
+
 		tgt_past = out_temp1;
 		src0_past = src0_present;
 		src1_past = src1_present;
 		address_counter_src0 = spu_add(address_counter_src0, 16);
 		address_counter_src1 = spu_add(address_counter_src1, 16);
 		address_counter_tgt = spu_add(address_counter_tgt, 16);
-		
-		
+
+
 	}
-	
+
 	src0_present = si_lqd((qword)address_counter_src0, 16);
 	src1_present = si_lqd((qword)address_counter_src1, 16);
 	tgt_present = si_lqd((qword)address_counter_tgt, 16);
-	
-	
+
+
 	in_temp0 = spu_shuffle(src0_present, src0_past, (vector unsigned char) shuffle_mask_gamma);
 	in_temp1 = spu_shuffle(src1_present, src1_past, (vector unsigned char) shuffle_mask_eta);
-	
-	
+
+
 	prod0 = spu_mul((vector float)in_temp0, (vector float)in_temp1);
 	shuf0 = spu_shuffle((qword)in_temp1, (qword)in_temp1, shuffle_mask_complexprod0);
 	prod1 = spu_mul(prod0, (vector float)shuf0);
@@ -173,20 +173,20 @@ void* libvector_pointwise_multiply_32fc_unaligned(void* target,  void* src0, voi
 	summand0 = spu_shuffle((qword)sign_change, (qword)prod1, shuffle_mask_complexprod1);
 	summand1 = spu_shuffle((qword)sign_change, (qword)prod1, shuffle_mask_complexprod2);
 	sum = spu_add((vector float)summand0, (vector float)summand1);
-	
-	
-	
+
+
+
 	qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned char) shuffle_mask_zeta);
 	qword meld = spu_shuffle((qword)sum, target_temp, (vector unsigned char)shuffle_mask_beta);
-	
-	
-	
+
+
+
 	out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta);
 	out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon);
-	
+
 	si_stqd(out_temp0, (qword)address_counter_tgt, 0);
 	si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-	
+
 	return retval;
 }
 
@@ -209,14 +209,14 @@ int main(){
 
 	vector_product_complex(res, pooh, bear, 48*sizeof(float));
 
-	
+
 
 	for(i = 0; i < 48; ++i) {
 		printf("%f, ", res[i]);
 	}
 	printf("\n");
 
-	
+
 }
 */
 
diff --git a/volk/spu_lib/spu_memcpy_unaligned.c b/volk/spu_lib/spu_memcpy_unaligned.c
index 2a0dabcd7b..0f15b5d807 100644
--- a/volk/spu_lib/spu_memcpy_unaligned.c
+++ b/volk/spu_lib/spu_memcpy_unaligned.c
@@ -5,14 +5,14 @@ void* libvector_memcpy_unaligned(void* target, void* src, unsigned int num_bytes
 	//loop iterator i
 	int i = 0;
 	void* retval = target;
-	
+
 
 	//put the target and source addresses into qwords
 	vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0, 0};
 	vector unsigned int address_counter_src = {(unsigned int)src, 0, 0 ,0};
-	
+
 	//create shuffle masks
-	
+
 	//shuffle mask building blocks:
 	//all from the first vector
 	vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
@@ -20,9 +20,9 @@ void* libvector_memcpy_unaligned(void* target, void* src, unsigned int num_bytes
 	//all from the second vector
 	vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
 										 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f};
-	
 
-	
+
+
 	//gamma: second half of the second, first half of the first, break at (unsigned int)src%16
 	vector unsigned char src_cmp = spu_splats((unsigned char)((unsigned int)src%16));
 	vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp);
@@ -30,16 +30,16 @@ void* libvector_memcpy_unaligned(void* target, void* src, unsigned int num_bytes
 	vector unsigned char cmp_res = spu_or(gt_res, eq_res);
 	vector unsigned char sixteen_uchar = spu_splats((unsigned char)16);
 	vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res);
-	vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change, 
+	vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change,
 												 (vector unsigned int)oneup);
 	shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, (unsigned int)src%16);
 
-	
-	
+
+
 
 	vector unsigned char tgt_second = spu_rlqwbyte(second_oneup, -((unsigned int)target%16));
 	vector unsigned char tgt_first = spu_rlqwbyte(oneup, -((unsigned int)target%16));
-	
+
 	//alpha: first half of first, second half of second, break at (unsigned int)target%16
 	src_cmp = spu_splats((unsigned char)((unsigned int)target%16));
 	gt_res = spu_cmpgt(oneup, src_cmp);
@@ -48,13 +48,13 @@ void* libvector_memcpy_unaligned(void* target, void* src, unsigned int num_bytes
 	phase_change = spu_and(sixteen_uchar, cmp_res);
 	vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned int)phase_change,
 													 (vector unsigned int)oneup);
-	
+
 	//delta: first half of first, first half of second, break at (unsigned int)target%16
 	vector unsigned char shuffle_mask_delta = spu_shuffle(oneup, tgt_second, (vector unsigned char)shuffle_mask_alpha);
 	//epsilon: second half of second, second half of first, break at (unsigned int)target%16
 	vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second, oneup, (vector unsigned char)shuffle_mask_alpha);
 	//zeta: second half of second, first half of first, break at 16 - (unsigned int)target%16
-	vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16); 
+	vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16);
 
 	//beta: first half of first, second half of second, break at num_bytes%16
 	src_cmp = spu_splats((unsigned char)(num_bytes%16));
@@ -64,61 +64,61 @@ void* libvector_memcpy_unaligned(void* target, void* src, unsigned int num_bytes
 	phase_change = spu_and(sixteen_uchar, cmp_res);
 	vector unsigned int shuffle_mask_beta = spu_add((vector unsigned int)phase_change,
 													 (vector unsigned int)oneup);
-	
-	
-	
 
 
-	
+
+
+
+
 	qword src_past;
 	qword src_present;
 	qword tgt_past;
 	qword tgt_present;
-	
+
 	qword in_temp;
 	qword out_temp0;
 	qword out_temp1;
 
 	src_past = si_lqd((qword)address_counter_src, 0);
 	tgt_past = si_lqd((qword)address_counter_tgt, 0);
-	
+
 	for(i = 0; i < num_bytes/16; ++i) {
-		
+
 		src_present = si_lqd((qword)address_counter_src, 16);
 		tgt_present = si_lqd((qword)address_counter_tgt, 16);
-		
+
 		in_temp = spu_shuffle(src_present, src_past, (vector unsigned char)shuffle_mask_gamma);
-		
+
 		out_temp0 = spu_shuffle(tgt_past, in_temp, shuffle_mask_delta);
 		out_temp1 = spu_shuffle(tgt_present, in_temp, shuffle_mask_epsilon);
 
 		si_stqd(out_temp0, (qword)address_counter_tgt, 0);
 		si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-		
+
 		tgt_past = out_temp1;
 		src_past = src_present;
 		address_counter_src = spu_add(address_counter_src, 16);
 		address_counter_tgt = spu_add(address_counter_tgt, 16);
-	
+
 
 	}
-	
+
 	src_present = si_lqd((qword)address_counter_src, 16);
 	tgt_present = si_lqd((qword)address_counter_tgt, 16);
-		
-	
+
+
 	in_temp = spu_shuffle(src_present, src_past,(vector unsigned char) shuffle_mask_gamma);
 	qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned char) shuffle_mask_zeta);
 	qword meld = spu_shuffle(in_temp, target_temp, (vector unsigned char)shuffle_mask_beta);
 
-	
-	
+
+
 	out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta);
 	out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon);
-	
+
 	si_stqd(out_temp0, (qword)address_counter_tgt, 0);
 	si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-	
+
 	return retval;
 }
 
@@ -133,9 +133,9 @@ void* mcpy(void* target, void* src, size_t num_bytes){
 	//put the target and source addresses into qwords
 	vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0, 0};
 	vector unsigned int address_counter_src = {(unsigned int)src, 0, 0 ,0};
-	
+
 	//create shuffle masks
-	
+
 	//shuffle mask building blocks:
 	//all from the first vector
 	vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
@@ -143,9 +143,9 @@ void* mcpy(void* target, void* src, size_t num_bytes){
 	//all from the second vector
 	vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
 										 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f};
-	
 
-	
+
+
 	//gamma: second half of the second, first half of the first, break at src%16
 	vector unsigned char src_cmp = spu_splats((unsigned char)(src%16));
 	vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp);
@@ -153,16 +153,16 @@ void* mcpy(void* target, void* src, size_t num_bytes){
 	vector unsigned char cmp_res = spu_or(gt_res, eq_res);
 	vector unsigned char sixteen_uchar = spu_splats((unsigned char)16);
 	vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res);
-	vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change, 
+	vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change,
 												 (vector unsigned int)oneup);
 	shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, src%16);
 
-	
-	
+
+
 
 	vector unsigned char tgt_second = spu_rlqwbyte(second_oneup, -(target%16));
 	vector unsigned char tgt_first = spu_rlqwbyte(oneup, -(target%16));
-	
+
 	//alpha: first half of first, second half of second, break at target%16
 	src_cmp = spu_splats((unsigned char)(target%16));
 	gt_res = spu_cmpgt(oneup, src_cmp);
@@ -171,13 +171,13 @@ void* mcpy(void* target, void* src, size_t num_bytes){
 	phase_change = spu_and(sixteen_uchar, cmp_res);
 	vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned int)phase_change,
 													 (vector unsigned int)oneup);
-	
+
 	//delta: first half of first, first half of second, break at target%16
 	vector unsigned char shuffle_mask_delta = spu_shuffle(oneup, tgt_second, (vector unsigned char)shuffle_mask_alpha);
 	//epsilon: second half of second, second half of first, break at target%16
 	vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second, oneup, (vector unsigned char)shuffle_mask_alpha);
 	//zeta: second half of second, first half of first, break at 16 - target%16
-	vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, target%16); 
+	vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, target%16);
 
 	//beta: first half of first, second half of second, break at num_bytes%16
 	src_cmp = spu_splats((unsigned char)(num_bytes%16));
@@ -187,10 +187,10 @@ void* mcpy(void* target, void* src, size_t num_bytes){
 	phase_change = spu_and(sixteen_uchar, cmp_res);
 	vector unsigned int shuffle_mask_beta = spu_add((vector unsigned int)phase_change,
 													 (vector unsigned int)oneup);
-	
-	
+
+
 	  printf("num_bytesmod16 %d\n", num_bytes%16);
-	printf("beta %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d\n", 
+	printf("beta %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d\n",
 		   spu_extract((vector unsigned char) shuffle_mask_beta, 0),
 		   spu_extract((vector unsigned char) shuffle_mask_beta, 1),
 		   spu_extract((vector unsigned char) shuffle_mask_beta, 2),
@@ -207,64 +207,64 @@ void* mcpy(void* target, void* src, size_t num_bytes){
 		   spu_extract((vector unsigned char) shuffle_mask_beta, 13),
 		   spu_extract((vector unsigned char) shuffle_mask_beta, 14),
 		   spu_extract((vector unsigned char) shuffle_mask_beta, 15));
-	
 
-	
-	
 
 
-	
+
+
+
+
 	qword src_past;
 	qword src_present;
 	qword tgt_past;
 	qword tgt_present;
-	
+
 	qword in_temp;
 	qword out_temp0;
 	qword out_temp1;
 
 	src_past = si_lqd((qword)address_counter_src, 0);
 	tgt_past = si_lqd((qword)address_counter_tgt, 0);
-	
+
 	for(i = 0; i < num_bytes/16; ++i) {
-		
+
 		src_present = si_lqd((qword)address_counter_src, 16);
 		tgt_present = si_lqd((qword)address_counter_tgt, 16);
-		
+
 		in_temp = spu_shuffle(src_present, src_past, (vector unsigned char)shuffle_mask_gamma);
-		
+
 		out_temp0 = spu_shuffle(tgt_past, in_temp, shuffle_mask_delta);
 		out_temp1 = spu_shuffle(tgt_present, in_temp, shuffle_mask_epsilon);
 
 		si_stqd(out_temp0, (qword)address_counter_tgt, 0);
 		si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-		
+
 		tgt_past = out_temp1;
 		src_past = src_present;
 		address_counter_src = spu_add(address_counter_src, 16);
 		address_counter_tgt = spu_add(address_counter_tgt, 16);
-	
+
 
 	}
-	
+
 	src_present = si_lqd((qword)address_counter_src, 16);
 	tgt_present = si_lqd((qword)address_counter_tgt, 16);
-		
-	
+
+
 	in_temp = spu_shuffle(src_present, src_past,(vector unsigned char) shuffle_mask_gamma);
 	qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned char) shuffle_mask_zeta);
 	qword meld = spu_shuffle(in_temp, target_temp, (vector unsigned char)shuffle_mask_beta);
 
-	
-	
+
+
 	out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta);
 	out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon);
-	
+
 	si_stqd(out_temp0, (qword)address_counter_tgt, 0);
 	si_stqd(out_temp1, (qword)address_counter_tgt, 16);
 
 	return retval;
-	   
+
 }
 */
 /*
@@ -286,5 +286,5 @@ int main(){
 	}
 	printf("\n");
 }
-	
+
 */
diff --git a/volk/spu_lib/spu_memset_unaligned.S b/volk/spu_lib/spu_memset_unaligned.S
index a655c4c52f..c260a125cd 100644
--- a/volk/spu_lib/spu_memset_unaligned.S
+++ b/volk/spu_lib/spu_memset_unaligned.S
@@ -1,19 +1,19 @@
 /* -*- asm -*- */
 /*
  * Copyright 2008 Free Software Foundation, Inc.
- * 
+ *
  * This file is part of GNU Radio
- * 
+ *
  * GNU Radio is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 3, or (at your option)
  * any later version.
- * 
+ *
  * GNU Radio is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
- * 
+ *
  * You should have received a copy of the GNU General Public License along
  * with this program; if not, write to the Free Software Foundation, Inc.,
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
@@ -33,11 +33,11 @@
 	 *	  size_t i;
 	 *	  for (i = 0; i < n; i++)
 	 *	    p[i] = c;
-	 *	
+	 *
 	 *	  return pv;
 	 *	}
 	 */
-	
+
 #define	p_arg	arg1	// we're going to clobber arg1 w/ the return value
 #define	c	arg2	// the constant we're writing
 #define	n	arg3	// how many bytes to write
@@ -50,16 +50,16 @@
 #define an	r18	// aligned n (n rounded down to mod 16 boundary)
 #define	next_p	r19
 #define	cond1	r20
-#define	cond2	r21				
+#define	cond2	r21
 #define m	r22
 #define r	r23
-	
+
 	PROC_ENTRY(libvector_memset_unaligned)
-	
+
 	// Hint the return from do_head, in case we go that way.
 	// There's pretty much nothing to can do to hint the branch to it.
 	hbrr	do_head_br, head_complete
-	
+
 	MR(p, p_arg)	// leaves p, the return value, in the correct reg (r3)
 	BRZ_RETURN(n)
 
@@ -69,11 +69,11 @@
 head_complete:
 
 	/*
-	 * preconditions:	
+	 * preconditions:
 	 *   p%16 == 0, n > 0
 	 */
 	hbrr	middle_loop_br, middle_loop
-	
+
 	ROUND_DOWN(an, n, 16)	// an is "aligned n"
 	MODULO(n, n, 16)	// what's left over in the last quad
 	brz	an, do_tail	// no whole quad words; skip to tail
@@ -96,20 +96,20 @@ middle_loop:
 	  stqd	c,  4*16(p)
 	  stqd	c,  5*16(p)
 	  stqd	c,  6*16(p)
-	
+
 	MR(p, next_p)
 	  stqd	c,  7*16-128(next_p)
 	or	cond2, n, an
 middle_loop_br:
 	  brnz	cond1, middle_loop
-	
+
 	/*
-	 * if an and n are both zero, return now 
+	 * if an and n are both zero, return now
 	 */
 	BRZ_RETURN(cond2)
 
 	/*
-	 * otherwise handle last of full quad words 
+	 * otherwise handle last of full quad words
 	 *
 	 *   0 <= an < 128, p%16 == 0
 	 */
@@ -119,18 +119,18 @@ middle2:
 	 */
 	brz	an, do_tail
 	hbrr	middle2_loop_br, middle2_loop
-	
+
 	.p2align 3
-middle2_loop:	
+middle2_loop:
 	ai	next_p, p, 16
 	  stqd	c, 0(p)
 	ai	an, an, -16
 	  LMR(p, next_p)
 middle2_loop_br:
 	  brnz	an, middle2_loop
-	
+
 	/* We're done with the full quadwords. */
-	
+
 	/*
 	 * Handle the final partial quadword.
 	 * We'll be modifying only the left hand portion of the quad.
@@ -146,7 +146,7 @@ do_tail:
 	shlqby  mask, mask, t1
 	selb	t0, old, c, mask
 	stqd	t0, 0(p)
-do_tail_ret:	
+do_tail_ret:
 	RETURN()
 
 	/*
@@ -176,7 +176,7 @@ do_head:
 	MR(t1, p)
 	sf	t0, m, r	// t0 = r - m
 	a	p, p, m		// p += m
-	rotqby	mask, mask, t0	// rotate 0's to the right place	
+	rotqby	mask, mask, t0	// rotate 0's to the right place
 	sf	n, m, n		// n -= m
 	selb	t0, c, old, mask // merge
 	stqd	t0, 0(t1)
author	Tom Rondeau <trondeau@vt.edu>	2012-04-13 18:36:53 -0400
committer	Tom Rondeau <trondeau@vt.edu>	2012-04-13 18:36:53 -0400
commit	f919f9dcbb54a08e6e26d6c229ce92fb784fa1b2 (patch)
tree	7e846386b9eb1676f9a93fc4a1e55916b9accc97 /volk
parent	6a1e9783fec6ed827f49db27c171591d30f32933 (diff)