From 52c51c983d51ff725238c22571b2d466875a5f22 Mon Sep 17 00:00:00 2001
From: Josh Blum <josh@joshknows.com>
Date: Mon, 14 Nov 2011 11:30:59 -0800
Subject: volk: conversion tweaks to build avx on MSVC

---
 volk/include/volk/volk_16i_max_star_horizontal_16i_a.h         | 6 +++---
 volk/include/volk/volk_32fc_index_max_16u_a.h                  | 4 ++--
 volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h     | 8 ++++----
 volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h      | 4 ++--
 volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h | 4 ++--
 volk/include/volk/volk_common.h                                | 2 ++
 6 files changed, 15 insertions(+), 13 deletions(-)

(limited to 'volk/include')

diff --git a/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h b/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h
index f60b33a41f..a10a62350e 100644
--- a/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h
+++ b/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h
@@ -1,6 +1,7 @@
 #ifndef INCLUDED_volk_16i_max_star_horizontal_16i_a_H
 #define INCLUDED_volk_16i_max_star_horizontal_16i_a_H
 
+#include <volk/volk_common.h>
 
 #include<inttypes.h>
 #include<stdio.h>	
@@ -21,7 +22,7 @@ static inline  void volk_16i_max_star_horizontal_16i_a_ssse3(int16_t* target, in
 
   
   
-  volatile __m128i xmm0, xmm1, xmm2, xmm3, xmm4; 
+  __m128i xmm0, xmm1, xmm2, xmm3, xmm4;
   __m128i  xmm5, xmm6, xmm7, xmm8;
   
   xmm4 = _mm_load_si128((__m128i*)shufmask0);
@@ -92,8 +93,7 @@ static inline  void volk_16i_max_star_horizontal_16i_a_ssse3(int16_t* target, in
     
     xmm0 = _mm_shuffle_epi8(xmm0, xmm3);
     
-
-    _mm_storel_pd((double*)p_target, (__m128d)xmm0);
+    _mm_storel_pd((double*)p_target, bit128_p(&xmm0)->double_vec);
     
     p_target = (__m128i*)((int8_t*)p_target + 8);
 
diff --git a/volk/include/volk/volk_32fc_index_max_16u_a.h b/volk/include/volk/volk_32fc_index_max_16u_a.h
index 9566aa32e5..125a345827 100644
--- a/volk/include/volk/volk_32fc_index_max_16u_a.h
+++ b/volk/include/volk/volk_32fc_index_max_16u_a.h
@@ -87,8 +87,8 @@ static inline void volk_32fc_index_max_16u_a_sse3(unsigned int* target, lv_32fc_
 
     xmm2 = _mm_load_ps((float*)src0);
     
-    xmm1 = _mm_movelh_ps((__m128)xmm8, (__m128)xmm8);
-    xmm8 = (__m128i)xmm1;
+    xmm1 = _mm_movelh_ps(bit128_p(&xmm8)->float_vec, bit128_p(&xmm8)->float_vec);
+    xmm8 = bit128_p(&xmm1)->int_vec;
 
     xmm2 = _mm_mul_ps(xmm2, xmm2);
 
diff --git a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h
index f11c93682d..02faf86c23 100644
--- a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h
+++ b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h
@@ -96,9 +96,9 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_sse3(lv_32fc_t* result
 
     in1 = _mm_loadu_ps( (float*) (input+offset) );
     in2 = _mm_loadu_ps( (float*) (taps+offset) );
-    Rv = in1*in2;
+    Rv = _mm_mul_ps(in1, in2);
     fehg = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(2,3,0,1));
-    Iv = in1*fehg;
+    Iv = _mm_mul_ps(in1, fehg);
     Rs = _mm_hadd_ps( _mm_hadd_ps(Rv, zv) ,zv);
     Ivm = _mm_xor_ps( negMask.vec, Iv );
     Is = _mm_hadd_ps( _mm_hadd_ps(Ivm, zv) ,zv);
@@ -119,9 +119,9 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_sse3(lv_32fc_t* result
 
     in1 = _mm_loadu_ps( (float*) (input+offset) );
     in2 = _mm_loadu_ps( (float*) (taps+offset) );
-    Rv = _mm_and_ps(in1*in2, halfMask.vec);
+    Rv = _mm_and_ps(_mm_mul_ps(in1, in2), halfMask.vec);
     fehg = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(2,3,0,1));
-    Iv = _mm_and_ps(in1*fehg, halfMask.vec);
+    Iv = _mm_and_ps(_mm_mul_ps(in1, fehg), halfMask.vec);
     Rs = _mm_hadd_ps(_mm_hadd_ps(Rv, zv),zv);
     Ivm = _mm_xor_ps( negMask.vec, Iv );
     Is = _mm_hadd_ps(_mm_hadd_ps(Ivm, zv),zv);
diff --git a/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h b/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h
index 0bb76f1d17..0c280eb6e9 100644
--- a/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h
+++ b/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h
@@ -26,8 +26,8 @@ static inline void volk_8ic_x2_multiply_conjugate_16ic_a_sse4_1(lv_16sc_t* cVect
     
   for(;number < quarterPoints; number++){
     // Convert into 8 bit values into 16 bit values
-    x = _mm_cvtepi8_epi16(_mm_movpi64_epi64(*(__m64*)a));
-    y = _mm_cvtepi8_epi16(_mm_movpi64_epi64(*(__m64*)b));
+    x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a));
+    y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b));
       
     // Calculate the ar*cr - ai*(-ci) portions
     realz = _mm_madd_epi16(x,y);
diff --git a/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h b/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h
index 3e05608a4a..a2c2b04f63 100644
--- a/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h
+++ b/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h
@@ -29,8 +29,8 @@ static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a_sse4_1(lv_32fc_t*
 
   for(;number < quarterPoints; number++){
     // Convert into 8 bit values into 16 bit values
-    x = _mm_cvtepi8_epi16(_mm_movpi64_epi64(*(__m64*)a));
-    y = _mm_cvtepi8_epi16(_mm_movpi64_epi64(*(__m64*)b));
+    x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a));
+    y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b));
 
     // Calculate the ar*cr - ai*(-ci) portions
     realz = _mm_madd_epi16(x,y);
diff --git a/volk/include/volk/volk_common.h b/volk/include/volk/volk_common.h
index 2c935d1fb3..38263d5f75 100644
--- a/volk/include/volk/volk_common.h
+++ b/volk/include/volk/volk_common.h
@@ -91,4 +91,6 @@ union bit128{
   #endif
 };
 
+#define bit128_p(x) ((union bit128 *)(x)) /* view the object at pointer x as a union bit128, so its float_vec / int_vec / double_vec members can be read instead of casting between vector types */
+
 #endif /*INCLUDED_LIBVOLK_COMMON_H*/
-- 
cgit v1.2.3