Statistics
| Branch: | Tag: | Revision:

root / volk / lib / qa_32f_index_max_aligned16.cc @ 23914465

History | View | Annotate | Download (2.2 kB)

1
#include <volk/volk_runtime.h>
2
#include <volk/volk.h>
3
#include <qa_32f_index_max_aligned16.h>
4
#include <stdio.h>
5
#include <stdlib.h>
6
#include <time.h>
7
8
/* Tolerance constant; not referenced in the visible part of this file. */
#define ERR_DELTA (1e-4)
/* Number of times each implementation is invoked inside its timing loop. */
#define NUM_ITERS 1000000
/* Number of floats in the input vector handed to every implementation. */
#define VEC_LEN 3097
11
/*
 * Returns a pseudo-random float drawn from rand() and mapped linearly onto
 * the closed interval [-1, 1]: rand() == 0 yields -1 and rand() == RAND_MAX
 * yields +1.  The generator is not seeded here, so the sequence depends on
 * whatever srand() state the process already has.
 */
static float uniform() {
  const float unit = (float) rand() / RAND_MAX;        // scaled to [0, 1]
  return 2.0 * (unit - 0.5);                           // shifted/stretched to [-1, 1]
}
14
15
static void
16
random_floats (float *buf, unsigned n)
17
{
18
  unsigned int i = 0;
19
  for (; i < n; i++) {
20
21
    buf[i] = uniform () * 32767;
22
23
  }
24
}
25
26
27
#ifndef LV_HAVE_SSE
28
29
/*
 * Fallback used when LV_HAVE_SSE is not defined at compile time: nothing is
 * measured or compared, the test only reports that it was skipped.
 */
void qa_32f_index_max_aligned16::t1(){
  printf("sse not available... no test performed\n");
}
32
33
#else
34
35
36
void qa_32f_index_max_aligned16::t1(){
37
 
38
  const int vlen = VEC_LEN;
39
40
  
41
  volk_runtime_init();
42
  
43
  volk_environment_init();
44
  int ret;
45
46
  unsigned int* target_sse4_1;
47
  unsigned int* target_sse;
48
  unsigned int* target_generic;
49
  float* src0 ;
50
  
51
  
52
  unsigned int i_target_sse4_1;
53
  target_sse4_1 = &i_target_sse4_1;
54
  unsigned int i_target_sse;
55
  target_sse = &i_target_sse;
56
  unsigned int i_target_generic;
57
  target_generic = &i_target_generic;
58
59
  ret = posix_memalign((void**)&src0, 16, vlen *sizeof(float));
60
  
61
  random_floats((float*)src0, vlen);
62
  
63
  printf("32f_index_max_aligned16\n");
64
65
  clock_t start, end;
66
  double total;
67
  
68
  
69
  start = clock();
70
  for(int k = 0; k < NUM_ITERS; ++k) {
71
    volk_32f_index_max_aligned16_manual(target_generic, src0, vlen, "generic");
72
  }
73
  end = clock();  
74
  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
75
  printf("generic time: %f\n", total);
76
77
  start = clock();
78
  for(int k = 0; k < NUM_ITERS; ++k) {
79
    volk_32f_index_max_aligned16_manual(target_sse, src0, vlen, "sse2");
80
  }
81
  
82
  end = clock();  
83
  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
84
  printf("sse time: %f\n", total);
85
  
86
  start = clock();
87
  for(int k = 0; k < NUM_ITERS; ++k) {
88
    get_volk_runtime()->volk_32f_index_max_aligned16(target_sse4_1, src0, vlen);
89
  }
90
  
91
  end = clock();  
92
  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
93
  printf("sse4.1 time: %f\n", total);
94
  
95
  
96
  printf("generic: %u, sse: %u, sse4.1: %u\n", target_generic[0], target_sse[0], target_sse4_1[0]);
97
  CPPUNIT_ASSERT_EQUAL(target_generic[0], target_sse[0]);
98
  CPPUNIT_ASSERT_EQUAL(target_generic[0], target_sse4_1[0]);
99
  
100
  free(src0);
101
}
102
103
#endif /*LV_HAVE_SSE*/