root / volk / lib / qa_32f_index_max_aligned16.cc @ 23914465
History | View | Annotate | Download (2.2 kB)
| 1 | #include <volk/volk_runtime.h> |
|---|---|
| 2 | #include <volk/volk.h> |
| 3 | #include <qa_32f_index_max_aligned16.h> |
| 4 | #include <stdio.h> |
| 5 | #include <stdlib.h> |
| 6 | #include <time.h> |
| 7 | |
| 8 | #define ERR_DELTA (1e-4) |
| 9 | #define NUM_ITERS 1000000 |
| 10 | #define VEC_LEN 3097 |
/*
 * Draw one value from rand() and rescale it from [0, RAND_MAX] to the
 * interval [-1, 1].
 *
 * The division is carried out in float; the centring and scaling use
 * double constants, and the result is narrowed back to float on return.
 */
static float uniform() {
  const float unit = (float) rand() / RAND_MAX;  /* in [0, 1] */
  const double centred = unit - 0.5;             /* in [-0.5, 0.5] */
  return 2.0 * centred;
}
| 14 | |
| 15 | static void |
| 16 | random_floats (float *buf, unsigned n) |
| 17 | {
|
| 18 | unsigned int i = 0; |
| 19 | for (; i < n; i++) {
|
| 20 | |
| 21 | buf[i] = uniform () * 32767;
|
| 22 | |
| 23 | } |
| 24 | } |
| 25 | |
| 26 | |
| 27 | #ifndef LV_HAVE_SSE
|
| 28 | |
| 29 | void qa_32f_index_max_aligned16::t1(){
|
| 30 | printf("sse not available... no test performed\n");
|
| 31 | } |
| 32 | |
| 33 | #else
|
| 34 | |
| 35 | |
| 36 | void qa_32f_index_max_aligned16::t1(){
|
| 37 | |
| 38 | const int vlen = VEC_LEN; |
| 39 | |
| 40 | |
| 41 | volk_runtime_init(); |
| 42 | |
| 43 | volk_environment_init(); |
| 44 | int ret;
|
| 45 | |
| 46 | unsigned int* target_sse4_1; |
| 47 | unsigned int* target_sse; |
| 48 | unsigned int* target_generic; |
| 49 | float* src0 ;
|
| 50 | |
| 51 | |
| 52 | unsigned int i_target_sse4_1; |
| 53 | target_sse4_1 = &i_target_sse4_1; |
| 54 | unsigned int i_target_sse; |
| 55 | target_sse = &i_target_sse; |
| 56 | unsigned int i_target_generic; |
| 57 | target_generic = &i_target_generic; |
| 58 | |
| 59 | ret = posix_memalign((void**)&src0, 16, vlen *sizeof(float)); |
| 60 | |
| 61 | random_floats((float*)src0, vlen);
|
| 62 | |
| 63 | printf("32f_index_max_aligned16\n");
|
| 64 | |
| 65 | clock_t start, end; |
| 66 | double total;
|
| 67 | |
| 68 | |
| 69 | start = clock(); |
| 70 | for(int k = 0; k < NUM_ITERS; ++k) { |
| 71 | volk_32f_index_max_aligned16_manual(target_generic, src0, vlen, "generic");
|
| 72 | } |
| 73 | end = clock(); |
| 74 | total = (double)(end-start)/(double)CLOCKS_PER_SEC; |
| 75 | printf("generic time: %f\n", total);
|
| 76 | |
| 77 | start = clock(); |
| 78 | for(int k = 0; k < NUM_ITERS; ++k) { |
| 79 | volk_32f_index_max_aligned16_manual(target_sse, src0, vlen, "sse2");
|
| 80 | } |
| 81 | |
| 82 | end = clock(); |
| 83 | total = (double)(end-start)/(double)CLOCKS_PER_SEC; |
| 84 | printf("sse time: %f\n", total);
|
| 85 | |
| 86 | start = clock(); |
| 87 | for(int k = 0; k < NUM_ITERS; ++k) { |
| 88 | get_volk_runtime()->volk_32f_index_max_aligned16(target_sse4_1, src0, vlen); |
| 89 | } |
| 90 | |
| 91 | end = clock(); |
| 92 | total = (double)(end-start)/(double)CLOCKS_PER_SEC; |
| 93 | printf("sse4.1 time: %f\n", total);
|
| 94 | |
| 95 | |
| 96 | printf("generic: %u, sse: %u, sse4.1: %u\n", target_generic[0], target_sse[0], target_sse4_1[0]); |
| 97 | CPPUNIT_ASSERT_EQUAL(target_generic[0], target_sse[0]); |
| 98 | CPPUNIT_ASSERT_EQUAL(target_generic[0], target_sse4_1[0]); |
| 99 | |
| 100 | free(src0); |
| 101 | } |
| 102 | |
| 103 | #endif /*LV_HAVE_SSE*/ |