root / volk / lib / qa_32fc_index_max_aligned16.cc @ 69210086
History | View | Annotate | Download (1.7 kB)
| 1 | 23914465 | Tom Rondeau | #include <volk/volk.h> |
|---|---|---|---|
| 2 | 23914465 | Tom Rondeau | #include <qa_32fc_index_max_aligned16.h> |
| 3 | 23914465 | Tom Rondeau | #include <stdio.h> |
| 4 | 23914465 | Tom Rondeau | #include <stdlib.h> |
| 5 | 23914465 | Tom Rondeau | #include <time.h> |
| 6 | 23914465 | Tom Rondeau | |
| 7 | 23914465 | Tom Rondeau | #define ERR_DELTA (1e-4) |
| 8 | 23914465 | Tom Rondeau | #define NUM_ITERS 1000000 |
| 9 | 23914465 | Tom Rondeau | #define VEC_LEN 3096 |
/*
 * Draws one value from rand() and maps it linearly onto [-1, 1]:
 * rand() == 0 yields -1 and rand() == RAND_MAX yields +1.
 */
static float uniform() {
  // Float division first, giving a fraction in [0, 1].
  const float fraction = static_cast<float>(rand()) / RAND_MAX;
  // Shift and scale are done in double, then narrowed to float on return.
  return 2.0 * (fraction - 0.5);
}
| 13 | 23914465 | Tom Rondeau | |
| 14 | 23914465 | Tom Rondeau | static void |
| 15 | 23914465 | Tom Rondeau | random_floats (float *buf, unsigned n) |
| 16 | 23914465 | Tom Rondeau | {
|
| 17 | 23914465 | Tom Rondeau | unsigned int i = 0; |
| 18 | 23914465 | Tom Rondeau | for (; i < n; i++) {
|
| 19 | 23914465 | Tom Rondeau | |
| 20 | 23914465 | Tom Rondeau | buf[i] = uniform () * 32767;
|
| 21 | 23914465 | Tom Rondeau | |
| 22 | 23914465 | Tom Rondeau | } |
| 23 | 23914465 | Tom Rondeau | } |
| 24 | 23914465 | Tom Rondeau | |
| 25 | 23914465 | Tom Rondeau | |
| 26 | 23914465 | Tom Rondeau | #ifndef LV_HAVE_SSE3
|
| 27 | 23914465 | Tom Rondeau | |
| 28 | 23914465 | Tom Rondeau | void qa_32fc_index_max_aligned16::t1(){
|
| 29 | 23914465 | Tom Rondeau | printf("sse3 not available... no test performed\n");
|
| 30 | 23914465 | Tom Rondeau | } |
| 31 | 23914465 | Tom Rondeau | |
| 32 | 23914465 | Tom Rondeau | #else
|
| 33 | 23914465 | Tom Rondeau | |
| 34 | 23914465 | Tom Rondeau | |
| 35 | 23914465 | Tom Rondeau | void qa_32fc_index_max_aligned16::t1(){
|
| 36 | 23914465 | Tom Rondeau | |
| 37 | 23914465 | Tom Rondeau | const int vlen = VEC_LEN; |
| 38 | 23914465 | Tom Rondeau | |
| 39 | 23914465 | Tom Rondeau | volk_environment_init(); |
| 40 | 23914465 | Tom Rondeau | int ret;
|
| 41 | 23914465 | Tom Rondeau | |
| 42 | 23914465 | Tom Rondeau | unsigned int* target; |
| 43 | 23914465 | Tom Rondeau | unsigned int* target_generic; |
| 44 | 23914465 | Tom Rondeau | std::complex<float>* src0 ;
|
| 45 | 23914465 | Tom Rondeau | |
| 46 | 23914465 | Tom Rondeau | |
| 47 | 23914465 | Tom Rondeau | unsigned int i_target; |
| 48 | 23914465 | Tom Rondeau | target = &i_target; |
| 49 | 23914465 | Tom Rondeau | unsigned int i_target_generic; |
| 50 | 23914465 | Tom Rondeau | target_generic = &i_target_generic; |
| 51 | 23914465 | Tom Rondeau | ret = posix_memalign((void**)&src0, 16, vlen << 3); |
| 52 | 23914465 | Tom Rondeau | |
| 53 | 23914465 | Tom Rondeau | random_floats((float*)src0, vlen * 2); |
| 54 | 23914465 | Tom Rondeau | |
| 55 | 23914465 | Tom Rondeau | printf("32fc_index_max_aligned16\n");
|
| 56 | 23914465 | Tom Rondeau | |
| 57 | 23914465 | Tom Rondeau | clock_t start, end; |
| 58 | 23914465 | Tom Rondeau | double total;
|
| 59 | 23914465 | Tom Rondeau | |
| 60 | 23914465 | Tom Rondeau | |
| 61 | 23914465 | Tom Rondeau | start = clock(); |
| 62 | 23914465 | Tom Rondeau | for(int k = 0; k < NUM_ITERS; ++k) { |
| 63 | 23914465 | Tom Rondeau | volk_32fc_index_max_aligned16_manual(target_generic, src0, vlen << 3, "generic"); |
| 64 | 23914465 | Tom Rondeau | } |
| 65 | 23914465 | Tom Rondeau | end = clock(); |
| 66 | 23914465 | Tom Rondeau | total = (double)(end-start)/(double)CLOCKS_PER_SEC; |
| 67 | 23914465 | Tom Rondeau | printf("generic time: %f\n", total);
|
| 68 | 23914465 | Tom Rondeau | |
| 69 | 23914465 | Tom Rondeau | start = clock(); |
| 70 | 23914465 | Tom Rondeau | for(int k = 0; k < NUM_ITERS; ++k) { |
| 71 | 23914465 | Tom Rondeau | volk_32fc_index_max_aligned16_manual(target, src0, vlen << 3, "sse3"); |
| 72 | 23914465 | Tom Rondeau | } |
| 73 | 23914465 | Tom Rondeau | |
| 74 | 23914465 | Tom Rondeau | end = clock(); |
| 75 | 23914465 | Tom Rondeau | total = (double)(end-start)/(double)CLOCKS_PER_SEC; |
| 76 | 23914465 | Tom Rondeau | printf("sse3 time: %f\n", total);
|
| 77 | 23914465 | Tom Rondeau | |
| 78 | 23914465 | Tom Rondeau | |
| 79 | 23914465 | Tom Rondeau | |
| 80 | 23914465 | Tom Rondeau | |
| 81 | 23914465 | Tom Rondeau | printf("generic: %u, sse3: %u\n", target_generic[0], target[0]); |
| 82 | 23914465 | Tom Rondeau | CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[0], target[0], 1.1); |
| 83 | 23914465 | Tom Rondeau | |
| 84 | 23914465 | Tom Rondeau | |
| 85 | 23914465 | Tom Rondeau | |
| 86 | 23914465 | Tom Rondeau | free(src0); |
| 87 | 23914465 | Tom Rondeau | } |
| 88 | 23914465 | Tom Rondeau | |
| 89 | 23914465 | Tom Rondeau | #endif /*LV_HAVE_SSE3*/ |