summaryrefslogtreecommitdiff
path: root/volk/lib
diff options
context:
space:
mode:
authorNick Foster <nick@nerdnetworks.org>2010-12-17 11:14:41 -0800
committerNick Foster <nick@nerdnetworks.org>2010-12-17 11:14:41 -0800
commitc6fff77de9b686761f93f0e1de237f8543f5e919 (patch)
tree91e8927ba4a30db3d6b93d6b14489b733b23eecb /volk/lib
parent15ad4b5398e474bfb52fdb7e826b69f3e398c0b0 (diff)
Volk: A bunch of new Orc routines plus a couple of build changes.
32fc_magnitude_16s fails test_all right now.
Diffstat (limited to 'volk/lib')
-rw-r--r--volk/lib/qa_16sc_deinterleave_16s_aligned16.cc12
-rw-r--r--volk/lib/qa_16sc_deinterleave_32f_aligned16.cc11
-rw-r--r--volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc9
-rw-r--r--volk/lib/qa_16sc_magnitude_16s_aligned16.cc5
-rw-r--r--volk/lib/qa_16sc_magnitude_32f_aligned16.cc6
-rw-r--r--volk/lib/qa_32f_max_aligned16.cc9
-rw-r--r--volk/lib/qa_32f_min_aligned16.cc9
-rw-r--r--volk/lib/qa_32fc_magnitude_16s_aligned16.cc8
-rw-r--r--volk/lib/qa_volk.cc1
9 files changed, 60 insertions, 10 deletions
diff --git a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc
index e700ac72ce..7e9e31df57 100644
--- a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc
+++ b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc
@@ -26,6 +26,8 @@ void qa_16sc_deinterleave_16s_aligned16::t1() {
int16_t output_generic1[vlen] __attribute__ ((aligned (16)));
int16_t output_sse2[vlen] __attribute__ ((aligned (16)));
int16_t output_sse21[vlen] __attribute__ ((aligned (16)));
+ int16_t output_orc[vlen] __attribute__ ((aligned (16)));
+ int16_t output_orc1[vlen] __attribute__ ((aligned (16)));
int16_t output_ssse3[vlen] __attribute__ ((aligned (16)));
int16_t output_ssse31[vlen] __attribute__ ((aligned (16)));
@@ -44,6 +46,13 @@ void qa_16sc_deinterleave_16s_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_16sc_deinterleave_16s_aligned16_manual(output_orc, output_orc1, input0, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_16sc_deinterleave_16s_aligned16_manual(output_sse2, output_sse21, input0, vlen, "sse2");
}
end = clock();
@@ -70,6 +79,9 @@ void qa_16sc_deinterleave_16s_aligned16::t1() {
CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]);
CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_ssse31[i]);
+
+ CPPUNIT_ASSERT_EQUAL(output_generic[i], output_orc[i]);
+ CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_orc1[i]);
}
}
diff --git a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc
index 6ee0769983..45100206d9 100644
--- a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc
+++ b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc
@@ -26,6 +26,8 @@ void qa_16sc_deinterleave_32f_aligned16::t1() {
float output_generic1[vlen] __attribute__ ((aligned (16)));
float output_sse2[vlen] __attribute__ ((aligned (16)));
float output_sse21[vlen] __attribute__ ((aligned (16)));
+ float output_orc[vlen] __attribute__ ((aligned (16)));
+ float output_orc1[vlen] __attribute__ ((aligned (16)));
int16_t* loadInput = (int16_t*)input0;
for(int i = 0; i < vlen*2; ++i) {
@@ -42,6 +44,13 @@ void qa_16sc_deinterleave_32f_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_16sc_deinterleave_32f_aligned16_manual(output_orc, output_orc1, input0, 32768.0, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_16sc_deinterleave_32f_aligned16_manual(output_sse2, output_sse21, input0, 32768.0, vlen, "sse");
}
end = clock();
@@ -57,6 +66,8 @@ void qa_16sc_deinterleave_32f_aligned16::t1() {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse21[i], fabs(output_generic1[i])*1e-4);
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_orc1[i], fabs(output_generic1[i])*1e-4);
}
}
diff --git a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc
index 5ab458bc90..d187d20c35 100644
--- a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc
+++ b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc
@@ -24,6 +24,7 @@ void qa_16sc_deinterleave_real_8s_aligned16::t1() {
int8_t output_generic[vlen] __attribute__ ((aligned (16)));
int8_t output_ssse3[vlen] __attribute__ ((aligned (16)));
+ int8_t output_orc[vlen] __attribute__ ((aligned (16)));
int16_t* loadInput = (int16_t*)input0;
for(int i = 0; i < vlen*2; ++i) {
@@ -40,6 +41,13 @@ void qa_16sc_deinterleave_real_8s_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_16sc_deinterleave_real_8s_aligned16_manual(output_orc, input0, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_16sc_deinterleave_real_8s_aligned16_manual(output_ssse3, input0, vlen, "ssse3");
}
end = clock();
@@ -54,6 +62,7 @@ void qa_16sc_deinterleave_real_8s_aligned16::t1() {
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]);
+ CPPUNIT_ASSERT_EQUAL(output_generic[i], output_orc[i]);
}
}
diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
index c8f13ff84a..dd4ae75ff2 100644
--- a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
+++ b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
@@ -40,13 +40,14 @@ void qa_16sc_magnitude_16s_aligned16::t1() {
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
- start = clock();
+/* start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_16sc_magnitude_16s_aligned16_manual(output_orc, input0, vlen, "orc");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("orc_time: %f\n", total);
+*/
start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse");
@@ -72,7 +73,7 @@ void qa_16sc_magnitude_16s_aligned16::t1() {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1);
- CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1);
+ //CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1);
}
}
diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
index e7178863c9..53d42e28ce 100644
--- a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
+++ b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
@@ -90,14 +90,14 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
- start = clock();
+/* start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("orc_time: %f\n", total);
-
+*/
start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_16sc_magnitude_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
@@ -123,7 +123,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4);
- CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
+// CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
}
}
diff --git a/volk/lib/qa_32f_max_aligned16.cc b/volk/lib/qa_32f_max_aligned16.cc
index 3ef3751768..cb1fd3627a 100644
--- a/volk/lib/qa_32f_max_aligned16.cc
+++ b/volk/lib/qa_32f_max_aligned16.cc
@@ -25,6 +25,7 @@ void qa_32f_max_aligned16::t1() {
float output0[vlen] __attribute__ ((aligned (16)));
float output01[vlen] __attribute__ ((aligned (16)));
+ float output02[vlen] __attribute__ ((aligned (16)));
for(int i = 0; i < vlen; ++i) {
input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
@@ -41,6 +42,13 @@ void qa_32f_max_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_32f_max_aligned16_manual(output02, input0, input1, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_32f_max_aligned16_manual(output01, input0, input1, vlen, "sse");
}
end = clock();
@@ -54,6 +62,7 @@ void qa_32f_max_aligned16::t1() {
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+ CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
}
}
diff --git a/volk/lib/qa_32f_min_aligned16.cc b/volk/lib/qa_32f_min_aligned16.cc
index 617e18b249..bf453f3606 100644
--- a/volk/lib/qa_32f_min_aligned16.cc
+++ b/volk/lib/qa_32f_min_aligned16.cc
@@ -25,6 +25,7 @@ void qa_32f_min_aligned16::t1() {
float output0[vlen] __attribute__ ((aligned (16)));
float output01[vlen] __attribute__ ((aligned (16)));
+ float output02[vlen] __attribute__ ((aligned (16)));
for(int i = 0; i < vlen; ++i) {
input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
@@ -41,6 +42,13 @@ void qa_32f_min_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_32f_min_aligned16_manual(output02, input0, input1, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_32f_min_aligned16_manual(output01, input0, input1, vlen, "sse");
}
end = clock();
@@ -54,6 +62,7 @@ void qa_32f_min_aligned16::t1() {
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+ CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
}
}
diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
index c3e65866bc..105d32d0c6 100644
--- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
+++ b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
@@ -63,10 +63,10 @@ void qa_32fc_magnitude_16s_aligned16::t1() {
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse3_time: %f\n", total);
- for(int i = 0; i < 1; ++i) {
- //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
- //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
- }
+ //for(int i = 0; i < 10; ++i) {
+ // printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag());
+ // printf("generic... %i, sse3... %i, orc... %i\n", output_generic[i], output_sse3[i], output_orc[i]);
+ //}
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
diff --git a/volk/lib/qa_volk.cc b/volk/lib/qa_volk.cc
index c3c27b69b1..f6a334da7c 100644
--- a/volk/lib/qa_volk.cc
+++ b/volk/lib/qa_volk.cc
@@ -118,7 +118,6 @@ CppUnit::TestSuite *
qa_volk::suite()
{
CppUnit::TestSuite *s = new CppUnit::TestSuite("volk");
-
s->addTest(qa_16s_quad_max_star_aligned16::suite());
s->addTest(qa_32fc_dot_prod_aligned16::suite());
s->addTest(qa_32fc_square_dist_scalar_mult_aligned16::suite());