root / gnuradio-core / src / lib / filter / gr_fir_ccc_simd.cc @ 3d8074ac
History | View | Annotate | Download (4.2 kB)
| 1 | 5d69a524 | jcorgan | /* -*- c++ -*- */
|
|---|---|---|---|
| 2 | 5d69a524 | jcorgan | /*
|
| 3 | 97ee4acf | eb | * Copyright 2002,2007 Free Software Foundation, Inc. |
| 4 | f919f9dc | Tom Rondeau | * |
| 5 | 5d69a524 | jcorgan | * This file is part of GNU Radio |
| 6 | f919f9dc | Tom Rondeau | * |
| 7 | 5d69a524 | jcorgan | * GNU Radio is free software; you can redistribute it and/or modify |
| 8 | 5d69a524 | jcorgan | * it under the terms of the GNU General Public License as published by |
| 9 | 937b719d | eb | * the Free Software Foundation; either version 3, or (at your option) |
| 10 | 5d69a524 | jcorgan | * any later version. |
| 11 | f919f9dc | Tom Rondeau | * |
| 12 | 5d69a524 | jcorgan | * GNU Radio is distributed in the hope that it will be useful, |
| 13 | 5d69a524 | jcorgan | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | 5d69a524 | jcorgan | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 15 | 5d69a524 | jcorgan | * GNU General Public License for more details. |
| 16 | f919f9dc | Tom Rondeau | * |
| 17 | 5d69a524 | jcorgan | * You should have received a copy of the GNU General Public License |
| 18 | 5d69a524 | jcorgan | * along with GNU Radio; see the file COPYING. If not, write to |
| 19 | 86f5c924 | eb | * the Free Software Foundation, Inc., 51 Franklin Street, |
| 20 | 86f5c924 | eb | * Boston, MA 02110-1301, USA. |
| 21 | 5d69a524 | jcorgan | */ |
| 22 | 5d69a524 | jcorgan | |
| 23 | 5d69a524 | jcorgan | #ifdef HAVE_CONFIG_H
|
| 24 | 5d69a524 | jcorgan | #include <config.h> |
| 25 | 5d69a524 | jcorgan | #endif
|
| 26 | 5d69a524 | jcorgan | #include <gr_fir_ccc_simd.h> |
| 27 | 5d69a524 | jcorgan | |
| 28 | 5d69a524 | jcorgan | #include <assert.h> |
| 29 | 5d69a524 | jcorgan | #include <malloc16.h> |
| 30 | 5d69a524 | jcorgan | #include <iostream> |
| 31 | 97ee4acf | eb | #include <stdexcept> |
| 32 | 5d69a524 | jcorgan | |
| 33 | 5d69a524 | jcorgan | using std::cerr;
|
| 34 | 5d69a524 | jcorgan | using std::endl;
|
| 35 | 5d69a524 | jcorgan | |
| 36 | 5d69a524 | jcorgan | gr_fir_ccc_simd::gr_fir_ccc_simd () |
| 37 | 5d69a524 | jcorgan | : gr_fir_ccc_generic () |
| 38 | 5d69a524 | jcorgan | {
|
| 39 | 5d69a524 | jcorgan | // cerr << "@@@ gr_fir_ccc_simd\n";
|
| 40 | 5d69a524 | jcorgan | |
| 41 | 5d69a524 | jcorgan | d_ccomplex_dotprod = 0;
|
| 42 | f919f9dc | Tom Rondeau | |
| 43 | 5d69a524 | jcorgan | d_aligned_taps[0] = 0; |
| 44 | 5d69a524 | jcorgan | d_aligned_taps[1] = 0; |
| 45 | 5d69a524 | jcorgan | d_aligned_taps[2] = 0; |
| 46 | 5d69a524 | jcorgan | d_aligned_taps[3] = 0; |
| 47 | 5d69a524 | jcorgan | } |
| 48 | 5d69a524 | jcorgan | |
| 49 | 5d69a524 | jcorgan | gr_fir_ccc_simd::gr_fir_ccc_simd (const std::vector<gr_complex> &new_taps)
|
| 50 | 5d69a524 | jcorgan | : gr_fir_ccc_generic (new_taps) |
| 51 | 5d69a524 | jcorgan | {
|
| 52 | 5d69a524 | jcorgan | // cerr << "@@@ gr_fir_ccc_simd\n";
|
| 53 | 5d69a524 | jcorgan | |
| 54 | 5d69a524 | jcorgan | d_ccomplex_dotprod = 0;
|
| 55 | f919f9dc | Tom Rondeau | |
| 56 | 5d69a524 | jcorgan | d_aligned_taps[0] = 0; |
| 57 | 5d69a524 | jcorgan | d_aligned_taps[1] = 0; |
| 58 | 5d69a524 | jcorgan | d_aligned_taps[2] = 0; |
| 59 | 5d69a524 | jcorgan | d_aligned_taps[3] = 0; |
| 60 | 5d69a524 | jcorgan | set_taps (new_taps); |
| 61 | 5d69a524 | jcorgan | } |
| 62 | 5d69a524 | jcorgan | |
| 63 | 5d69a524 | jcorgan | gr_fir_ccc_simd::~gr_fir_ccc_simd () |
| 64 | 5d69a524 | jcorgan | {
|
| 65 | 5d69a524 | jcorgan | free16Align (d_aligned_taps[0]);
|
| 66 | 5d69a524 | jcorgan | free16Align (d_aligned_taps[1]);
|
| 67 | 5d69a524 | jcorgan | free16Align (d_aligned_taps[2]);
|
| 68 | 5d69a524 | jcorgan | free16Align (d_aligned_taps[3]);
|
| 69 | 5d69a524 | jcorgan | } |
| 70 | 5d69a524 | jcorgan | |
| 71 | 5d69a524 | jcorgan | void
|
| 72 | 5d69a524 | jcorgan | gr_fir_ccc_simd::set_taps (const std::vector<gr_complex> &inew_taps)
|
| 73 | 5d69a524 | jcorgan | {
|
| 74 | 5d69a524 | jcorgan | gr_fir_ccc::set_taps (inew_taps); // call superclass
|
| 75 | 5d69a524 | jcorgan | |
| 76 | 5d69a524 | jcorgan | const std::vector<gr_complex> new_taps = gr_reverse(inew_taps);
|
| 77 | 5d69a524 | jcorgan | unsigned len = new_taps.size ();
|
| 78 | 5d69a524 | jcorgan | |
| 79 | 5d69a524 | jcorgan | // Make 4 copies of the coefficients, one for each data alignment
|
| 80 | 5d69a524 | jcorgan | // Note use of special 16-byte-aligned version of calloc()
|
| 81 | f919f9dc | Tom Rondeau | |
| 82 | 5d69a524 | jcorgan | for (unsigned i = 0; i < 4; i++){ |
| 83 | 5d69a524 | jcorgan | free16Align (d_aligned_taps[i]); // free old value
|
| 84 | 5d69a524 | jcorgan | |
| 85 | 5d69a524 | jcorgan | // this works because the bit representation of a IEEE floating point
|
| 86 | 5d69a524 | jcorgan | // +zero is all zeros. If you're using a different representation,
|
| 87 | 5d69a524 | jcorgan | // you'll need to explictly set the result to the appropriate 0.0 value.
|
| 88 | f919f9dc | Tom Rondeau | |
| 89 | 5d69a524 | jcorgan | d_aligned_taps[i] = (float *) calloc16Align (1 + (len + i - 1) / 2, |
| 90 | 5d69a524 | jcorgan | 2 * 4 * sizeof (float)); |
| 91 | 5d69a524 | jcorgan | if (d_aligned_taps[i] == 0){ |
| 92 | 5d69a524 | jcorgan | // throw something...
|
| 93 | 5d69a524 | jcorgan | cerr << "@@@ gr_fir_ccc_simd d_aligned_taps[" << i << "] == 0\n"; |
| 94 | 5d69a524 | jcorgan | } |
| 95 | 5d69a524 | jcorgan | |
| 96 | 5d69a524 | jcorgan | for (unsigned j = 0; j < len; j++) { |
| 97 | 5d69a524 | jcorgan | d_aligned_taps[i][2*(j+i)] = new_taps[j].real();
|
| 98 | 5d69a524 | jcorgan | d_aligned_taps[i][2*(j+i)+1] = new_taps[j].imag(); |
| 99 | 5d69a524 | jcorgan | } |
| 100 | 5d69a524 | jcorgan | } |
| 101 | 5d69a524 | jcorgan | } |
| 102 | 5d69a524 | jcorgan | |
| 103 | f919f9dc | Tom Rondeau | gr_complex |
| 104 | 5d69a524 | jcorgan | gr_fir_ccc_simd::filter (const gr_complex input[])
|
| 105 | 5d69a524 | jcorgan | {
|
| 106 | 5d69a524 | jcorgan | if (ntaps () == 0) |
| 107 | 5d69a524 | jcorgan | return 0.0; |
| 108 | 5d69a524 | jcorgan | |
| 109 | 97ee4acf | eb | if (((intptr_t) input & 0x7) != 0) |
| 110 | 97ee4acf | eb | throw std::invalid_argument("gr_complex must be 8-byte aligned"); |
| 111 | 5d69a524 | jcorgan | |
| 112 | 5d69a524 | jcorgan | // Round input data address down to 16 byte boundary
|
| 113 | 5d69a524 | jcorgan | // NB: depending on the alignment of input[], memory
|
| 114 | f919f9dc | Tom Rondeau | // before input[] will be accessed. The contents don't matter since
|
| 115 | 5d69a524 | jcorgan | // they'll be multiplied by zero coefficients. I can't conceive of any
|
| 116 | 5d69a524 | jcorgan | // situation where this could cause a segfault since memory protection
|
| 117 | 5d69a524 | jcorgan | // in the x86 machines is done on much larger boundaries.
|
| 118 | f919f9dc | Tom Rondeau | |
| 119 | 5d69a524 | jcorgan | const gr_complex *ar = (gr_complex *)((unsigned long) input & ~15); |
| 120 | 5d69a524 | jcorgan | |
| 121 | 5d69a524 | jcorgan | // Choose one of 4 sets of pre-shifted coefficients. al is both the
|
| 122 | 5d69a524 | jcorgan | // index into d_aligned_taps[] and the number of 0 words padded onto
|
| 123 | 5d69a524 | jcorgan | // that coefficients array for alignment purposes.
|
| 124 | 5d69a524 | jcorgan | |
| 125 | 5d69a524 | jcorgan | unsigned al = input - ar;
|
| 126 | 5d69a524 | jcorgan | |
| 127 | 5d69a524 | jcorgan | // call assembler routine to do the work, passing number of 2x4-float blocks.
|
| 128 | 5d69a524 | jcorgan | |
| 129 | 5d69a524 | jcorgan | // assert (((unsigned long) ar & 15) == 0);
|
| 130 | 5d69a524 | jcorgan | // assert (((unsigned long) d_aligned_taps[al] & 15) == 0);
|
| 131 | 5d69a524 | jcorgan | |
| 132 | 5d69a524 | jcorgan | // cerr << "ar: " << ar << " d_aligned_taps[ar]: " << d_aligned_taps[al]
|
| 133 | 5d69a524 | jcorgan | // << " (ntaps() + al - 1)/2 + 1: " << (ntaps() + al -1) / 2 + 1 << endl;
|
| 134 | 5d69a524 | jcorgan | |
| 135 | 5d69a524 | jcorgan | float result[2]; |
| 136 | 5d69a524 | jcorgan | |
| 137 | 5d69a524 | jcorgan | d_ccomplex_dotprod ((float*)ar, d_aligned_taps[al], (ntaps() + al - 1) / 2 + 1, result); |
| 138 | 5d69a524 | jcorgan | |
| 139 | 5d69a524 | jcorgan | // cerr << "result = " << result[0] << " " << result[1] << endl;
|
| 140 | 5d69a524 | jcorgan | |
| 141 | 5d69a524 | jcorgan | return gr_complex(result[0], result[1]); |
| 142 | 5d69a524 | jcorgan | } |