Statistics
| Branch: | Tag: | Revision:

root / gnuradio-core / src / lib / filter / gr_fir_ccc_simd.cc @ 3d8074ac

History | View | Annotate | Download (4.2 kB)

1 5d69a524 jcorgan
/* -*- c++ -*- */
2 5d69a524 jcorgan
/*
3 97ee4acf eb
 * Copyright 2002,2007 Free Software Foundation, Inc.
4 f919f9dc Tom Rondeau
 *
5 5d69a524 jcorgan
 * This file is part of GNU Radio
6 f919f9dc Tom Rondeau
 *
7 5d69a524 jcorgan
 * GNU Radio is free software; you can redistribute it and/or modify
8 5d69a524 jcorgan
 * it under the terms of the GNU General Public License as published by
9 937b719d eb
 * the Free Software Foundation; either version 3, or (at your option)
10 5d69a524 jcorgan
 * any later version.
11 f919f9dc Tom Rondeau
 *
12 5d69a524 jcorgan
 * GNU Radio is distributed in the hope that it will be useful,
13 5d69a524 jcorgan
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 5d69a524 jcorgan
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 5d69a524 jcorgan
 * GNU General Public License for more details.
16 f919f9dc Tom Rondeau
 *
17 5d69a524 jcorgan
 * You should have received a copy of the GNU General Public License
18 5d69a524 jcorgan
 * along with GNU Radio; see the file COPYING.  If not, write to
19 86f5c924 eb
 * the Free Software Foundation, Inc., 51 Franklin Street,
20 86f5c924 eb
 * Boston, MA 02110-1301, USA.
21 5d69a524 jcorgan
 */
22 5d69a524 jcorgan
23 5d69a524 jcorgan
#ifdef HAVE_CONFIG_H
24 5d69a524 jcorgan
#include <config.h>
25 5d69a524 jcorgan
#endif
26 5d69a524 jcorgan
#include <gr_fir_ccc_simd.h>
27 5d69a524 jcorgan
28 5d69a524 jcorgan
#include <assert.h>
29 5d69a524 jcorgan
#include <malloc16.h>
30 5d69a524 jcorgan
#include <iostream>
31 97ee4acf eb
#include <stdexcept>
32 5d69a524 jcorgan
33 5d69a524 jcorgan
using std::cerr;
34 5d69a524 jcorgan
using std::endl;
35 5d69a524 jcorgan
36 5d69a524 jcorgan
/*
 * Default constructor: builds a filter without any taps.
 *
 * The dot-product routine pointer and the four aligned tap buffers are
 * all cleared; no tap storage is allocated here.
 */
gr_fir_ccc_simd::gr_fir_ccc_simd ()
  : gr_fir_ccc_generic ()
{
  // No dot-product kernel is selected by this class; start with a null pointer.
  d_ccomplex_dotprod = 0;

  // No aligned copies of the taps exist yet.
  for (unsigned k = 0; k < 4; k++)
    d_aligned_taps[k] = 0;
}
48 5d69a524 jcorgan
49 5d69a524 jcorgan
/*
 * Construct a filter from an initial set of taps.
 *
 * new_taps is forwarded to the gr_fir_ccc_generic constructor and then
 * handed to set_taps() so the aligned tap buffers get built.
 */
gr_fir_ccc_simd::gr_fir_ccc_simd (const std::vector<gr_complex> &new_taps)
  : gr_fir_ccc_generic (new_taps)
{
  // No dot-product kernel is selected by this class; start with a null pointer.
  d_ccomplex_dotprod = 0;

  // The buffers must be null before set_taps() runs, because set_taps()
  // releases whatever each slot currently points to.
  for (unsigned k = 0; k < 4; k++)
    d_aligned_taps[k] = 0;

  set_taps (new_taps);
}
62 5d69a524 jcorgan
63 5d69a524 jcorgan
/*
 * Destructor: hands each of the four aligned tap buffers back to
 * free16Align(), in index order.
 */
gr_fir_ccc_simd::~gr_fir_ccc_simd ()
{
  for (unsigned k = 0; k < 4; k++)
    free16Align (d_aligned_taps[k]);
}
70 5d69a524 jcorgan
71 5d69a524 jcorgan
void
72 5d69a524 jcorgan
gr_fir_ccc_simd::set_taps (const std::vector<gr_complex> &inew_taps)
73 5d69a524 jcorgan
{
74 5d69a524 jcorgan
  gr_fir_ccc::set_taps (inew_taps);        // call superclass
75 5d69a524 jcorgan
76 5d69a524 jcorgan
  const std::vector<gr_complex> new_taps = gr_reverse(inew_taps);
77 5d69a524 jcorgan
  unsigned len = new_taps.size ();
78 5d69a524 jcorgan
79 5d69a524 jcorgan
  // Make 4 copies of the coefficients, one for each data alignment
80 5d69a524 jcorgan
  // Note use of special 16-byte-aligned version of calloc()
81 f919f9dc Tom Rondeau
82 5d69a524 jcorgan
  for (unsigned i = 0; i < 4; i++){
83 5d69a524 jcorgan
    free16Align (d_aligned_taps[i]);        // free old value
84 5d69a524 jcorgan
85 5d69a524 jcorgan
    // this works because the bit representation of a IEEE floating point
86 5d69a524 jcorgan
    // +zero is all zeros.  If you're using a different representation,
87 5d69a524 jcorgan
    // you'll need to explictly set the result to the appropriate 0.0 value.
88 f919f9dc Tom Rondeau
89 5d69a524 jcorgan
    d_aligned_taps[i] = (float *) calloc16Align (1 + (len + i - 1) / 2,
90 5d69a524 jcorgan
                                               2 * 4 * sizeof (float));
91 5d69a524 jcorgan
    if (d_aligned_taps[i] == 0){
92 5d69a524 jcorgan
      // throw something...
93 5d69a524 jcorgan
      cerr << "@@@ gr_fir_ccc_simd d_aligned_taps[" << i << "] == 0\n";
94 5d69a524 jcorgan
    }
95 5d69a524 jcorgan
96 5d69a524 jcorgan
    for (unsigned j = 0; j < len; j++) {
97 5d69a524 jcorgan
      d_aligned_taps[i][2*(j+i)] = new_taps[j].real();
98 5d69a524 jcorgan
      d_aligned_taps[i][2*(j+i)+1] = new_taps[j].imag();
99 5d69a524 jcorgan
    }
100 5d69a524 jcorgan
  }
101 5d69a524 jcorgan
}
102 5d69a524 jcorgan
103 f919f9dc Tom Rondeau
/*
 * Compute a single filter output.
 *
 * input: pointer to the input samples; must be 8-byte aligned.
 *
 * Returns 0 when there are no taps.  Otherwise returns the complex
 * value produced by the d_ccomplex_dotprod routine.
 *
 * Throws std::invalid_argument if input is not 8-byte aligned.
 *
 * NOTE(review): d_ccomplex_dotprod is set to 0 by the constructors in
 * this file and is called here without a check; presumably a derived
 * class installs the routine -- confirm against the subclasses.
 */
gr_complex
gr_fir_ccc_simd::filter (const gr_complex input[])
{
  if (ntaps () == 0)
    return 0.0;

  if (((intptr_t) input & 0x7) != 0)
    throw std::invalid_argument("gr_complex must be 8-byte aligned");

  // Round input data address down to 16 byte boundary
  // NB: depending on the alignment of input[], memory
  // before input[] will be accessed. The contents don't matter since
  // they'll be multiplied by zero coefficients. I can't conceive of any
  // situation where this could cause a segfault since memory protection
  // in the x86 machines is done on much larger boundaries.
  //
  // The arithmetic is done in uintptr_t so the address is not truncated
  // on platforms where unsigned long is narrower than a pointer.

  const gr_complex *ar =
    (const gr_complex *)((uintptr_t) input & ~(uintptr_t) 15);

  // Choose one of the sets of pre-shifted coefficients. al is both the
  // index into d_aligned_taps[] and the number of zero entries padded
  // onto that coefficients array for alignment purposes.  Given the
  // 8-byte alignment check above, al is 0 or 1 (assuming
  // sizeof(gr_complex) == 8).

  unsigned al = input - ar;

  // call assembler routine to do the work, passing number of 2x4-float blocks.

  float result[2];

  d_ccomplex_dotprod ((float*)ar, d_aligned_taps[al], (ntaps() + al - 1) / 2 + 1, result);

  return gr_complex(result[0], result[1]);
}