Statistics
| Branch: | Tag: | Revision:

root / gnuradio-core / src / lib / filter / gr_fir_fff_simd.cc @ 369834c8

History | View | Annotate | Download (3.9 kB)

1
/* -*- c++ -*- */
2
/*
3
 * Copyright 2002 Free Software Foundation, Inc.
4
 * 
5
 * This file is part of GNU Radio
6
 * 
7
 * GNU Radio is free software; you can redistribute it and/or modify
8
 * it under the terms of the GNU General Public License as published by
9
 * the Free Software Foundation; either version 3, or (at your option)
10
 * any later version.
11
 * 
12
 * GNU Radio is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU General Public License for more details.
16
 * 
17
 * You should have received a copy of the GNU General Public License
18
 * along with GNU Radio; see the file COPYING.  If not, write to
19
 * the Free Software Foundation, Inc., 51 Franklin Street,
20
 * Boston, MA 02110-1301, USA.
21
 */
22
23
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <gr_fir_fff_simd.h>

#include <assert.h>
#include <stdint.h>
#include <malloc16.h>
#include <iostream>
#include <new>
31
32
using std::cerr;
33
34
// Default constructor.
//
// Default-constructs the gr_fir_fff_generic base and puts this object into
// an "empty" state: none of the four aligned tap copies is allocated and
// the dot-product routine pointer is null.
gr_fir_fff_simd::gr_fir_fff_simd ()
  : gr_fir_fff_generic ()
{
  // cerr << "@@@ gr_fir_fff_simd\n";

  // Null every slot so that set_taps() and the destructor can hand them to
  // free16Align() unconditionally.
  for (unsigned slot = 0; slot < 4; ++slot)
    d_aligned_taps[slot] = 0;

  // No dot-product routine is installed here.
  // NOTE(review): presumably a derived class assigns it -- confirm.
  d_float_dotprod = 0;
}
46
47
// Construct with an initial set of taps.
//
// new_taps is forwarded to the gr_fir_fff_generic base constructor and then
// passed to set_taps(), which builds the four alignment-shifted coefficient
// copies.
gr_fir_fff_simd::gr_fir_fff_simd (const std::vector<float> &new_taps)
  : gr_fir_fff_generic (new_taps)
{
  // cerr << "@@@ gr_fir_fff_simd\n";

  // The slots must be null before set_taps() runs, because set_taps()
  // releases whatever each slot currently holds.
  for (unsigned slot = 0; slot < 4; ++slot)
    d_aligned_taps[slot] = 0;

  // No dot-product routine is installed here.
  // NOTE(review): presumably a derived class assigns it -- confirm.
  d_float_dotprod = 0;

  set_taps (new_taps);
}
60
61
// Destructor: hands each of the four aligned coefficient copies back to
// free16Align().  Slots that were never allocated are still null (the
// constructors set them to 0) and are passed through as-is.
gr_fir_fff_simd::~gr_fir_fff_simd ()
{
  for (unsigned slot = 0; slot < 4; ++slot)
    free16Align (d_aligned_taps[slot]);
}
68
69
void
70
gr_fir_fff_simd::set_taps (const std::vector<float> &inew_taps)
71
{
72
  gr_fir_fff::set_taps (inew_taps);        // call superclass
73
  const std::vector<float> new_taps = gr_reverse(inew_taps);
74
75
  unsigned len = new_taps.size ();
76
77
  // Make 4 copies of the coefficients, one for each data alignment
78
  // Note use of special 16-byte-aligned version of calloc()
79
  
80
  for (unsigned i = 0; i < 4; i++){
81
    free16Align (d_aligned_taps[i]);        // free old value
82
83
    // this works because the bit representation of a IEEE floating point
84
    // +zero is all zeros.  If you're using a different representation,
85
    // you'll need to explictly set the result to the appropriate 0.0 value.
86
    
87
    d_aligned_taps[i] = (float *) calloc16Align (1 + (len + i - 1) / 4,
88
                                               4 * sizeof (float));
89
    if (d_aligned_taps[i] == 0){
90
      // throw something...
91
      cerr << "@@@ gr_fir_fff_simd d_aligned_taps[" << i << "] == 0\n";
92
    }
93
94
    for (unsigned j = 0; j < len; j++)
95
      d_aligned_taps[i][j+i] = new_taps[j];
96
  }
97
}
98
99
float 
100
gr_fir_fff_simd::filter (const float input[])
101
{
102
  if (ntaps () == 0)
103
    return 0.0;
104
105
106
  // Round input data address down to 16 byte boundary
107
  // NB: depending on the alignment of input[], memory
108
  // before input[] will be accessed. The contents don't matter since 
109
  // they'll be multiplied by zero coefficients. I can't conceive of any
110
  // situation where this could cause a segfault since memory protection
111
  // in the x86 machines is done on much larger boundaries.
112
  
113
  const float *ar = (float *)((unsigned long) input & ~15);
114
115
  // Choose one of 4 sets of pre-shifted coefficients. al is both the
116
  // index into d_aligned_taps[] and the number of 0 words padded onto
117
  // that coefficients array for alignment purposes.
118
119
  unsigned al = input - ar;
120
121
  // call assembler routine to do the work, passing number of 4-float blocks.
122
123
  // assert (((unsigned long) ar & 15) == 0);
124
  // assert (((unsigned long) d_aligned_taps[al] & 15) == 0);
125
126
  // cerr << "ar: " << ar << " d_aligned_taps[ar]: " << d_aligned_taps[al]
127
  // << " (ntaps() + al - 1)/4 + 1: " << (ntaps() + al -1) / 4 + 1 << endl;
128
  
129
  float r = d_float_dotprod (ar, d_aligned_taps[al], (ntaps() + al - 1) / 4 + 1);
130
131
  // cerr << "result = " << r << endl;
132
133
  return r;
134
}