Statistics
| Branch: | Tag: | Revision:

root / gnuradio-core / src / lib / filter / qa_ccomplex_dotprod_x86.cc @ 0f73b099

History | View | Annotate | Download (7.5 kB)

1
/* -*- c++ -*- */
2
/*
3
 * Copyright 2002 Free Software Foundation, Inc.
4
 * 
5
 * This file is part of GNU Radio
6
 * 
7
 * GNU Radio is free software; you can redistribute it and/or modify
8
 * it under the terms of the GNU General Public License as published by
9
 * the Free Software Foundation; either version 3, or (at your option)
10
 * any later version.
11
 * 
12
 * GNU Radio is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU General Public License for more details.
16
 * 
17
 * You should have received a copy of the GNU General Public License
18
 * along with GNU Radio; see the file COPYING.  If not, write to
19
 * the Free Software Foundation, Inc., 51 Franklin Street,
20
 * Boston, MA 02110-1301, USA.
21
 */
22
23
#ifdef HAVE_CONFIG_H
24
#include <config.h>
25
#endif
26
27
#include <cppunit/TestAssert.h>
28
#include <qa_ccomplex_dotprod_x86.h>
29
#include <ccomplex_dotprod_x86.h>
30
#include <string.h>
31
#include <iostream>
32
#include <malloc16.h>
33
#include <sse_debug.h>
34
#include <cmath>
35
#include <gr_cpu.h>
36
#include <random.h>
37
38
using std::cerr;
39
40
/// Macro for primitive value comparisons
41
#define assertcomplexEqual(expected0,expected1,actual,delta)        \
42
  CPPUNIT_ASSERT_DOUBLES_EQUAL (expected0, actual[0], delta);        \
43
  CPPUNIT_ASSERT_DOUBLES_EQUAL (expected1, actual[1], delta);        
44
45
46
#define        MAX_BLKS        10
47
#define        FLOATS_PER_BLK         4
48
49
#define        ERR_DELTA        (1e-6)
50
51
static float
52
uniform ()
53
{
54
  return 2.0 * ((float) random () / RANDOM_MAX - 0.5);        // uniformly (-1, 1)
55
}
56
57
static void
58
random_floats (float *buf, unsigned n)
59
{
60
  for (unsigned i = 0; i < n; i++)
61
    buf[i] = rint (uniform () * 32767);
62
}
63
64
static void
65
zero_floats (float *buf, unsigned n)
66
{
67
  for (unsigned i = 0; i < n; i++)
68
    buf[i] = 0.0;
69
}
70
71
void
72
ref_ccomplex_dotprod (const float *input,
73
                      const float *taps, unsigned n_2_ccomplex_blocks,
74
                      float *result)
75
{
76
  float sum0[2] = {0,0};
77
  float sum1[2] = {0,0};
78
79
  do {
80
81
    sum0[0] += input[0] * taps[0] - input[1] * taps[1];
82
    sum0[1] += input[0] * taps[1] + input[1] * taps[0];
83
    sum1[0] += input[2] * taps[2] - input[3] * taps[3];
84
    sum1[1] += input[2] * taps[3] + input[3] * taps[2];
85
86
    input += 4;
87
    taps += 4;
88
89
  } while (--n_2_ccomplex_blocks != 0);
90
91
92
  result[0] = sum0[0] + sum1[0];
93
  result[1] = sum0[1] + sum1[1];
94
}
95
96
void 
97
qa_ccomplex_dotprod_x86::setUp ()
98
{
99
  taps = (float *) calloc16Align (MAX_BLKS, 
100
                                  sizeof (float) * FLOATS_PER_BLK);
101
102
  input = (float *) calloc16Align (MAX_BLKS,
103
                                   sizeof (float) * FLOATS_PER_BLK);
104
105
  if (taps == 0 || input == 0)
106
    abort ();
107
}
108
109
void
110
qa_ccomplex_dotprod_x86::tearDown ()
111
{
112
  free16Align (taps);
113
  free16Align (input);
114
  taps = 0;
115
  input = 0;
116
}
117
118
119
void 
120
qa_ccomplex_dotprod_x86::zb ()        // "zero both"
121
{
122
  zero_floats (taps, MAX_BLKS * FLOATS_PER_BLK);
123
  zero_floats (input, MAX_BLKS * FLOATS_PER_BLK);
124
}
125
126
// 
127
// t1 
128
//
129
130
void
131
qa_ccomplex_dotprod_x86::t1_base (ccomplex_dotprod_t ccomplex_dotprod)
132
{
133
  float result[2];
134
135
  // cerr << "Testing dump_xmm_regs\n";
136
  // dump_xmm_regs ();
137
138
  // test basic cases, 1 block
139
140
  zb ();
141
  ccomplex_dotprod (input, taps, 1, result);
142
  assertcomplexEqual (0.0, 0.0, result, ERR_DELTA);
143
144
  // vary each input
145
146
  zb ();
147
  input[0] = 1.0;        taps[0] = 1.0; taps[1] = -1.0;
148
  ccomplex_dotprod (input, taps, 1, result);
149
  //cerr << result[0] << " " << result[1] << "\n";
150
  assertcomplexEqual (1.0, -1.0, result, ERR_DELTA);
151
  
152
  zb ();
153
  input[1] = 2.0;        taps[0] = 1.0; taps[1] = -1.0;
154
  ccomplex_dotprod (input, taps, 1, result);
155
  assertcomplexEqual (2.0, 2.0, result, ERR_DELTA);
156
157
  zb ();
158
  input[2] = 3.0;        taps[2] = 1.0; taps[3] = -1.0;
159
  ccomplex_dotprod (input, taps, 1, result);
160
  assertcomplexEqual (3.0, -3.0, result, ERR_DELTA);
161
  
162
  zb ();
163
  input[3] = 4.0;        taps[2] = 1.0; taps[3] = -1.0;
164
  ccomplex_dotprod (input, taps, 1, result);
165
  assertcomplexEqual (4.0, 4.0, result, ERR_DELTA);
166
167
  // vary each tap
168
169
  zb ();
170
  input[0] = 1.0;        taps[0] = 0.5; taps[1] = -0.5;
171
  ccomplex_dotprod (input, taps, 1, result);
172
  assertcomplexEqual (0.5, -0.5, result, ERR_DELTA);
173
  
174
  zb ();
175
  input[0] = 1.0;        taps[0] = 2.0; taps[1] = -2.0;
176
  ccomplex_dotprod (input, taps, 1, result);
177
  assertcomplexEqual (2.0, -2.0, result, ERR_DELTA);
178
  
179
  zb ();
180
  input[0] = 1.0;        taps[0] = 3.0; taps[1] = -3.0;
181
  ccomplex_dotprod (input, taps, 1, result);
182
  assertcomplexEqual (3.0, -3.0, result, ERR_DELTA);
183
  
184
  zb ();
185
  input[0] = 1.0;        taps[0] = 4.0; taps[1] = -4.0;
186
  ccomplex_dotprod (input, taps, 1, result);
187
  assertcomplexEqual (4.0, -4.0, result, ERR_DELTA);
188
}
189
190
// 
191
// t2 
192
//
193
void
194
qa_ccomplex_dotprod_x86::t2_base (ccomplex_dotprod_t ccomplex_dotprod)
195
{
196
  float result[2];
197
198
  zb ();
199
  input[0] =  1.0; input[1] =  3.0;        taps[0] =  5.0;        taps[1] =  -2.0;
200
201
  //1*5-3*-2 =11, 1*-2+3*5=13
202
 
203
  ccomplex_dotprod (input, taps, 1, result);
204
  assertcomplexEqual (11.0, 13.0, result, ERR_DELTA);
205
206
  //7*5-13*-5 =100, 7*-5+13*5=30
207
208
  input[2] =  7.0; input[3] = 13.0;        taps[2] =  5.0;        taps[3] =  -5.0;
209
210
  ccomplex_dotprod (input, taps, 1, result);
211
  assertcomplexEqual (111.0, 43.0, result, ERR_DELTA);
212
213
  input[4] = 19; input[5] = -19;        taps[4] = 23.0;        taps[5] = -23.0;
214
215
  //19*23--19*-23 =0, 19*-23+-19*23=-874
216
217
  ccomplex_dotprod (input, taps, 2, result);
218
  assertcomplexEqual (111.0, -831.0, result, ERR_DELTA);
219
  
220
}
221
222
//
223
// t3
224
//
225
void
226
qa_ccomplex_dotprod_x86::t3_base (ccomplex_dotprod_t ccomplex_dotprod)
227
{
228
  srandom (0);        // we want reproducibility
229
230
  for (unsigned int i = 0; i < 10; i++){
231
    random_floats (input, MAX_BLKS * FLOATS_PER_BLK);
232
    random_floats (taps, MAX_BLKS * FLOATS_PER_BLK);
233
234
    // we use a sloppy error margin because on the x86 architecture,
235
    // our reference implementation is using 80 bit floating point
236
    // arithmetic, while the SSE version is using 32 bit float point
237
    // arithmetic.
238
239
    float ref[2];
240
    ref_ccomplex_dotprod (input, taps, MAX_BLKS, ref);
241
    float calc[2];
242
    ccomplex_dotprod (input, taps, MAX_BLKS, calc);
243
    CPPUNIT_ASSERT_DOUBLES_EQUAL (ref[0],
244
                            calc[0],
245
                        fabs (ref[0]) * 1e-4);
246
    CPPUNIT_ASSERT_DOUBLES_EQUAL (ref[1],
247
                            calc[1],
248
                        fabs (ref[1]) * 1e-4);
249
  }
250
}
251
252
void
253
qa_ccomplex_dotprod_x86::t1_3dnowext ()
254
{
255
  if (!gr_cpu::has_3dnowext ()){
256
    cerr << "No 3DNow!Ext support; not tested\n";
257
  }
258
  else
259
    t1_base (ccomplex_dotprod_3dnowext);
260
}
261
262
void 
263
qa_ccomplex_dotprod_x86::t2_3dnowext ()
264
{
265
  if (!gr_cpu::has_3dnowext ()){
266
    cerr << "No 3DNow!Ext support; not tested\n";
267
  }
268
  else
269
    t2_base (ccomplex_dotprod_3dnowext);
270
}
271
272
void 
273
qa_ccomplex_dotprod_x86::t3_3dnowext ()
274
{
275
  if (!gr_cpu::has_3dnowext ()){
276
    cerr << "No 3DNow!Ext support; not tested\n";
277
  }
278
  else
279
    t3_base (ccomplex_dotprod_3dnowext);
280
}
281
282
void
283
qa_ccomplex_dotprod_x86::t1_3dnow ()
284
{
285
  if (!gr_cpu::has_3dnow ()){
286
    cerr << "No 3DNow! support; not tested\n";
287
  }
288
  else
289
    t1_base (ccomplex_dotprod_3dnow);
290
}
291
292
void 
293
qa_ccomplex_dotprod_x86::t2_3dnow ()
294
{
295
  if (!gr_cpu::has_3dnow ()){
296
    cerr << "No 3DNow! support; not tested\n";
297
  }
298
  else
299
    t2_base (ccomplex_dotprod_3dnow);
300
}
301
302
void 
303
qa_ccomplex_dotprod_x86::t3_3dnow ()
304
{
305
  if (!gr_cpu::has_3dnow ()){
306
    cerr << "No 3DNow! support; not tested\n";
307
  }
308
  else
309
    t3_base (ccomplex_dotprod_3dnow);
310
}
311
312
void 
313
qa_ccomplex_dotprod_x86::t1_sse ()
314
{
315
  if (!gr_cpu::has_sse ()){
316
    cerr << "No SSE support; not tested\n";
317
  }
318
  else
319
    t1_base (ccomplex_dotprod_sse);
320
}
321
322
void 
323
qa_ccomplex_dotprod_x86::t2_sse ()
324
{
325
  if (!gr_cpu::has_sse ()){
326
    cerr << "No SSE support; not tested\n";
327
  }
328
  else
329
    t2_base (ccomplex_dotprod_sse);
330
}
331
332
void 
333
qa_ccomplex_dotprod_x86::t3_sse ()
334
{
335
  if (!gr_cpu::has_sse ()){
336
    cerr << "No SSE support; not tested\n";
337
  }
338
  else
339
    t3_base (ccomplex_dotprod_sse);
340
}
341