Statistics
| Branch: | Tag: | Revision:

root / volk / lib / qa_utils.cc @ bb0d5960

History | View | Annotate | Download (19 kB)

1
#include "qa_utils.h"
2
#include <cstring>
3
#include <boost/foreach.hpp>
4
#include <boost/assign/list_of.hpp>
5
#include <boost/tokenizer.hpp>
6
#include <iostream>
7
#include <vector>
8
#include <list>
9
#include <ctime>
10
#include <cmath>
11
#include <limits>
12
#include <boost/lexical_cast.hpp>
13
#include <volk/volk.h>
14
#include <volk/volk_cpu.h>
15
#include <volk/volk_common.h>
16
#include <boost/typeof/typeof.hpp>
17
#include <boost/type_traits.hpp>
18
#include <stdio.h>
19
20
// Draws one sample from rand() and rescales it linearly from [0, RAND_MAX]
// onto the interval [-1, 1].
float uniform() {
  const float unit = (float) rand() / RAND_MAX;   // in [0, 1]
  return 2.0 * (unit - 0.5);
}
23
24
template <class t>
25
void random_floats (t *buf, unsigned n)
26
{
27
  for (unsigned i = 0; i < n; i++)
28
    buf[i] = uniform ();
29
}
30
31
void load_random_data(void *data, volk_type_t type, unsigned int n) {
32
    if(type.is_complex) n *= 2;
33
    if(type.is_float) {
34
        if(type.size == 8) random_floats<double>((double *)data, n);
35
        else random_floats<float>((float *)data, n);
36
    } else {
37
        float int_max = float(uint64_t(2) << (type.size*8));
38
        if(type.is_signed) int_max /= 2.0;
39
        for(unsigned int i=0; i<n; i++) {
40
            float scaled_rand = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * int_max;
41
            //man i really don't know how to do this in a more clever way, you have to cast down at some point
42
            switch(type.size) {
43
            case 8:
44
                if(type.is_signed) ((int64_t *)data)[i] = (int64_t) scaled_rand;
45
                else ((uint64_t *)data)[i] = (uint64_t) scaled_rand;
46
            break;
47
            case 4:
48
                if(type.is_signed) ((int32_t *)data)[i] = (int32_t) scaled_rand;
49
                else ((uint32_t *)data)[i] = (uint32_t) scaled_rand;
50
            break;
51
            case 2:
52
                if(type.is_signed) ((int16_t *)data)[i] = (int16_t) scaled_rand;
53
                else ((uint16_t *)data)[i] = (uint16_t) scaled_rand;
54
            break;
55
            case 1:
56
                if(type.is_signed) ((int8_t *)data)[i] = (int8_t) scaled_rand;
57
                else ((uint8_t *)data)[i] = (uint8_t) scaled_rand;
58
            break;
59
            default:
60
                throw "load_random_data: no support for data size > 8 or < 1"; //no shenanigans here
61
            }
62
        }
63
    }
64
}
65
66
// Collects the first desc.n_archs entries of desc.indices into a vector of
// strings, in order. No filtering is applied here: every listed entry is
// returned whether or not the current machine supports it.
static std::vector<std::string> get_arch_list(struct volk_func_desc desc) {
    std::vector<std::string> names;

    int idx = 0;
    while(idx < desc.n_archs) {
        names.push_back(std::string(desc.indices[idx]));
        ++idx;
    }

    return names;
}
76
77
// Parses a type token such as "32fc", "16i" or "s32f" into a volk_type_t.
//
// Token layout: an optional leading 's' (scalar), a bit width that must be a
// non-zero multiple of 8 no larger than 64, then any number of suffix letters:
//   'f' float, 'i' signed integer, 'u' unsigned integer, 'c' complex.
// The returned type stores the size in bytes and the unmodified token in str.
//
// Throws std::string when the token is shorter than two characters, contains
// no digits, or carries an unknown suffix letter; boost::bad_lexical_cast
// propagates when the text up to the last digit is not a plain integer.
// The width constraint itself is only checked with assert().
volk_type_t volk_type_from_string(std::string name) {
    volk_type_t type;
    type.is_float = false;
    type.is_scalar = false;
    type.is_complex = false;
    type.is_signed = false;
    type.size = 0;
    type.str = name;

    if(name.size() < 2) throw std::string("name too short to be a datatype");

    //is it a scalar?
    if(name[0] == 's') {
        type.is_scalar = true;
        name = name.substr(1, name.size()-1);
    }

    //get the data size
    const size_t last_size_pos = name.find_last_of("0123456789");
    //find_last_of reports "not found" as npos; size_t is unsigned so it can never be < 0
    if(last_size_pos == std::string::npos) throw std::string("no size spec in type ").append(name);
    //will throw if malformed
    int size = boost::lexical_cast<int>(name.substr(0, last_size_pos+1));

    assert(((size % 8) == 0) && (size <= 64) && (size != 0));
    type.size = size/8; //in bytes

    for(size_t i=last_size_pos+1; i < name.size(); i++) {
        switch (name[i]) {
        case 'f':
            type.is_float = true;
            break;
        case 'i':
            type.is_signed = true;
            break;
        case 'c':
            type.is_complex = true;
            break;
        case 'u':
            type.is_signed = false;
            break;
        default:
            //a bare "throw;" here would call std::terminate because no exception is active
            throw std::string("unknown type suffix in ").append(name);
        }
    }

    return type;
}
124
125
static void get_signatures_from_name(std::vector<volk_type_t> &inputsig,
126
                                   std::vector<volk_type_t> &outputsig,
127
                                   std::string name) {
128
    boost::char_separator<char> sep("_");
129
    boost::tokenizer<boost::char_separator<char> > tok(name, sep);
130
    std::vector<std::string> toked;
131
    tok.assign(name);
132
    toked.assign(tok.begin(), tok.end());
133
134
    assert(toked[0] == "volk");
135
    toked.erase(toked.begin());
136
137
    //ok. we're assuming a string in the form
138
    //(sig)_(multiplier-opt)_..._(name)_(sig)_(multiplier-opt)_..._(alignment)
139
140
    enum { SIDE_INPUT, SIDE_NAME, SIDE_OUTPUT } side = SIDE_INPUT;
141
    std::string fn_name;
142
    volk_type_t type;
143
    BOOST_FOREACH(std::string token, toked) {
144
        try {
145
            type = volk_type_from_string(token);
146
            if(side == SIDE_NAME) side = SIDE_OUTPUT; //if this is the first one after the name...
147
148
            if(side == SIDE_INPUT) inputsig.push_back(type);
149
            else outputsig.push_back(type);
150
        } catch (...){
151
            if(token[0] == 'x') { //it's a multiplier
152
                if(side == SIDE_INPUT) assert(inputsig.size() > 0);
153
                else assert(outputsig.size() > 0);
154
                int multiplier = boost::lexical_cast<int>(token.substr(1, token.size()-1)); //will throw if invalid
155
                for(int i=1; i<multiplier; i++) {
156
                    if(side == SIDE_INPUT) inputsig.push_back(inputsig.back());
157
                    else outputsig.push_back(outputsig.back());
158
                }
159
            }
160
            else if(side == SIDE_INPUT) { //it's the function name, at least it better be
161
                side = SIDE_NAME;
162
                fn_name.append("_");
163
                fn_name.append(token);
164
            }
165
            else if(side == SIDE_OUTPUT) {
166
                if(token != toked.back()) throw; //the last token in the name is the alignment
167
            }
168
        }
169
    }
170
    //we don't need an output signature (some fn's operate on the input data, "in place"), but we do need at least one input!
171
    assert(inputsig.size() != 0);
172
    
173
}
174
175
// Invokes a one-buffer kernel iter times on buffs[0], forwarding vlen and the
// architecture name.
inline void run_cast_test1(volk_fn_1arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
    for(unsigned int pass = 0; pass < iter; ++pass) {
        func(buffs[0], vlen, arch.c_str());
    }
}
178
179
// Invokes a two-buffer kernel iter times on buffs[0..1], forwarding vlen and
// the architecture name.
inline void run_cast_test2(volk_fn_2arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
    for(unsigned int pass = 0; pass < iter; ++pass) {
        func(buffs[0], buffs[1], vlen, arch.c_str());
    }
}
182
183
// Invokes a three-buffer kernel iter times on buffs[0..2], forwarding vlen and
// the architecture name.
inline void run_cast_test3(volk_fn_3arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
    for(unsigned int pass = 0; pass < iter; ++pass) {
        func(buffs[0], buffs[1], buffs[2], vlen, arch.c_str());
    }
}
186
187
// Invokes a four-buffer kernel iter times on buffs[0..3], forwarding vlen and
// the architecture name.
inline void run_cast_test4(volk_fn_4arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
    for(unsigned int pass = 0; pass < iter; ++pass) {
        func(buffs[0], buffs[1], buffs[2], buffs[3], vlen, arch.c_str());
    }
}
190
191
// Invokes a one-buffer kernel that also takes a float scalar, iter times, on
// buffs[0].
inline void run_cast_test1_s32f(volk_fn_1arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
    for(unsigned int pass = 0; pass < iter; ++pass) {
        func(buffs[0], scalar, vlen, arch.c_str());
    }
}
194
195
// Invokes a two-buffer kernel that also takes a float scalar, iter times, on
// buffs[0..1].
inline void run_cast_test2_s32f(volk_fn_2arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
    for(unsigned int pass = 0; pass < iter; ++pass) {
        func(buffs[0], buffs[1], scalar, vlen, arch.c_str());
    }
}
198
199
// Invokes a three-buffer kernel that also takes a float scalar, iter times, on
// buffs[0..2].
inline void run_cast_test3_s32f(volk_fn_3arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
    for(unsigned int pass = 0; pass < iter; ++pass) {
        func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str());
    }
}
202
203
// Invokes a one-buffer kernel that also takes an lv_32fc_t scalar, iter times,
// on buffs[0].
inline void run_cast_test1_s32fc(volk_fn_1arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
    for(unsigned int pass = 0; pass < iter; ++pass) {
        func(buffs[0], scalar, vlen, arch.c_str());
    }
}
206
207
// Invokes a two-buffer kernel that also takes an lv_32fc_t scalar, iter times,
// on buffs[0..1].
inline void run_cast_test2_s32fc(volk_fn_2arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
    for(unsigned int pass = 0; pass < iter; ++pass) {
        func(buffs[0], buffs[1], scalar, vlen, arch.c_str());
    }
}
210
211
// Invokes a three-buffer kernel that also takes an lv_32fc_t scalar, iter
// times, on buffs[0..2].
inline void run_cast_test3_s32fc(volk_fn_3arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
    for(unsigned int pass = 0; pass < iter; ++pass) {
        func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str());
    }
}
214
215
// Compares two floating point buffers element by element using a relative
// error test against the reference buffer.
//
//   in1  - reference values
//   in2  - values under test
//   vlen - number of elements to compare
//   tol  - largest accepted value of |in1[i] - in2[i]| / |in1[i]|
//
// Returns true when at least one element exceeds the tolerance (i.e. true
// means FAILURE). The first ten mismatches are printed to std::cout.
// Elements whose reference magnitude is below 1e-30 are skipped because the
// relative error is dominated by roundoff there.
template <class t>
bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) {
    bool fail = false;
    int print_max_errs = 10;
    for(unsigned int i=0; i<vlen; i++) {
        const t ref = in1[i];
        const t val = in2[i];

        //use the magnitude of the reference: testing the signed value would
        //skip every negative sample and make the relative error negative
        const t ref_mag = std::fabs(ref);
        if(ref_mag < 1e-30) continue; //this is a hack: below around here we'll start to get roundoff errors due to limited precision

        if(std::fabs(ref - val)/ref_mag > tol) {
            fail=true;
            if(print_max_errs > 0) {
                print_max_errs--;
                std::cout << "offset " << i << " in1: " << ref << " in2: " << val << std::endl;
            }
        }
    }

    return fail;
}
231
232
// Compares two integer buffers element by element using an absolute
// difference test.
//
//   in1  - reference values
//   in2  - values under test
//   vlen - number of elements to compare
//   tol  - largest accepted value of |in1[i] - in2[i]|
//
// Returns true when at least one element differs by more than tol (i.e. true
// means FAILURE). The first ten mismatches are printed to std::cout.
// Works for signed and unsigned element types up to 64 bits wide.
template <class t>
bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) {
    bool fail = false;
    int print_max_errs = 10;
    for(unsigned int i=0; i<vlen; i++) {
        const t a = in1[i];
        const t b = in2[i];

        //subtract smaller from larger in unsigned 64-bit arithmetic: the modular
        //result equals the true distance and cannot overflow, whereas narrowing
        //to int drops the upper half of 64-bit samples
        const t hi = (a > b) ? a : b;
        const t lo = (a > b) ? b : a;
        const unsigned long long distance =
            static_cast<unsigned long long>(hi) - static_cast<unsigned long long>(lo);

        if(distance > tol) {
            fail=true;
            if(print_max_errs > 0) {
                print_max_errs--;
                //widen before printing so 8-bit samples are shown as numbers, not characters
                if(std::numeric_limits<t>::is_signed) {
                    std::cout << "offset " << i << " in1: " << static_cast<long long>(a) << " in2: " << static_cast<long long>(b) << std::endl;
                } else {
                    std::cout << "offset " << i << " in1: " << static_cast<unsigned long long>(a) << " in2: " << static_cast<unsigned long long>(b) << std::endl;
                }
            }
        }
    }

    return fail;
}
247
248
class volk_qa_aligned_mem_pool{
249
public:
250
    void *get_new(size_t size){
251
        size_t alignment = volk_get_alignment();
252
        _mems.push_back(std::vector<char>(size + alignment-1, 0));
253
        size_t ptr = size_t(&_mems.back().front());
254
        return (void *)((ptr + alignment-1) & ~(alignment-1));
255
    }
256
private: std::list<std::vector<char> > _mems;
257
};
258
259
bool run_volk_tests(struct volk_func_desc desc,
260
                    void (*manual_func)(),
261
                    std::string name,
262
                    float tol,
263
                    lv_32fc_t scalar,
264
                    int vlen,
265
                    int iter,
266
                    std::vector<std::string> *best_arch_vector = 0,
267
                    std::string puppet_master_name = "NULL"
268
                   ) {
269
    std::cout << "RUN_VOLK_TESTS: " << name << std::endl;
270
271
    //first let's get a list of available architectures for the test
272
    std::vector<std::string> arch_list = get_arch_list(desc);
273
274
    if(arch_list.size() < 2) {
275
        std::cout << "no architectures to test" << std::endl;
276
        return false;
277
    }
278
279
    //something that can hang onto memory and cleanup when this function exits
280
    volk_qa_aligned_mem_pool mem_pool;
281
282
    //now we have to get a function signature by parsing the name
283
    std::vector<volk_type_t> inputsig, outputsig;
284
    get_signatures_from_name(inputsig, outputsig, name);
285
    
286
    //pull the input scalars into their own vector
287
    std::vector<volk_type_t> inputsc;
288
    for(size_t i=0; i<inputsig.size(); i++) {
289
        if(inputsig[i].is_scalar) {
290
            inputsc.push_back(inputsig[i]);
291
            inputsig.erase(inputsig.begin() + i);
292
            i -= 1;
293
        }
294
    }
295
    //for(int i=0; i<inputsig.size(); i++) std::cout << "Input: " << inputsig[i].str << std::endl;
296
    //for(int i=0; i<outputsig.size(); i++) std::cout << "Output: " << outputsig[i].str << std::endl;
297
    std::vector<void *> inbuffs;
298
    BOOST_FOREACH(volk_type_t sig, inputsig) {
299
        if(!sig.is_scalar) //we don't make buffers for scalars
300
          inbuffs.push_back(mem_pool.get_new(vlen*sig.size*(sig.is_complex ? 2 : 1)));
301
    }
302
    for(size_t i=0; i<inbuffs.size(); i++) {
303
        load_random_data(inbuffs[i], inputsig[i], vlen);
304
    }
305
306
    //ok let's make a vector of vector of void buffers, which holds the input/output vectors for each arch
307
    std::vector<std::vector<void *> > test_data;
308
    for(size_t i=0; i<arch_list.size(); i++) {
309
        std::vector<void *> arch_buffs;
310
        for(size_t j=0; j<outputsig.size(); j++) {
311
            arch_buffs.push_back(mem_pool.get_new(vlen*outputsig[j].size*(outputsig[j].is_complex ? 2 : 1)));
312
        }
313
        for(size_t j=0; j<inputsig.size(); j++) {
314
            arch_buffs.push_back(inbuffs[j]);
315
        }
316
        test_data.push_back(arch_buffs);
317
    }
318
319
    std::vector<volk_type_t> both_sigs;
320
    both_sigs.insert(both_sigs.end(), outputsig.begin(), outputsig.end());
321
    both_sigs.insert(both_sigs.end(), inputsig.begin(), inputsig.end());
322
323
    //now run the test
324
    clock_t start, end;
325
    std::vector<double> profile_times;
326
    for(size_t i = 0; i < arch_list.size(); i++) {
327
        start = clock();
328
329
        switch(both_sigs.size()) {
330
            case 1:
331
                if(inputsc.size() == 0) {
332
                    run_cast_test1((volk_fn_1arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
333
                } else if(inputsc.size() == 1 && inputsc[0].is_float) {
334
                    if(inputsc[0].is_complex) {
335
                        run_cast_test1_s32fc((volk_fn_1arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
336
                    } else {
337
                        run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
338
                    }
339
                } else throw "unsupported 1 arg function >1 scalars";
340
                break;
341
            case 2:
342
                if(inputsc.size() == 0) {
343
                    run_cast_test2((volk_fn_2arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
344
                } else if(inputsc.size() == 1 && inputsc[0].is_float) {
345
                    if(inputsc[0].is_complex) {
346
                        run_cast_test2_s32fc((volk_fn_2arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
347
                    } else {
348
                        run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
349
                    }
350
                } else throw "unsupported 2 arg function >1 scalars";
351
                break;
352
            case 3:
353
                if(inputsc.size() == 0) {
354
                    run_cast_test3((volk_fn_3arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
355
                } else if(inputsc.size() == 1 && inputsc[0].is_float) {
356
                    if(inputsc[0].is_complex) {
357
                        run_cast_test3_s32fc((volk_fn_3arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
358
                    } else {
359
                        run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
360
                    }
361
                } else throw "unsupported 3 arg function >1 scalars";
362
                break;
363
            case 4:
364
                run_cast_test4((volk_fn_4arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
365
                break;
366
            default:
367
                throw "no function handler for this signature";
368
                break;
369
        }
370
371
        end = clock();
372
        double arch_time = (double)(end-start)/(double)CLOCKS_PER_SEC;
373
        std::cout << arch_list[i] << " completed in " << arch_time << "s" << std::endl;
374
375
        profile_times.push_back(arch_time);
376
    }
377
378
    //and now compare each output to the generic output
379
    //first we have to know which output is the generic one, they aren't in order...
380
    size_t generic_offset=0;
381
    for(size_t i=0; i<arch_list.size(); i++)
382
        if(arch_list[i] == "generic") generic_offset=i;
383
384
    //now compare
385
    //if(outputsig.size() == 0) outputsig = inputsig; //a hack, i know
386
387
    bool fail = false;
388
    bool fail_global = false;
389
    std::vector<bool> arch_results;
390
    for(size_t i=0; i<arch_list.size(); i++) {
391
        fail = false;
392
        if(i != generic_offset) {
393
            for(size_t j=0; j<both_sigs.size(); j++) {
394
                if(both_sigs[j].is_float) {
395
                    if(both_sigs[j].size == 8) {
396
                        fail = fcompare((double *) test_data[generic_offset][j], (double *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
397
                    } else {
398
                        fail = fcompare((float *) test_data[generic_offset][j], (float *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
399
                    }
400
                } else {
401
                    //i could replace this whole switch statement with a memcmp if i wasn't interested in printing the outputs where they differ
402
                    switch(both_sigs[j].size) {
403
                    case 8:
404
                        if(both_sigs[j].is_signed) {
405
                            fail = icompare((int64_t *) test_data[generic_offset][j], (int64_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
406
                        } else {
407
                            fail = icompare((uint64_t *) test_data[generic_offset][j], (uint64_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
408
                        }
409
                        break;
410
                    case 4:
411
                        if(both_sigs[j].is_signed) {
412
                            fail = icompare((int32_t *) test_data[generic_offset][j], (int32_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
413
                        } else {
414
                            fail = icompare((uint32_t *) test_data[generic_offset][j], (uint32_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
415
                        }
416
                        break;
417
                    case 2:
418
                        if(both_sigs[j].is_signed) {
419
                            fail = icompare((int16_t *) test_data[generic_offset][j], (int16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
420
                        } else {
421
                            fail = icompare((uint16_t *) test_data[generic_offset][j], (uint16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
422
                        }
423
                        break;
424
                    case 1:
425
                        if(both_sigs[j].is_signed) {
426
                            fail = icompare((int8_t *) test_data[generic_offset][j], (int8_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
427
                        } else {
428
                            fail = icompare((uint8_t *) test_data[generic_offset][j], (uint8_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
429
                        }
430
                        break;
431
                    default:
432
                        fail=1;
433
                    }
434
                }
435
                if(fail) {
436
                    fail_global = true;
437
                    std::cout << name << ": fail on arch " << arch_list[i] << std::endl;
438
                }
439
                //fail = memcmp(outbuffs[generic_offset], outbuffs[i], outputsig[0].size * vlen * (outputsig[0].is_complex ? 2:1));
440
            }
441
        }
442
        arch_results.push_back(!fail);
443
    }
444
445
    double best_time = std::numeric_limits<double>::max();
446
    std::string best_arch = "generic";
447
    for(size_t i=0; i < arch_list.size(); i++) {
448
        if((profile_times[i] < best_time) && arch_results[i]) {
449
            best_time = profile_times[i];
450
            best_arch = arch_list[i];
451
        }
452
    }
453
454
    std::cout << "Best arch: " << best_arch << std::endl;
455
    if(best_arch_vector) {
456
        if(puppet_master_name == "NULL") {
457
            best_arch_vector->push_back(name + std::string(" ") + best_arch);
458
        }
459
        else {
460
            best_arch_vector->push_back(puppet_master_name + std::string(" ") + best_arch);
461
        }
462
    }
463
464
    return fail_global;
465
}
466
467