root / volk / lib / qa_utils.cc @ bb0d5960
History | View | Annotate | Download (19 kB)
| 1 | #include "qa_utils.h" |
|---|---|
| 2 | #include <cstring> |
| 3 | #include <boost/foreach.hpp> |
| 4 | #include <boost/assign/list_of.hpp> |
| 5 | #include <boost/tokenizer.hpp> |
| 6 | #include <iostream> |
| 7 | #include <vector> |
| 8 | #include <list> |
| 9 | #include <ctime> |
| 10 | #include <cmath> |
| 11 | #include <limits> |
| 12 | #include <boost/lexical_cast.hpp> |
| 13 | #include <volk/volk.h> |
| 14 | #include <volk/volk_cpu.h> |
| 15 | #include <volk/volk_common.h> |
| 16 | #include <boost/typeof/typeof.hpp> |
| 17 | #include <boost/type_traits.hpp> |
| 18 | #include <stdio.h> |
| 19 | |
// Draw one sample from rand() and rescale it linearly so the result spans
// the interval from -1 to 1.
float uniform() {
    const float unit = static_cast<float>(rand()) / RAND_MAX; // 0 .. 1
    return 2.0 * (unit - 0.5);
}
| 23 | |
| 24 | template <class t> |
| 25 | void random_floats (t *buf, unsigned n) |
| 26 | {
|
| 27 | for (unsigned i = 0; i < n; i++) |
| 28 | buf[i] = uniform (); |
| 29 | } |
| 30 | |
| 31 | void load_random_data(void *data, volk_type_t type, unsigned int n) { |
| 32 | if(type.is_complex) n *= 2; |
| 33 | if(type.is_float) {
|
| 34 | if(type.size == 8) random_floats<double>((double *)data, n); |
| 35 | else random_floats<float>((float *)data, n); |
| 36 | } else {
|
| 37 | float int_max = float(uint64_t(2) << (type.size*8)); |
| 38 | if(type.is_signed) int_max /= 2.0; |
| 39 | for(unsigned int i=0; i<n; i++) { |
| 40 | float scaled_rand = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * int_max; |
| 41 | //man i really don't know how to do this in a more clever way, you have to cast down at some point
|
| 42 | switch(type.size) {
|
| 43 | case 8: |
| 44 | if(type.is_signed) ((int64_t *)data)[i] = (int64_t) scaled_rand;
|
| 45 | else ((uint64_t *)data)[i] = (uint64_t) scaled_rand;
|
| 46 | break;
|
| 47 | case 4: |
| 48 | if(type.is_signed) ((int32_t *)data)[i] = (int32_t) scaled_rand;
|
| 49 | else ((uint32_t *)data)[i] = (uint32_t) scaled_rand;
|
| 50 | break;
|
| 51 | case 2: |
| 52 | if(type.is_signed) ((int16_t *)data)[i] = (int16_t) scaled_rand;
|
| 53 | else ((uint16_t *)data)[i] = (uint16_t) scaled_rand;
|
| 54 | break;
|
| 55 | case 1: |
| 56 | if(type.is_signed) ((int8_t *)data)[i] = (int8_t) scaled_rand;
|
| 57 | else ((uint8_t *)data)[i] = (uint8_t) scaled_rand;
|
| 58 | break;
|
| 59 | default:
|
| 60 | throw "load_random_data: no support for data size > 8 or < 1"; //no shenanigans here |
| 61 | } |
| 62 | } |
| 63 | } |
| 64 | } |
| 65 | |
| 66 | static std::vector<std::string> get_arch_list(struct volk_func_desc desc) { |
| 67 | std::vector<std::string> archlist;
|
| 68 | |
| 69 | for(int i = 0; i < desc.n_archs; i++) { |
| 70 | //if(!(archs[i+1] & volk_get_lvarch())) continue; //this arch isn't available on this pc
|
| 71 | archlist.push_back(std::string(desc.indices[i]));
|
| 72 | } |
| 73 | |
| 74 | return archlist;
|
| 75 | } |
| 76 | |
| 77 | volk_type_t volk_type_from_string(std::string name) {
|
| 78 | volk_type_t type; |
| 79 | type.is_float = false;
|
| 80 | type.is_scalar = false;
|
| 81 | type.is_complex = false;
|
| 82 | type.is_signed = false;
|
| 83 | type.size = 0;
|
| 84 | type.str = name; |
| 85 | |
| 86 | if(name.size() < 2) throw std::string("name too short to be a datatype"); |
| 87 | |
| 88 | //is it a scalar?
|
| 89 | if(name[0] == 's') { |
| 90 | type.is_scalar = true;
|
| 91 | name = name.substr(1, name.size()-1); |
| 92 | } |
| 93 | |
| 94 | //get the data size
|
| 95 | size_t last_size_pos = name.find_last_of("0123456789");
|
| 96 | if(last_size_pos < 0) throw std::string("no size spec in type ").append(name); |
| 97 | //will throw if malformed
|
| 98 | int size = boost::lexical_cast<int>(name.substr(0, last_size_pos+1)); |
| 99 | |
| 100 | assert(((size % 8) == 0) && (size <= 64) && (size != 0)); |
| 101 | type.size = size/8; //in bytes |
| 102 | |
| 103 | for(size_t i=last_size_pos+1; i < name.size(); i++) { |
| 104 | switch (name[i]) {
|
| 105 | case 'f': |
| 106 | type.is_float = true;
|
| 107 | break;
|
| 108 | case 'i': |
| 109 | type.is_signed = true;
|
| 110 | break;
|
| 111 | case 'c': |
| 112 | type.is_complex = true;
|
| 113 | break;
|
| 114 | case 'u': |
| 115 | type.is_signed = false;
|
| 116 | break;
|
| 117 | default:
|
| 118 | throw;
|
| 119 | } |
| 120 | } |
| 121 | |
| 122 | return type;
|
| 123 | } |
| 124 | |
| 125 | static void get_signatures_from_name(std::vector<volk_type_t> &inputsig, |
| 126 | std::vector<volk_type_t> &outputsig, |
| 127 | std::string name) {
|
| 128 | boost::char_separator<char> sep("_"); |
| 129 | boost::tokenizer<boost::char_separator<char> > tok(name, sep);
|
| 130 | std::vector<std::string> toked;
|
| 131 | tok.assign(name); |
| 132 | toked.assign(tok.begin(), tok.end()); |
| 133 | |
| 134 | assert(toked[0] == "volk"); |
| 135 | toked.erase(toked.begin()); |
| 136 | |
| 137 | //ok. we're assuming a string in the form
|
| 138 | //(sig)_(multiplier-opt)_..._(name)_(sig)_(multiplier-opt)_..._(alignment)
|
| 139 | |
| 140 | enum { SIDE_INPUT, SIDE_NAME, SIDE_OUTPUT } side = SIDE_INPUT;
|
| 141 | std::string fn_name;
|
| 142 | volk_type_t type; |
| 143 | BOOST_FOREACH(std::string token, toked) {
|
| 144 | try {
|
| 145 | type = volk_type_from_string(token); |
| 146 | if(side == SIDE_NAME) side = SIDE_OUTPUT; //if this is the first one after the name... |
| 147 | |
| 148 | if(side == SIDE_INPUT) inputsig.push_back(type);
|
| 149 | else outputsig.push_back(type);
|
| 150 | } catch (...){
|
| 151 | if(token[0] == 'x') { //it's a multiplier |
| 152 | if(side == SIDE_INPUT) assert(inputsig.size() > 0); |
| 153 | else assert(outputsig.size() > 0); |
| 154 | int multiplier = boost::lexical_cast<int>(token.substr(1, token.size()-1)); //will throw if invalid |
| 155 | for(int i=1; i<multiplier; i++) { |
| 156 | if(side == SIDE_INPUT) inputsig.push_back(inputsig.back());
|
| 157 | else outputsig.push_back(outputsig.back());
|
| 158 | } |
| 159 | } |
| 160 | else if(side == SIDE_INPUT) { //it's the function name, at least it better be |
| 161 | side = SIDE_NAME; |
| 162 | fn_name.append("_");
|
| 163 | fn_name.append(token); |
| 164 | } |
| 165 | else if(side == SIDE_OUTPUT) { |
| 166 | if(token != toked.back()) throw; //the last token in the name is the alignment |
| 167 | } |
| 168 | } |
| 169 | } |
| 170 | //we don't need an output signature (some fn's operate on the input data, "in place"), but we do need at least one input!
|
| 171 | assert(inputsig.size() != 0);
|
| 172 | |
| 173 | } |
| 174 | |
| 175 | inline void run_cast_test1(volk_fn_1arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) { |
| 176 | while(iter--) func(buffs[0], vlen, arch.c_str()); |
| 177 | } |
| 178 | |
| 179 | inline void run_cast_test2(volk_fn_2arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) { |
| 180 | while(iter--) func(buffs[0], buffs[1], vlen, arch.c_str()); |
| 181 | } |
| 182 | |
| 183 | inline void run_cast_test3(volk_fn_3arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) { |
| 184 | while(iter--) func(buffs[0], buffs[1], buffs[2], vlen, arch.c_str()); |
| 185 | } |
| 186 | |
| 187 | inline void run_cast_test4(volk_fn_4arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) { |
| 188 | while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], vlen, arch.c_str()); |
| 189 | } |
| 190 | |
| 191 | inline void run_cast_test1_s32f(volk_fn_1arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { |
| 192 | while(iter--) func(buffs[0], scalar, vlen, arch.c_str()); |
| 193 | } |
| 194 | |
| 195 | inline void run_cast_test2_s32f(volk_fn_2arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { |
| 196 | while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str()); |
| 197 | } |
| 198 | |
| 199 | inline void run_cast_test3_s32f(volk_fn_3arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { |
| 200 | while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str()); |
| 201 | } |
| 202 | |
| 203 | inline void run_cast_test1_s32fc(volk_fn_1arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) { |
| 204 | while(iter--) func(buffs[0], scalar, vlen, arch.c_str()); |
| 205 | } |
| 206 | |
| 207 | inline void run_cast_test2_s32fc(volk_fn_2arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) { |
| 208 | while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str()); |
| 209 | } |
| 210 | |
| 211 | inline void run_cast_test3_s32fc(volk_fn_3arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) { |
| 212 | while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str()); |
| 213 | } |
| 214 | |
// Compare two floating point buffers element by element using a relative
// tolerance.
//
//   in1   reference values
//   in2   values under test
//   vlen  number of elements to compare
//   tol   largest accepted |in1 - in2| / |in1|
//
// Returns true when at least one element is out of tolerance (i.e. the
// comparison FAILED). The first ten offending elements are printed to
// std::cout.
//
// Elements whose reference magnitude is below 1e-30 are skipped, because the
// relative error is dominated by round-off there. Both that test and the
// relative error use the magnitude of the reference value, so negative
// reference values are checked exactly like positive ones.
template <class t>
bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) {
    bool fail = false;
    int print_max_errs = 10;
    for(unsigned int i=0; i<vlen; i++) {
        const t ref = in1[i];
        const t got = in2[i];
        const t ref_mag = std::fabs(ref);

        if(ref_mag < 1e-30) continue; //this is a hack: below around here we'll start to get roundoff errors due to limited precision

        if(std::fabs(ref - got) / ref_mag > tol) {
            fail = true;
            if(print_max_errs-- > 0) {
                std::cout << "offset " << i << " in1: " << ref << " in2: " << got << std::endl;
            }
        }
    }

    return fail;
}
| 231 | |
// Compare two integer buffers element by element using an absolute tolerance.
//
//   in1   reference values
//   in2   values under test
//   vlen  number of elements to compare
//   tol   largest accepted |in1 - in2|
//
// Returns true when at least one element differs by more than tol (i.e. the
// comparison FAILED). The first ten offending elements are printed to
// std::cout.
//
// The difference is formed in 64-bit unsigned arithmetic, so it is exact for
// every 8/16/32/64-bit signed or unsigned element type: no truncation to int
// and no signed overflow.
template <class t>
bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) {
    bool fail = false;
    int print_max_errs = 10;
    for(unsigned int i=0; i<vlen; i++) {
        const t ref = in1[i];
        const t got = in2[i];

        // larger minus smaller, computed modulo 2^64: the true distance always
        // fits in 64 bits, so the wrapped result equals the real magnitude
        const uint64_t upper = static_cast<uint64_t>(ref > got ? ref : got);
        const uint64_t lower = static_cast<uint64_t>(ref > got ? got : ref);
        const uint64_t distance = upper - lower;

        if(distance > tol) {
            fail = true;
            if(print_max_errs-- > 0) {
                // widen before printing so that 8-bit types are shown as
                // numbers and 64-bit values are not truncated
                if(std::numeric_limits<t>::is_signed) {
                    std::cout << "offset " << i << " in1: " << static_cast<int64_t>(ref) << " in2: " << static_cast<int64_t>(got) << std::endl;
                } else {
                    std::cout << "offset " << i << " in1: " << static_cast<uint64_t>(ref) << " in2: " << static_cast<uint64_t>(got) << std::endl;
                }
            }
        }
    }

    return fail;
}
| 247 | |
| 248 | class volk_qa_aligned_mem_pool{ |
| 249 | public:
|
| 250 | void *get_new(size_t size){
|
| 251 | size_t alignment = volk_get_alignment(); |
| 252 | _mems.push_back(std::vector<char>(size + alignment-1, 0)); |
| 253 | size_t ptr = size_t(&_mems.back().front()); |
| 254 | return (void *)((ptr + alignment-1) & ~(alignment-1)); |
| 255 | } |
| 256 | private: std::list<std::vector<char> > _mems; |
| 257 | }; |
| 258 | |
| 259 | bool run_volk_tests(struct volk_func_desc desc, |
| 260 | void (*manual_func)(),
|
| 261 | std::string name,
|
| 262 | float tol,
|
| 263 | lv_32fc_t scalar, |
| 264 | int vlen,
|
| 265 | int iter,
|
| 266 | std::vector<std::string> *best_arch_vector = 0, |
| 267 | std::string puppet_master_name = "NULL" |
| 268 | ) {
|
| 269 | std::cout << "RUN_VOLK_TESTS: " << name << std::endl;
|
| 270 | |
| 271 | //first let's get a list of available architectures for the test
|
| 272 | std::vector<std::string> arch_list = get_arch_list(desc);
|
| 273 | |
| 274 | if(arch_list.size() < 2) { |
| 275 | std::cout << "no architectures to test" << std::endl;
|
| 276 | return false; |
| 277 | } |
| 278 | |
| 279 | //something that can hang onto memory and cleanup when this function exits
|
| 280 | volk_qa_aligned_mem_pool mem_pool; |
| 281 | |
| 282 | //now we have to get a function signature by parsing the name
|
| 283 | std::vector<volk_type_t> inputsig, outputsig; |
| 284 | get_signatures_from_name(inputsig, outputsig, name); |
| 285 | |
| 286 | //pull the input scalars into their own vector
|
| 287 | std::vector<volk_type_t> inputsc; |
| 288 | for(size_t i=0; i<inputsig.size(); i++) { |
| 289 | if(inputsig[i].is_scalar) {
|
| 290 | inputsc.push_back(inputsig[i]); |
| 291 | inputsig.erase(inputsig.begin() + i); |
| 292 | i -= 1;
|
| 293 | } |
| 294 | } |
| 295 | //for(int i=0; i<inputsig.size(); i++) std::cout << "Input: " << inputsig[i].str << std::endl;
|
| 296 | //for(int i=0; i<outputsig.size(); i++) std::cout << "Output: " << outputsig[i].str << std::endl;
|
| 297 | std::vector<void *> inbuffs;
|
| 298 | BOOST_FOREACH(volk_type_t sig, inputsig) {
|
| 299 | if(!sig.is_scalar) //we don't make buffers for scalars |
| 300 | inbuffs.push_back(mem_pool.get_new(vlen*sig.size*(sig.is_complex ? 2 : 1))); |
| 301 | } |
| 302 | for(size_t i=0; i<inbuffs.size(); i++) { |
| 303 | load_random_data(inbuffs[i], inputsig[i], vlen); |
| 304 | } |
| 305 | |
| 306 | //ok let's make a vector of vector of void buffers, which holds the input/output vectors for each arch
|
| 307 | std::vector<std::vector<void *> > test_data;
|
| 308 | for(size_t i=0; i<arch_list.size(); i++) { |
| 309 | std::vector<void *> arch_buffs;
|
| 310 | for(size_t j=0; j<outputsig.size(); j++) { |
| 311 | arch_buffs.push_back(mem_pool.get_new(vlen*outputsig[j].size*(outputsig[j].is_complex ? 2 : 1))); |
| 312 | } |
| 313 | for(size_t j=0; j<inputsig.size(); j++) { |
| 314 | arch_buffs.push_back(inbuffs[j]); |
| 315 | } |
| 316 | test_data.push_back(arch_buffs); |
| 317 | } |
| 318 | |
| 319 | std::vector<volk_type_t> both_sigs; |
| 320 | both_sigs.insert(both_sigs.end(), outputsig.begin(), outputsig.end()); |
| 321 | both_sigs.insert(both_sigs.end(), inputsig.begin(), inputsig.end()); |
| 322 | |
| 323 | //now run the test
|
| 324 | clock_t start, end; |
| 325 | std::vector<double> profile_times;
|
| 326 | for(size_t i = 0; i < arch_list.size(); i++) { |
| 327 | start = clock(); |
| 328 | |
| 329 | switch(both_sigs.size()) {
|
| 330 | case 1: |
| 331 | if(inputsc.size() == 0) { |
| 332 | run_cast_test1((volk_fn_1arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); |
| 333 | } else if(inputsc.size() == 1 && inputsc[0].is_float) { |
| 334 | if(inputsc[0].is_complex) { |
| 335 | run_cast_test1_s32fc((volk_fn_1arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); |
| 336 | } else {
|
| 337 | run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]); |
| 338 | } |
| 339 | } else throw "unsupported 1 arg function >1 scalars"; |
| 340 | break;
|
| 341 | case 2: |
| 342 | if(inputsc.size() == 0) { |
| 343 | run_cast_test2((volk_fn_2arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); |
| 344 | } else if(inputsc.size() == 1 && inputsc[0].is_float) { |
| 345 | if(inputsc[0].is_complex) { |
| 346 | run_cast_test2_s32fc((volk_fn_2arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); |
| 347 | } else {
|
| 348 | run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]); |
| 349 | } |
| 350 | } else throw "unsupported 2 arg function >1 scalars"; |
| 351 | break;
|
| 352 | case 3: |
| 353 | if(inputsc.size() == 0) { |
| 354 | run_cast_test3((volk_fn_3arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); |
| 355 | } else if(inputsc.size() == 1 && inputsc[0].is_float) { |
| 356 | if(inputsc[0].is_complex) { |
| 357 | run_cast_test3_s32fc((volk_fn_3arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); |
| 358 | } else {
|
| 359 | run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]); |
| 360 | } |
| 361 | } else throw "unsupported 3 arg function >1 scalars"; |
| 362 | break;
|
| 363 | case 4: |
| 364 | run_cast_test4((volk_fn_4arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); |
| 365 | break;
|
| 366 | default:
|
| 367 | throw "no function handler for this signature"; |
| 368 | break;
|
| 369 | } |
| 370 | |
| 371 | end = clock(); |
| 372 | double arch_time = (double)(end-start)/(double)CLOCKS_PER_SEC; |
| 373 | std::cout << arch_list[i] << " completed in " << arch_time << "s" << std::endl; |
| 374 | |
| 375 | profile_times.push_back(arch_time); |
| 376 | } |
| 377 | |
| 378 | //and now compare each output to the generic output
|
| 379 | //first we have to know which output is the generic one, they aren't in order...
|
| 380 | size_t generic_offset=0;
|
| 381 | for(size_t i=0; i<arch_list.size(); i++) |
| 382 | if(arch_list[i] == "generic") generic_offset=i; |
| 383 | |
| 384 | //now compare
|
| 385 | //if(outputsig.size() == 0) outputsig = inputsig; //a hack, i know
|
| 386 | |
| 387 | bool fail = false; |
| 388 | bool fail_global = false; |
| 389 | std::vector<bool> arch_results;
|
| 390 | for(size_t i=0; i<arch_list.size(); i++) { |
| 391 | fail = false;
|
| 392 | if(i != generic_offset) {
|
| 393 | for(size_t j=0; j<both_sigs.size(); j++) { |
| 394 | if(both_sigs[j].is_float) {
|
| 395 | if(both_sigs[j].size == 8) { |
| 396 | fail = fcompare((double *) test_data[generic_offset][j], (double *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol); |
| 397 | } else {
|
| 398 | fail = fcompare((float *) test_data[generic_offset][j], (float *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol); |
| 399 | } |
| 400 | } else {
|
| 401 | //i could replace this whole switch statement with a memcmp if i wasn't interested in printing the outputs where they differ
|
| 402 | switch(both_sigs[j].size) {
|
| 403 | case 8: |
| 404 | if(both_sigs[j].is_signed) {
|
| 405 | fail = icompare((int64_t *) test_data[generic_offset][j], (int64_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol); |
| 406 | } else {
|
| 407 | fail = icompare((uint64_t *) test_data[generic_offset][j], (uint64_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol); |
| 408 | } |
| 409 | break;
|
| 410 | case 4: |
| 411 | if(both_sigs[j].is_signed) {
|
| 412 | fail = icompare((int32_t *) test_data[generic_offset][j], (int32_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol); |
| 413 | } else {
|
| 414 | fail = icompare((uint32_t *) test_data[generic_offset][j], (uint32_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol); |
| 415 | } |
| 416 | break;
|
| 417 | case 2: |
| 418 | if(both_sigs[j].is_signed) {
|
| 419 | fail = icompare((int16_t *) test_data[generic_offset][j], (int16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol); |
| 420 | } else {
|
| 421 | fail = icompare((uint16_t *) test_data[generic_offset][j], (uint16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol); |
| 422 | } |
| 423 | break;
|
| 424 | case 1: |
| 425 | if(both_sigs[j].is_signed) {
|
| 426 | fail = icompare((int8_t *) test_data[generic_offset][j], (int8_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol); |
| 427 | } else {
|
| 428 | fail = icompare((uint8_t *) test_data[generic_offset][j], (uint8_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol); |
| 429 | } |
| 430 | break;
|
| 431 | default:
|
| 432 | fail=1;
|
| 433 | } |
| 434 | } |
| 435 | if(fail) {
|
| 436 | fail_global = true;
|
| 437 | std::cout << name << ": fail on arch " << arch_list[i] << std::endl;
|
| 438 | } |
| 439 | //fail = memcmp(outbuffs[generic_offset], outbuffs[i], outputsig[0].size * vlen * (outputsig[0].is_complex ? 2:1));
|
| 440 | } |
| 441 | } |
| 442 | arch_results.push_back(!fail); |
| 443 | } |
| 444 | |
| 445 | double best_time = std::numeric_limits<double>::max(); |
| 446 | std::string best_arch = "generic"; |
| 447 | for(size_t i=0; i < arch_list.size(); i++) { |
| 448 | if((profile_times[i] < best_time) && arch_results[i]) {
|
| 449 | best_time = profile_times[i]; |
| 450 | best_arch = arch_list[i]; |
| 451 | } |
| 452 | } |
| 453 | |
| 454 | std::cout << "Best arch: " << best_arch << std::endl;
|
| 455 | if(best_arch_vector) {
|
| 456 | if(puppet_master_name == "NULL") { |
| 457 | best_arch_vector->push_back(name + std::string(" ") + best_arch); |
| 458 | } |
| 459 | else {
|
| 460 | best_arch_vector->push_back(puppet_master_name + std::string(" ") + best_arch); |
| 461 | } |
| 462 | } |
| 463 | |
| 464 | return fail_global;
|
| 465 | } |
| 466 | |
| 467 |