From bf6133d0e617ad60a8529997a7be2820507be6ba Mon Sep 17 00:00:00 2001 From: Joshua Moerman Date: Fri, 17 Jan 2014 11:47:34 +0100 Subject: [PATCH] Puts program options in a struct, because we have to explicitly share it --- wavelet/wavelet_parallel_mockup.cpp | 85 +++++++++++++++++------------ 1 file changed, 51 insertions(+), 34 deletions(-) diff --git a/wavelet/wavelet_parallel_mockup.cpp b/wavelet/wavelet_parallel_mockup.cpp index d895016..e4d5d27 100644 --- a/wavelet/wavelet_parallel_mockup.cpp +++ b/wavelet/wavelet_parallel_mockup.cpp @@ -6,25 +6,26 @@ #include "wavelet.hpp" #include "wavelet_parallel.hpp" -// Number of iterations to improve time measurements -static unsigned int iterations = 1; - -// Static :(, will be set in main -static unsigned int P; -static unsigned int N; - -// Static vectors for correctness checking +// These can be set by the user, putting them together in a struct makes it easy to bsp::put +static struct { + unsigned int P; // doesn't need to be global, as we have bsp::nprocs() + unsigned int N; + unsigned int iterations; + bool check_results; +} globals; + +// Static vectors for correctness checking (allocated on processor 0) static std::vector par_result; static std::vector seq_result; // fake data static double data(unsigned int global_index){ - return global_index - N/2.0 + 0.5 + std::sin(0.1337*global_index); + return global_index - globals.N/2.0 + 0.5 + std::sin(0.1337*global_index); } // NOTE: does not synchronize static void read_and_distribute_data(wvlt::par::proc_info const & d, wvlt::par::plan_1D plan, double* x){ - std::vector r(plan.b); + std::vector r; for(unsigned int t = 0; t < d.p; ++t){ r.assign(plan.b, 0.0); for(unsigned int i = 0; i < plan.b; ++i){ @@ -34,10 +35,21 @@ static void read_and_distribute_data(wvlt::par::proc_info const & d, wvlt::par:: } } +// gets globals from processor 0 +static void get_globals(){ + bsp::push_reg(&globals); + bsp::sync(); + bsp::get(0, &globals, 0, &globals); + bsp::sync(); + 
bsp::pop_reg(&globals); +} + static void par_wavelet(){ - bsp::begin(P); + bsp::begin(globals.P); + get_globals(); + const wvlt::par::proc_info d(bsp::nprocs(), bsp::pid()); - const wvlt::par::plan_1D plan(N, N/d.p, 2); + const wvlt::par::plan_1D plan(globals.N, globals.N/d.p, 2); // We allocate and push everything up front, since we need it anyways // (so peak memory is the same). This saves us 1 bsp::sync() @@ -49,7 +61,6 @@ static void par_wavelet(){ bsp::push_reg(x.data(), x.size()); bsp::push_reg(next.data(), next.size()); bsp::push_reg(proczero.data(), proczero.size()); - bsp::sync(); // processor zero reads data from file @@ -59,7 +70,7 @@ static void par_wavelet(){ // do the parallel wavelet!!! double time1 = bsp::time(); - for(unsigned int i = 0; i < iterations; ++i){ + for(unsigned int i = 0; i < globals.iterations; ++i){ wvlt::par::wavelet(d, plan, x.data(), next.data(), proczero.data()); bsp::sync(); } @@ -73,30 +84,33 @@ static void par_wavelet(){ next.clear(); proczero.clear(); - bsp::push_reg(par_result.data(), par_result.size()); - bsp::sync(); + if(globals.check_results){ + bsp::push_reg(par_result.data(), par_result.size()); + bsp::sync(); - bsp::put(0, x.data(), par_result.data(), d.s * plan.b, plan.b); - bsp::sync(); + bsp::put(0, x.data(), par_result.data(), d.s * plan.b, plan.b); + bsp::sync(); - bsp::pop_reg(par_result.data()); + bsp::pop_reg(par_result.data()); + } bsp::pop_reg(x.data()); bsp::end(); } static void seq_wavelet(){ - std::vector v(N); - for(unsigned int i = 0; i < N; ++i) v[i] = data(i); + std::vector v(globals.N); + for(unsigned int i = 0; i < v.size(); ++i) v[i] = data(i); { auto time1 = timer::clock::now(); - for(unsigned int i = 0; i < iterations; ++i){ + for(unsigned int i = 0; i < globals.iterations; ++i){ wvlt::wavelet(v.data(), v.size(), 1); } auto time2 = timer::clock::now(); printf("sequential version\t%f\n", timer::from_dur(time2 - time1)); } - std::copy(v.begin(), v.end(), seq_result.begin()); + 
if(globals.check_results) + std::copy(v.begin(), v.end(), seq_result.begin()); } // square difference, used to calculate root mean squared error @@ -117,6 +131,7 @@ static void compare_results(std::vector const & lh, std::vector } int main(int argc, char** argv){ + bsp::init(par_wavelet, argc, argv); namespace po = boost::program_options; // Describe program options @@ -141,12 +156,13 @@ int main(int argc, char** argv){ return 0; } - N = vm["n"].as(); - P = vm["p"].as(); - iterations = vm["iterations"].as(); + globals.N = vm["n"].as(); + globals.P = vm["p"].as(); + globals.iterations = vm["iterations"].as(); + globals.check_results = vm["check"].as(); - if(!is_pow_of_two(N)) throw po::error("n is not a power of two"); - if(!is_pow_of_two(P)) throw po::error("p is not a power of two"); + if(!is_pow_of_two(globals.N)) throw po::error("n is not a power of two"); + if(!is_pow_of_two(globals.P)) throw po::error("p is not a power of two"); } catch(std::exception& e){ std::cout << colors::red("ERROR: ") << e.what() << std::endl; std::cout << opts << std::endl; @@ -154,25 +170,26 @@ int main(int argc, char** argv){ } if(vm["show-input"].as()){ - std::cout << "n\t" << N << "\np\t" << P << std::endl; + std::cout << "n\t" << globals.N << "\np\t" << globals.P << std::endl; } // Initialise stuff - par_result.assign(N, 0.0); - seq_result.assign(N, 0.0); - bsp::init(par_wavelet, argc, argv); + if(globals.check_results){ + par_result.assign(globals.N, 0.0); + seq_result.assign(globals.N, 0.0); + } // Run both versions (will print timings) seq_wavelet(); par_wavelet(); // Checking equality of algorithms - if(vm["check"].as()){ + if(globals.check_results){ double threshold = 1.0e-8; std::cout << "Checking results "; compare_results(seq_result, par_result, threshold); - for(unsigned int i = 0; i < iterations; ++i) wvlt::unwavelet(seq_result.data(), seq_result.size(), 1); + for(unsigned int i = 0; i < globals.iterations; ++i) wvlt::unwavelet(seq_result.data(), seq_result.size(), 1); 
for(unsigned int i = 0; i < par_result.size(); ++i) par_result[i] = data(i); std::cout << "Checking inverse ";