|
|
@ -6,25 +6,26 @@ |
|
|
|
#include "wavelet.hpp" |
|
|
|
#include "wavelet_parallel.hpp" |
|
|
|
|
|
|
|
// Number of iterations to improve time measurements
|
|
|
|
static unsigned int iterations = 1; |
|
|
|
|
|
|
|
// Static :(, will be set in main
|
|
|
|
static unsigned int P; |
|
|
|
static unsigned int N; |
|
|
|
|
|
|
|
// Static vectors for correctness checking
|
|
|
|
// These can be set by the user, putting them together in a structs makes it easy to bsp::put
|
|
|
|
static struct { |
|
|
|
unsigned int P; // doesn't need to be global, as we have bsp::nprocs()
|
|
|
|
unsigned int N; |
|
|
|
unsigned int iterations; |
|
|
|
bool check_results; |
|
|
|
} globals; |
|
|
|
|
|
|
|
// Static vectors for correctness checking (allocated on precessor 0)
|
|
|
|
static std::vector<double> par_result; |
|
|
|
static std::vector<double> seq_result; |
|
|
|
|
|
|
|
// fake data
|
|
|
|
static double data(unsigned int global_index){ |
|
|
|
return global_index - N/2.0 + 0.5 + std::sin(0.1337*global_index); |
|
|
|
return global_index - globals.N/2.0 + 0.5 + std::sin(0.1337*global_index); |
|
|
|
} |
|
|
|
|
|
|
|
// NOTE: does not synchronize
|
|
|
|
static void read_and_distribute_data(wvlt::par::proc_info const & d, wvlt::par::plan_1D plan, double* x){ |
|
|
|
std::vector<double> r(plan.b); |
|
|
|
std::vector<double> r; |
|
|
|
for(unsigned int t = 0; t < d.p; ++t){ |
|
|
|
r.assign(plan.b, 0.0); |
|
|
|
for(unsigned int i = 0; i < plan.b; ++i){ |
|
|
@ -34,10 +35,21 @@ static void read_and_distribute_data(wvlt::par::proc_info const & d, wvlt::par:: |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
// gets globals from processor 0
|
|
|
|
static void get_globals(){ |
|
|
|
bsp::push_reg(&globals); |
|
|
|
bsp::sync(); |
|
|
|
bsp::get(0, &globals, 0, &globals); |
|
|
|
bsp::sync(); |
|
|
|
bsp::pop_reg(&globals); |
|
|
|
} |
|
|
|
|
|
|
|
static void par_wavelet(){ |
|
|
|
bsp::begin(P); |
|
|
|
bsp::begin(globals.P); |
|
|
|
get_globals(); |
|
|
|
|
|
|
|
const wvlt::par::proc_info d(bsp::nprocs(), bsp::pid()); |
|
|
|
const wvlt::par::plan_1D plan(N, N/d.p, 2); |
|
|
|
const wvlt::par::plan_1D plan(globals.N, globals.N/d.p, 2); |
|
|
|
|
|
|
|
// We allocate and push everything up front, since we need it anyways
|
|
|
|
// (so peak memory is the same). This saves us 1 bsp::sync()
|
|
|
@ -49,7 +61,6 @@ static void par_wavelet(){ |
|
|
|
bsp::push_reg(x.data(), x.size()); |
|
|
|
bsp::push_reg(next.data(), next.size()); |
|
|
|
bsp::push_reg(proczero.data(), proczero.size()); |
|
|
|
|
|
|
|
bsp::sync(); |
|
|
|
|
|
|
|
// processor zero reads data from file
|
|
|
@ -59,7 +70,7 @@ static void par_wavelet(){ |
|
|
|
|
|
|
|
// do the parallel wavelet!!!
|
|
|
|
double time1 = bsp::time(); |
|
|
|
for(unsigned int i = 0; i < iterations; ++i){ |
|
|
|
for(unsigned int i = 0; i < globals.iterations; ++i){ |
|
|
|
wvlt::par::wavelet(d, plan, x.data(), next.data(), proczero.data()); |
|
|
|
bsp::sync(); |
|
|
|
} |
|
|
@ -73,30 +84,33 @@ static void par_wavelet(){ |
|
|
|
next.clear(); |
|
|
|
proczero.clear(); |
|
|
|
|
|
|
|
bsp::push_reg(par_result.data(), par_result.size()); |
|
|
|
bsp::sync(); |
|
|
|
if(globals.check_results){ |
|
|
|
bsp::push_reg(par_result.data(), par_result.size()); |
|
|
|
bsp::sync(); |
|
|
|
|
|
|
|
bsp::put(0, x.data(), par_result.data(), d.s * plan.b, plan.b); |
|
|
|
bsp::sync(); |
|
|
|
bsp::put(0, x.data(), par_result.data(), d.s * plan.b, plan.b); |
|
|
|
bsp::sync(); |
|
|
|
|
|
|
|
bsp::pop_reg(par_result.data()); |
|
|
|
bsp::pop_reg(par_result.data()); |
|
|
|
} |
|
|
|
bsp::pop_reg(x.data()); |
|
|
|
bsp::end(); |
|
|
|
} |
|
|
|
|
|
|
|
static void seq_wavelet(){ |
|
|
|
std::vector<double> v(N); |
|
|
|
for(unsigned int i = 0; i < N; ++i) v[i] = data(i); |
|
|
|
std::vector<double> v(globals.N); |
|
|
|
for(unsigned int i = 0; i < v.size(); ++i) v[i] = data(i); |
|
|
|
|
|
|
|
{ auto time1 = timer::clock::now(); |
|
|
|
for(unsigned int i = 0; i < iterations; ++i){ |
|
|
|
for(unsigned int i = 0; i < globals.iterations; ++i){ |
|
|
|
wvlt::wavelet(v.data(), v.size(), 1); |
|
|
|
} |
|
|
|
auto time2 = timer::clock::now(); |
|
|
|
printf("sequential version\t%f\n", timer::from_dur(time2 - time1)); |
|
|
|
} |
|
|
|
|
|
|
|
std::copy(v.begin(), v.end(), seq_result.begin()); |
|
|
|
if(globals.check_results) |
|
|
|
std::copy(v.begin(), v.end(), seq_result.begin()); |
|
|
|
} |
|
|
|
|
|
|
|
// square difference, used to calculate root mean squared error
|
|
|
@ -117,6 +131,7 @@ static void compare_results(std::vector<double> const & lh, std::vector<double> |
|
|
|
} |
|
|
|
|
|
|
|
int main(int argc, char** argv){ |
|
|
|
bsp::init(par_wavelet, argc, argv); |
|
|
|
namespace po = boost::program_options; |
|
|
|
|
|
|
|
// Describe program options
|
|
|
@ -141,12 +156,13 @@ int main(int argc, char** argv){ |
|
|
|
return 0; |
|
|
|
} |
|
|
|
|
|
|
|
N = vm["n"].as<unsigned int>(); |
|
|
|
P = vm["p"].as<unsigned int>(); |
|
|
|
iterations = vm["iterations"].as<unsigned int>(); |
|
|
|
globals.N = vm["n"].as<unsigned int>(); |
|
|
|
globals.P = vm["p"].as<unsigned int>(); |
|
|
|
globals.iterations = vm["iterations"].as<unsigned int>(); |
|
|
|
globals.check_results = vm["check"].as<bool>(); |
|
|
|
|
|
|
|
if(!is_pow_of_two(N)) throw po::error("n is not a power of two"); |
|
|
|
if(!is_pow_of_two(P)) throw po::error("p is not a power of two"); |
|
|
|
if(!is_pow_of_two(globals.N)) throw po::error("n is not a power of two"); |
|
|
|
if(!is_pow_of_two(globals.P)) throw po::error("p is not a power of two"); |
|
|
|
} catch(std::exception& e){ |
|
|
|
std::cout << colors::red("ERROR: ") << e.what() << std::endl; |
|
|
|
std::cout << opts << std::endl; |
|
|
@ -154,25 +170,26 @@ int main(int argc, char** argv){ |
|
|
|
} |
|
|
|
|
|
|
|
if(vm["show-input"].as<bool>()){ |
|
|
|
std::cout << "n\t" << N << "\np\t" << P << std::endl; |
|
|
|
std::cout << "n\t" << globals.N << "\np\t" << globals.P << std::endl; |
|
|
|
} |
|
|
|
|
|
|
|
// Initialise stuff
|
|
|
|
par_result.assign(N, 0.0); |
|
|
|
seq_result.assign(N, 0.0); |
|
|
|
bsp::init(par_wavelet, argc, argv); |
|
|
|
if(globals.check_results){ |
|
|
|
par_result.assign(globals.N, 0.0); |
|
|
|
seq_result.assign(globals.N, 0.0); |
|
|
|
} |
|
|
|
|
|
|
|
// Run both versions (will print timings)
|
|
|
|
seq_wavelet(); |
|
|
|
par_wavelet(); |
|
|
|
|
|
|
|
// Checking equality of algorithms
|
|
|
|
if(vm["check"].as<bool>()){ |
|
|
|
if(globals.check_results){ |
|
|
|
double threshold = 1.0e-8; |
|
|
|
std::cout << "Checking results "; |
|
|
|
compare_results(seq_result, par_result, threshold); |
|
|
|
|
|
|
|
for(unsigned int i = 0; i < iterations; ++i) wvlt::unwavelet(seq_result.data(), seq_result.size(), 1); |
|
|
|
for(unsigned int i = 0; i < globals.iterations; ++i) wvlt::unwavelet(seq_result.data(), seq_result.size(), 1); |
|
|
|
for(unsigned int i = 0; i < par_result.size(); ++i) par_result[i] = data(i); |
|
|
|
|
|
|
|
std::cout << "Checking inverse "; |
|
|
|