From bf6133d0e617ad60a8529997a7be2820507be6ba Mon Sep 17 00:00:00 2001
From: Joshua Moerman <lakseru@gmail.com>
Date: Fri, 17 Jan 2014 11:47:34 +0100
Subject: [PATCH] Puts program options in a struct, because we have to
 explicitly share it

---
 wavelet/wavelet_parallel_mockup.cpp | 85 +++++++++++++++++------------
 1 file changed, 51 insertions(+), 34 deletions(-)
diff --git a/wavelet/wavelet_parallel_mockup.cpp b/wavelet/wavelet_parallel_mockup.cpp
index d895016..e4d5d27 100644
--- a/wavelet/wavelet_parallel_mockup.cpp
+++ b/wavelet/wavelet_parallel_mockup.cpp
@@ -6,25 +6,26 @@
 #include "wavelet.hpp"
 #include "wavelet_parallel.hpp"
 
-// Number of iterations to improve time measurements
-static unsigned int iterations = 1;
-
-// Static :(, will be set in main
-static unsigned int P;
-static unsigned int N;
-
-// Static vectors for correctness checking
+// These can be set by the user; putting them together in a struct makes it easy to share them with bsp::get
+static struct {
+	unsigned int P; // doesn't need to be global, as we have bsp::nprocs()
+	unsigned int N;
+	unsigned int iterations;
+	bool check_results;
+} globals;
+
+// Static vectors for correctness checking (allocated on processor 0)
 static std::vector<double> par_result;
 static std::vector<double> seq_result;
 
 // fake data
 static double data(unsigned int global_index){
-	return global_index - N/2.0 + 0.5 + std::sin(0.1337*global_index);
+	return global_index - globals.N/2.0 + 0.5 + std::sin(0.1337*global_index);
 }
 
 // NOTE: does not synchronize
 static void read_and_distribute_data(wvlt::par::proc_info const & d, wvlt::par::plan_1D plan, double* x){
-	std::vector<double> r(plan.b);
+	std::vector<double> r;
 	for(unsigned int t = 0; t < d.p; ++t){
 		r.assign(plan.b, 0.0);
 		for(unsigned int i = 0; i < plan.b; ++i){
@@ -34,10 +35,21 @@ static void read_and_distribute_data(wvlt::par::proc_info const & d, wvlt::par::
 	}
 }
 
+// gets globals from processor 0
+static void get_globals(){
+	bsp::push_reg(&globals);
+	bsp::sync();
+	bsp::get(0, &globals, 0, &globals);
+	bsp::sync();
+	bsp::pop_reg(&globals);
+}
+
 static void par_wavelet(){
-	bsp::begin(P);
+	bsp::begin(globals.P);
+	get_globals();
+
 	const wvlt::par::proc_info d(bsp::nprocs(), bsp::pid());
-	const wvlt::par::plan_1D plan(N, N/d.p, 2);
+	const wvlt::par::plan_1D plan(globals.N, globals.N/d.p, 2);
 
 	// We allocate and push everything up front, since we need it anyways
 	// (so peak memory is the same). This saves us 1 bsp::sync()
@@ -49,7 +61,6 @@ static void par_wavelet(){
 	bsp::push_reg(x.data(), x.size());
 	bsp::push_reg(next.data(), next.size());
 	bsp::push_reg(proczero.data(), proczero.size());
-
 	bsp::sync();
 
 	// processor zero reads data from file
@@ -59,7 +70,7 @@ static void par_wavelet(){
 
 	// do the parallel wavelet!!!
 	double time1 = bsp::time();
-	for(unsigned int i = 0; i < iterations; ++i){
+	for(unsigned int i = 0; i < globals.iterations; ++i){
 		wvlt::par::wavelet(d, plan, x.data(), next.data(), proczero.data());
 		bsp::sync();
 	}
@@ -73,30 +84,33 @@ static void par_wavelet(){
 	next.clear();
 	proczero.clear();
 
-	bsp::push_reg(par_result.data(), par_result.size());
-	bsp::sync();
+	if(globals.check_results){
+		bsp::push_reg(par_result.data(), par_result.size());
+		bsp::sync();
 
-	bsp::put(0, x.data(), par_result.data(), d.s * plan.b, plan.b);
-	bsp::sync();
+		bsp::put(0, x.data(), par_result.data(), d.s * plan.b, plan.b);
+		bsp::sync();
 
-	bsp::pop_reg(par_result.data());
+		bsp::pop_reg(par_result.data());
+	}
 	bsp::pop_reg(x.data());
 	bsp::end();
 }
 
 static void seq_wavelet(){
-	std::vector<double> v(N);
-	for(unsigned int i = 0; i < N; ++i) v[i] = data(i);
+	std::vector<double> v(globals.N);
+	for(unsigned int i = 0; i < v.size(); ++i) v[i] = data(i);
 
 	{	auto time1 = timer::clock::now();
-		for(unsigned int i = 0; i < iterations; ++i){
+		for(unsigned int i = 0; i < globals.iterations; ++i){
 			wvlt::wavelet(v.data(), v.size(), 1);
 		}
 		auto time2 = timer::clock::now();
 		printf("sequential version\t%f\n", timer::from_dur(time2 - time1));
 	}
 
-	std::copy(v.begin(), v.end(), seq_result.begin());
+	if(globals.check_results)
+		std::copy(v.begin(), v.end(), seq_result.begin());
 }
 
 // square difference, used to calculate root mean squared error
@@ -117,6 +131,7 @@ static void compare_results(std::vector<double> const & lh, std::vector<double>
 }
 
 int main(int argc, char** argv){
+	bsp::init(par_wavelet, argc, argv);
 	namespace po = boost::program_options;
 
 	// Describe program options
@@ -141,12 +156,13 @@ int main(int argc, char** argv){
 			return 0;
 		}
 
-		N = vm["n"].as<unsigned int>();
-		P = vm["p"].as<unsigned int>();
-		iterations = vm["iterations"].as<unsigned int>();
+		globals.N = vm["n"].as<unsigned int>();
+		globals.P = vm["p"].as<unsigned int>();
+		globals.iterations = vm["iterations"].as<unsigned int>();
+		globals.check_results = vm["check"].as<bool>();
 
-		if(!is_pow_of_two(N)) throw po::error("n is not a power of two");
-		if(!is_pow_of_two(P)) throw po::error("p is not a power of two");
+		if(!is_pow_of_two(globals.N)) throw po::error("n is not a power of two");
+		if(!is_pow_of_two(globals.P)) throw po::error("p is not a power of two");
 	} catch(std::exception& e){
 		std::cout << colors::red("ERROR: ") << e.what() << std::endl;
 		std::cout << opts << std::endl;
@@ -154,25 +170,26 @@ int main(int argc, char** argv){
 	}
 
 	if(vm["show-input"].as<bool>()){
-		std::cout << "n\t" << N << "\np\t" << P << std::endl;
+		std::cout << "n\t" << globals.N << "\np\t" << globals.P << std::endl;
 	}
 
 	// Initialise stuff
-	par_result.assign(N, 0.0);
-	seq_result.assign(N, 0.0);
-	bsp::init(par_wavelet, argc, argv);
+	if(globals.check_results){
+		par_result.assign(globals.N, 0.0);
+		seq_result.assign(globals.N, 0.0);
+	}
 
 	// Run both versions (will print timings)
 	seq_wavelet();
 	par_wavelet();
 
 	// Checking equality of algorithms
-	if(vm["check"].as<bool>()){
+	if(globals.check_results){
 		double threshold = 1.0e-8;
 		std::cout << "Checking results ";
 		compare_results(seq_result, par_result, threshold);
 
-		for(unsigned int i = 0; i < iterations; ++i) wvlt::unwavelet(seq_result.data(), seq_result.size(), 1);
+		for(unsigned int i = 0; i < globals.iterations; ++i) wvlt::unwavelet(seq_result.data(), seq_result.size(), 1);
 		for(unsigned int i = 0; i < par_result.size(); ++i) par_result[i] = data(i);
 
 		std::cout << "Checking inverse ";