Browse Source

Adds an optimisation in pow_two, updates some comments, reorders some stuff

master
Joshua Moerman 11 years ago
parent
commit
572d257e1d
  1. 9
      include/utilities.hpp
  2. 37
      wavelet/wavelet_parallel.hpp
  3. 9
      wavelet/wavelet_parallel_mockup.cpp

9
include/utilities.hpp

@ -20,10 +20,15 @@ inline unsigned int two_log(unsigned int x){
return 8*sizeof(unsigned int) - unsigned(__builtin_clz(x-1)); return 8*sizeof(unsigned int) - unsigned(__builtin_clz(x-1));
} }
// calculates 2^x (NOTE: can be improved by exponentiation by squaring) // calculates 2^x (by squaring)
inline unsigned int pow_two(unsigned int x){ inline unsigned int pow_two(unsigned int x){
unsigned int base = 2;
unsigned int y = 1; unsigned int y = 1;
while(x--) y *= 2; while(x){
if(x & 1) y *= base;
x >>= 1;
base *= base;
}
return y; return y;
} }

37
wavelet/wavelet_parallel.hpp

@ -6,17 +6,17 @@
#include "wavelet.hpp" #include "wavelet.hpp"
/* In the following function we assume any in-parameter to be already /* In the following function we assume any in-parameter to be already
* bsp::pushed. And the functions won't do any bsp::sync at the end. Both * bsp::pushed, if needed. And the functions won't do any bsp::sync at the end.
* conventions make it possible to chains functions with lesser syncs. * Both conventions make it possible to chain functions with fewer syncs.
* *
* Distribution is block distribution. * Distribution is block distribution. Wavelet is in-place.
*/ */
namespace wvlt { namespace wvlt {
namespace par { namespace par {
// The structs proc_info and plan_1D contain some often // The structs proc_info and plan_1D contain some often used
// used values in the parallel algorithm, they also // values in the parallel algorithm, they also precompute some
// precompute some constants. // constants.
// p = nproc(), s = pid() // p = nproc(), s = pid()
// prev/next = previous and next processor index // prev/next = previous and next processor index
@ -29,8 +29,9 @@ namespace wvlt {
}; };
// n = inputsize, b = blocksize, m = step_size // n = inputsize, b = blocksize, m = step_size
// Cm = communication size // Cm = communication size, small_steps = total number of steps
// TODO: describe other vars // in the wavelet transform, big_steps = number of supersteps
// doing m small steps, remainder = small_steps - m*big_steps.
struct plan_1D { struct plan_1D {
unsigned int n, b, m, Cm, small_steps, big_steps, remainder; unsigned int n, b, m, Cm, small_steps, big_steps, remainder;
@ -46,13 +47,14 @@ namespace wvlt {
return plan; return plan;
} }
inline void comm_step(proc_info const & pi, plan_1D const & plan, double* x, double* other, unsigned int size, unsigned int stride){ // Does one big step: so 1 comm. step and m comp. steps
inline void step(proc_info const & d, plan_1D const & plan, double* x, double* other, unsigned int size, unsigned int stride){
// Communication
for(unsigned int i = 0; i < plan.Cm; ++i){ for(unsigned int i = 0; i < plan.Cm; ++i){
bsp::put(pi.prev, &x[stride*i], other, i, 1); bsp::put(d.prev, &x[stride*i], other, i, 1);
} }
} bsp::sync();
// Computation
inline void comp_step(proc_info const & d, plan_1D const & plan, double* x, double* other, unsigned int size, unsigned int stride){
unsigned int end = pow_two(plan.m); unsigned int end = pow_two(plan.m);
for(unsigned int i = 1; i < end; i <<= 1){ for(unsigned int i = 1; i < end; i <<= 1){
wavelet_mul(x, other[0], other[i], size, stride*i); wavelet_mul(x, other[0], other[i], size, stride*i);
@ -60,12 +62,7 @@ namespace wvlt {
} }
} }
inline void step(proc_info const & d, plan_1D const & plan, double* x, double* other, unsigned int size, unsigned int stride){ // Does the local part of the algorithm
comm_step(d, plan, x, other, size, stride);
bsp::sync();
comp_step(d, plan, x, other, size, stride);
}
inline void base(proc_info const & d, plan_1D const & plan, double* x, double* other, unsigned int size){ inline void base(proc_info const & d, plan_1D const & plan, double* x, double* other, unsigned int size){
// do steps of size m // do steps of size m
unsigned int stride = 1; unsigned int stride = 1;
@ -79,7 +76,7 @@ namespace wvlt {
step(d, get_remainder(plan), x, other, size, stride); step(d, get_remainder(plan), x, other, size, stride);
} }
// block distributed parallel wavelet, result is also in block distribution (in-place in x) // The whole parallel algorithm
inline void wavelet(proc_info const & d, plan_1D const & plan, double* x, double* next, double* proczero){ inline void wavelet(proc_info const & d, plan_1D const & plan, double* x, double* next, double* proczero){
// First do the local part // First do the local part
base(d, plan, x, next, plan.b); base(d, plan, x, next, plan.b);

9
wavelet/wavelet_parallel_mockup.cpp

@ -82,8 +82,8 @@ static void par_wavelet(){
// So this is not part of the parallel program anymore // So this is not part of the parallel program anymore
bsp::pop_reg(proczero.data()); bsp::pop_reg(proczero.data());
bsp::pop_reg(next.data()); bsp::pop_reg(next.data());
next.clear();
proczero.clear(); proczero.clear();
next.clear();
if(globals.check_results){ if(globals.check_results){
bsp::push_reg(par_result.data(), par_result.size()); bsp::push_reg(par_result.data(), par_result.size());
@ -144,6 +144,7 @@ int main(int argc, char** argv){
("iterations", po::value<unsigned int>()->default_value(5), "number of iterations") ("iterations", po::value<unsigned int>()->default_value(5), "number of iterations")
("help", po::bool_switch(), "show this help") ("help", po::bool_switch(), "show this help")
("show-input", po::bool_switch(), "shows the given input") ("show-input", po::bool_switch(), "shows the given input")
("seq", po::bool_switch(), "also runs the sequential algorithm")
("check", po::bool_switch(), "enables correctness checks"); ("check", po::bool_switch(), "enables correctness checks");
po::variables_map vm; po::variables_map vm;
@ -182,8 +183,10 @@ int main(int argc, char** argv){
seq_result.assign(globals.N, 0.0); seq_result.assign(globals.N, 0.0);
} }
// Run both versions (will print timings) // Run sequential algorithm if needed
seq_wavelet(); if(globals.check_results || vm["seq"].as<bool>())
seq_wavelet();
// Always run parallel algorithm
par_wavelet(); par_wavelet();
// Checking equality of algorithms // Checking equality of algorithms