compress/wavelet/wavelet_parallel.hpp

#pragma once

#include <includes.hpp>
#include <utilities.hpp>
#include <bsp.hpp>
#include "wavelet.hpp"

/* In the following functions we assume any in-parameter to be already
 * bsp::pushed, if needed. The functions also won't do any bsp::sync at the
 * end. Both conventions make it possible to chain functions with fewer syncs.
 *
 * Distribution is block distribution. Wavelet is in-place.
 */

namespace wvlt {
	namespace par {
		// The structs proc_info and plan_1D contain some often used
		// values in the parallel algorithm, they also precompute some
		// constants.

		// Per-processor constants.
		// p = number of processors (nproc()), s = own processor id (pid())
		// prev/next = ids of the cyclic neighbours of s, i.e. s-1 and s+1
		// taken modulo p. p must be non-zero, since it is used as a modulus.
		struct proc_info {
			unsigned int p, s, prev, next;

			proc_info(unsigned int p_, unsigned int s_)
			: p(p_)
			, s(s_)
			, prev((s_ + p_ - 1) % p_) // adding p_ first keeps s_ == 0 from dipping below zero
			, next((s_ + 1) % p_)
			{}
		};

		// Precomputed constants for one 1D parallel transform.
		// n = input size, b = block size, m = step size (number of small
		// steps done per superstep); m must be non-zero, it is used as a
		// divisor.
		// Cm = communication size, computed as pow_two(m+1) - 2.
		// small_steps = total number of steps in the wavelet transform,
		// computed as two_log(b).
		// big_steps = number of supersteps doing m small steps.
		// remainder = small_steps - m*big_steps.
		//
		// big_steps is derived from small_steps-1 rather than small_steps, so
		// m*big_steps < small_steps and remainder lies in [1, m] whenever
		// small_steps >= 1: the final superstep is always the remainder one.
		// When small_steps is 0 there is nothing to do; both big_steps and
		// remainder are then 0 (the unsigned small_steps-1 would otherwise
		// wrap around to a huge value).
		struct plan_1D {
			unsigned int n, b, m, Cm, small_steps, big_steps, remainder;

			plan_1D(unsigned int n_, unsigned int b_, unsigned int m_)
			: n(n_), b(b_), m(m_), Cm(pow_two(m+1) - 2), small_steps(two_log(b)), big_steps(small_steps ? (small_steps-1)/m : 0), remainder(small_steps - m*big_steps)
			{}
		};

		// Returns a copy of `plan` describing only the left-over superstep:
		// its step size m becomes the old remainder, Cm is recomputed for that
		// step size, and remainder is cleared. All other fields are copied
		// unchanged.
		inline plan_1D get_remainder(plan_1D plan){
			unsigned int const tail = plan.remainder;
			plan.remainder = 0;
			plan.m = tail;
			plan.Cm = pow_two(tail+1) - 2;
			return plan;
		}

		// Does one big step: 1 communication phase followed by the
		// computation steps for step size plan.m.
		//
		// Communication: plan.Cm separate bsp::put calls to processor d.prev.
		// The k-th call takes its source at x[stride*k] and passes k and 1 as
		// trailing arguments (presumably the destination offset in `other`
		// and the element count -- confirm against bsp.hpp). A bsp::sync()
		// follows before any computation starts.
		//
		// Computation: for level = 1, 2, 4, ... below pow_two(plan.m), apply
		// wavelet_mul to x with other[0] and other[level], and (except on the
		// last level) wavelet_mul_base to `other`.
		inline void step(proc_info const & d, plan_1D const & plan, double* x, double* other, unsigned int size, unsigned int stride){
			// Communication
			for(unsigned int k = 0; k < plan.Cm; ++k){
				bsp::put(d.prev, &x[stride*k], other, k, 1);
			}
			bsp::sync();

			// Computation
			unsigned int const end = pow_two(plan.m);
			unsigned int level = 1;
			while(level < end){
				wavelet_mul(x, other[0], other[level], size, stride*level);
				if(level < end/2) wavelet_mul_base(other, 2*end - 2*level, level);
				level <<= 1;
			}
		}

		// Does the local part of the algorithm: plan.big_steps big steps of
		// plan.m small steps each, followed by one shorter big step when
		// plan.remainder is non-zero.
		inline void base(proc_info const & d, plan_1D const & plan, double* x, double* other, unsigned int size){
			// The stride starts at 1 and is multiplied by 2^m after every full
			// big step.
			unsigned int stride = 1;
			for(unsigned int done = 0; done < plan.big_steps; ++done){
				step(d, plan, x, other, size, stride);
				stride <<= plan.m;
			}

			// Nothing left over: m divided the work exactly.
			if(plan.remainder == 0) return;

			// Otherwise do the remaining part with a plan whose step size is
			// the remainder.
			plan_1D const tail = get_remainder(plan);
			step(d, tail, x, other, size, stride);
		}

		// The whole parallel algorithm: the local part on every processor,
		// then (for more than 2 processors) a centralized finish on
		// processor 0 whose results are put back to everyone.
		// `next` is the buffer handed to base() as its `other` argument;
		// `proczero` is the destination of the fan-in on processor 0.
		// No bsp::sync() is done after the final puts, following the
		// convention stated at the top of this file.
		inline void wavelet(proc_info const & d, plan_1D const & plan, double* x, double* next, double* proczero){
			// Local part, using the block size as the local size
			base(d, plan, x, next, plan.b);

			// With 1 or 2 processors there is no centralized part to do
			if(d.p <= 2) return;

			// Fan in: every processor puts from x into proczero on processor
			// 0, passing its own id d.s as trailing argument (presumably the
			// destination offset -- confirm against bsp.hpp).
			// NOTE(review): an older comment spoke of "2 elements" per
			// processor, but no explicit count is passed here -- verify.
			bsp::put(0, x, proczero, d.s);
			bsp::sync();

			// Only processor zero finishes the job
			if(d.s != 0) return;

			wvlt::wavelet(proczero, d.p, 1);

			// ... and sends entry `target` of the result back to processor `target`
			for(unsigned int target = 0; target != d.p; ++target){
				bsp::put(target, &proczero[target], x);
			}
		}
	}
}
Adds program options, puts parallel wvlt in separate header 11 years ago			`#pragma once`

			`#include <includes.hpp>`
			`#include <utilities.hpp>`
			`#include <bsp.hpp>`
			`#include "wavelet.hpp"`

			`/* In the following function we assume any in-parameter to be already`
Adds an optimalisation in pow_two, Updates some comments, reordered some stuff 11 years ago			`* bsp::pushed, if needed. And the functions won't do any bsp::sync at the end.`
			`* Both conventions make it possible to chain functions with lesser syncs.`
Adds program options, puts parallel wvlt in separate header 11 years ago			`*`
Adds an optimalisation in pow_two, Updates some comments, reordered some stuff 11 years ago			`* Distribution is block distribution. Wavelet is in-place.`
Adds program options, puts parallel wvlt in separate header 11 years ago			`*/`

			`namespace wvlt {`
			`namespace par {`
Adds an optimalisation in pow_two, Updates some comments, reordered some stuff 11 years ago			`// The structs proc_info and plan_1D contain some often used`
			`// values in the parallel algorithm, they also precompute some`
			`// constants.`
Splits distribution into proc_info and plan. Refacters some variables. 11 years ago
			`// p = nproc(), s = pid()`
			`// prev/next = previous and next processor index`
			`struct proc_info {`
			`unsigned int p, s, prev, next;`

			`proc_info(unsigned int p_, unsigned int s_)`
			`: p(p_), s(s_), prev((s-1+p)%p), next((s+1)%p)`
			`{}`
			`};`

			`// n = inputisze, b = blocksize, m = step_size`
Adds an optimalisation in pow_two, Updates some comments, reordered some stuff 11 years ago			`// Cm = communication size, small_steps = total number of steps`
			`// in the wavelet transform, big_steps = number of supersteps`
			`// doing m small steps, remainder = small_steps - m*big_steps.`
Splits distribution into proc_info and plan. Refacters some variables. 11 years ago			`struct plan_1D {`
			`unsigned int n, b, m, Cm, small_steps, big_steps, remainder;`

			`plan_1D(unsigned int n_, unsigned int b_, unsigned int m_)`
(committing very old stuff) No clue what has changed 9 years ago			`: n(n_), b(b_), m(m_), Cm(pow_two(m+1) - 2), small_steps(two_log(b)), big_steps((small_steps-1)/m), remainder(small_steps - m*big_steps)`
Adds program options, puts parallel wvlt in separate header 11 years ago			`{}`
			`};`

Splits distribution into proc_info and plan. Refacters some variables. 11 years ago			`inline plan_1D get_remainder(plan_1D plan){`
			`plan.m = plan.remainder;`
			`plan.Cm = pow_two(plan.m+1) - 2;`
			`plan.remainder = 0;`
			`return plan;`
Adds program options, puts parallel wvlt in separate header 11 years ago			`}`

Adds an optimalisation in pow_two, Updates some comments, reordered some stuff 11 years ago			`// Does one big step: so 1 comm. step and m comp. steps`
			`inline void step(proc_info const & d, plan_1D const & plan, double* x, double* other, unsigned int size, unsigned int stride){`
			`// Comminication`
Splits distribution into proc_info and plan. Refacters some variables. 11 years ago			`for(unsigned int i = 0; i < plan.Cm; ++i){`
Adds an optimalisation in pow_two, Updates some comments, reordered some stuff 11 years ago			`bsp::put(d.prev, &x[stride*i], other, i, 1);`
Adds program options, puts parallel wvlt in separate header 11 years ago			`}`
Adds an optimalisation in pow_two, Updates some comments, reordered some stuff 11 years ago			`bsp::sync();`
			`// Computation`
Splits distribution into proc_info and plan. Refacters some variables. 11 years ago			`unsigned int end = pow_two(plan.m);`
Adds program options, puts parallel wvlt in separate header 11 years ago			`for(unsigned int i = 1; i < end; i <<= 1){`
			`wavelet_mul(x, other[0], other[i], size, stride*i);`
			`if(i < end/2) wavelet_mul_base(other, 2*end - 2*i, i);`
			`}`
			`}`

Adds an optimalisation in pow_two, Updates some comments, reordered some stuff 11 years ago			`// Does the local part of the algorithm`
Splits distribution into proc_info and plan. Refacters some variables. 11 years ago			`inline void base(proc_info const & d, plan_1D const & plan, double* x, double* other, unsigned int size){`
			`// do steps of size m`
Adds program options, puts parallel wvlt in separate header 11 years ago			`unsigned int stride = 1;`
Splits distribution into proc_info and plan. Refacters some variables. 11 years ago			`for(unsigned int i = plan.big_steps; i; i--){`
			`step(d, plan, x, other, size, stride);`
			`stride <<= plan.m;`
Adds program options, puts parallel wvlt in separate header 11 years ago			`}`

Splits distribution into proc_info and plan. Refacters some variables. 11 years ago			`// in the case m didn't divide the total number of small steps, do the remaining part`
			`if(plan.remainder)`
			`step(d, get_remainder(plan), x, other, size, stride);`
Adds program options, puts parallel wvlt in separate header 11 years ago			`}`

Adds an optimalisation in pow_two, Updates some comments, reordered some stuff 11 years ago			`// The whole parallel algorithm`
Splits distribution into proc_info and plan. Refacters some variables. 11 years ago			`inline void wavelet(proc_info const & d, plan_1D const & plan, double* x, double* next, double* proczero){`
Adds program options, puts parallel wvlt in separate header 11 years ago			`// First do the local part`
Splits distribution into proc_info and plan. Refacters some variables. 11 years ago			`base(d, plan, x, next, plan.b);`
Adds program options, puts parallel wvlt in separate header 11 years ago
(committing very old stuff) No clue what has changed 9 years ago			`// we only have to finish centralized if p >= 4`
			`if(d.p <= 2) return;`

Adds program options, puts parallel wvlt in separate header 11 years ago			`// Then do a fan in (i.e. 2 elements to proc zero)`
(committing very old stuff) No clue what has changed 9 years ago			`bsp::put(0, x, proczero, d.s);`
Adds program options, puts parallel wvlt in separate header 11 years ago			`bsp::sync();`

			`// proc zero has the privilige/duty to finish the job`
			`if(d.s == 0) {`
(committing very old stuff) No clue what has changed 9 years ago			`wvlt::wavelet(proczero, d.p, 1);`
Adds program options, puts parallel wvlt in separate header 11 years ago			`// and to send it back to everyone`
			`for(unsigned int t = 0; t < d.p; ++t){`
(committing very old stuff) No clue what has changed 9 years ago			`bsp::put(t, &proczero[t], x);`
Adds program options, puts parallel wvlt in separate header 11 years ago			`}`
			`}`
			`}`
			`}`
			`}`