Browse Source

Already works (wav -> png), with blackman-window, does a lot of ffts (hardcoded length, etc)

master
Joshua Moerman 10 years ago
commit
ebb73ba5fa
  1. 6
      .gitignore
  2. 17
      CMakeLists.txt
  3. 10
      src/CMakeLists.txt
  4. 101
      src/main.cpp
  5. 119
      src/wav.hpp

6
.gitignore

@ -0,0 +1,6 @@
# macOS Finder metadata
.DS_Store
# Per-user IDE project settings (presumably Qt Creator's CMakeLists.txt.user; confirm)
*.user
# Out-of-source build directories
build*
# Images written by the program (main.cpp writes dmt_song.png)
*.png
# Audio inputs read by the program (main.cpp reads samples/dmt_song.wav)
*.wav

17
CMakeLists.txt

@ -0,0 +1,17 @@
# Top-level build script: collects include paths, compiler flags and link
# libraries, then hands over to src/ where the executables are defined.
cmake_minimum_required (VERSION 2.6)
# NOTE(review): there is no project() call in this file although
# PROJECT_SOURCE_DIR is used on the next line; confirm this is intentional.
# Headers under include/ are treated as SYSTEM headers (presumably where
# png.hpp and utilities.hpp used by src/main.cpp live; confirm).
include_directories(SYSTEM "${PROJECT_SOURCE_DIR}/include/")
# Passed to every compiler invocation; -std=c++1y selects C++14.
add_definitions( -std=c++1y )
# Boost: headers go on the include path, libraries are appended to `libs`.
find_package(Boost REQUIRED COMPONENTS program_options filesystem system)
include_directories(SYSTEM ${Boost_INCLUDE_DIRS})
set(libs ${libs} ${Boost_LIBRARIES})
# libpng: same treatment as Boost.
find_package(PNG REQUIRED)
include_directories(SYSTEM ${PNG_INCLUDE_DIRS})
set(libs ${libs} ${PNG_LIBRARIES})
# FFTW is linked by bare library name; no find_package lookup is done for it.
set(libs ${libs} fftw3)
# src/CMakeLists.txt links every executable against ${libs}.
add_subdirectory("src")

10
src/CMakeLists.txt

@ -0,0 +1,10 @@
# Builds one executable per .cpp file in this directory.
# Globs are evaluated when CMake configures, so newly added files require
# re-running CMake before they are picked up.
file(GLOB sources *.cpp)
file(GLOB headers *.hpp)
foreach(source ${sources})
# NAME_WE: file name without directory or extension, used as the target name.
get_filename_component(exec ${source} NAME_WE)
# All headers are listed as sources of every executable.
add_executable(${exec} ${source} ${headers})
# ${libs} is the library list assembled by the parent CMakeLists.txt.
target_link_libraries(${exec} ${libs})
endforeach()

101
src/main.cpp

@ -0,0 +1,101 @@
#include <iostream>
#include <vector>
#include <complex>
#include <algorithm>
#include <iterator>
#include <cmath>
#include <fftw3.h>
#include <png.hpp>
#include <utilities.hpp>
#include "wav.hpp"
namespace fftw {
using complex = std::complex<double>;
static_assert(sizeof(fftw::complex) == sizeof(fftw_complex), "Types should match");
struct plan {
// fftw_plan fftw_plan_dft_1d(int n, fftw_complex *in, fftw_complex *out, int sign, unsigned flags);
// These pointers can be equal, indicating an in-place transform.
plan(int N, fftw_complex* in, fftw_complex* out){
p = fftw_plan_dft_1d(N, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
}
plan(plan &&) = delete;
plan(plan const &) = delete;
void operator()(){
fftw_execute(p);
}
~plan(){
fftw_destroy_plan(p);
}
private:
// is actually a fftw_plan_s*
fftw_plan p;
};
// the k-th output corresponds to the frequency k/n
// The frequency -k/n is the same as the frequency (n-k)/n.
// For real-valued data the second half is redundant: out[i] is the conjugate of out[n-i]
}
/// Builds an N-point four-term cosine-sum window (Blackman-Nuttall coefficients).
///
/// Sample n is
///   a0 - a1*cos(2*pi*n/N) + a2*cos(4*pi*n/N) - a3*cos(6*pi*n/N),
/// i.e. the denominator is N rather than N-1.
///
/// @param N number of window samples; 0 yields an empty vector.
/// @return vector of N window coefficients.
std::vector<double> generate_window(unsigned int N){
    // Spelled out instead of using M_PI, which is not part of standard C++
    // and is not defined by every toolchain without extra macros.
    constexpr double pi = 3.14159265358979323846;
    constexpr double a0 = 0.3635819;
    constexpr double a1 = 0.4891775;
    constexpr double a2 = 0.1365995;
    constexpr double a3 = 0.0106411;

    const double size = N;
    std::vector<double> window(N);
    // The index has the same type as N, so the loop condition does not mix
    // signed and unsigned operands.
    for(unsigned int n = 0; n < N; ++n){
        window[n] = a0
                  - a1*std::cos(2*pi*n / size)
                  + a2*std::cos(4*pi*n / size)
                  - a3*std::cos(6*pi*n / size);
    }
    return window;
}
/// Reads samples/dmt_song.wav and writes dmt_song.png.
///
/// For each of H rows: seek to sample `row * hop`, copy up to N samples into
/// the FFT buffer multiplied by the window, zero-pad the remainder, run the
/// in-place FFT, and emit the scaled magnitudes of the first W outputs as
/// pixels.
int main(){
    const int N = 1 << 13;         // samples per FFT
    const int H = 1 << 13;         // number of FFTs, one per loop iteration
    const int W = 1 << 9;          // number of outputs written per FFT
    const unsigned int hop = 512;  // samples between the starts of consecutive FFTs

    const auto sqrtn = 1.0 / std::sqrt(N);
    std::vector<fftw::complex> input(N);
    const auto window = generate_window(N);

    // In-place transform: the same buffer is passed as input and output.
    fftw::plan p(N, reinterpret_cast<fftw_complex*>(input.data()), reinterpret_cast<fftw_complex*>(input.data()));

    png::gray_ostream image(W, H, "dmt_song.png");
    wav::handle sound("samples/dmt_song.wav");

    timer t("generating image");
    for(int y = 0; y < H; ++y){
        sound.rewind(static_cast<unsigned int>(y) * hop);

        const auto end = input.size();
        decltype(input.size()) i = 0;

        // Copy a windowed slice of the sound into the buffer.
        // The index is advanced in its own statement: writing
        // `input[i++] = x * window[i]` modifies and reads `i` without
        // sequencing, which is undefined behaviour before C++17.
        for(auto&& x : sound){
            input[i] = x * window[i];
            ++i;
            if(i == end) break;
        }

        // Pad with zeros when the file ran out before the buffer was full.
        while(i != end){
            input[i++] = 0.0;
        }

        // Transform the buffer in place.
        p();

        // Write at most W magnitudes to the image.
        auto pos = W;
        for(auto&& x : input){
            if(!pos--) break;
            const auto v = sqrtn * std::abs(x);
            image << png::gray_ostream::pixel(v);
        }
    }
}

119
src/wav.hpp

@ -0,0 +1,119 @@
#pragma once
#include <cassert>
#include <cstdint>
#include <cstring>
#include <string>
#include <fstream>
#include <boost/iterator/iterator_facade.hpp>
namespace wav {
// Leading RIFF header of a WAV file. handle's constructor reads it straight
// from the file bytes and checks chunk_id and format.
struct riff_chunk {
char chunk_id[4]; // "RIFF" (not NUL-terminated)
uint32_t chunk_size; // size of rest of file
char format[4]; // "WAVE" (not NUL-terminated)
};
// Sample encoding: 8-bit samples are stored as unsigned bytes, ranging
// from 0 to 255. 16-bit samples are stored as 2's-complement
// signed integers, ranging from -32768 to 32767. :(
//
// "fmt " sub-chunk describing the audio format. handle's constructor reads it
// directly after riff_chunk and only accepts PCM, mono, 8 bits per sample.
struct fmt_chunk {
char chunk_id[4]; // "fmt " (note the trailing space)
uint32_t chunk_size; // size of rest of subchunk
uint16_t audio_format; // 1 == PCM
uint16_t channels; // 1 == mono, 2 = stereo
uint32_t sample_rate; // eg. 44100
uint32_t byte_rate; // == sample_rate * channels * bits_per_sample/8
uint16_t block_align; // channels * bits_per_sample/8
uint16_t bits_per_sample; // e.g. 8
};
// Header of the "data" sub-chunk. handle's constructor reads it directly after
// fmt_chunk; the samples follow it in the file.
struct data_chunk {
char chunk_id[4]; // "data"
uint32_t chunk_size; // size of actual sound
// + data
};
// The chunk structs are filled by copying raw file bytes into them, so their
// sizes must be exactly 12, 24 and 8 bytes; padding would break the parsing.
static_assert(sizeof(riff_chunk) == 12, "wrong size");
static_assert(sizeof(fmt_chunk) == 24, "wrong size");
static_assert(sizeof(data_chunk) == 8, "wrong size");
// Reader for 8-bit mono PCM WAV files.
//
// The constructor reads a riff_chunk, a fmt_chunk and a data_chunk back to
// back from the start of the file and asserts on their contents, so only
// files whose sample data starts right after those three headers are
// handled. Samples are exposed through begin()/end() as doubles
// (byte / 255.0 - 0.5).
//
// Every iterator reads from the one shared file position, so iteration is
// single pass; use rewind() to reposition before iterating again.
class handle{
    struct const_iterator; // forward decl

public:
    // Opens `filename` and reads and validates the three headers.
    // Validation uses assert, so it is compiled out when NDEBUG is defined.
    handle(std::string filename){
        // Binary mode: without it, platforms that translate text streams
        // would alter the header and sample bytes.
        file.open(filename, std::ios_base::in | std::ios_base::binary);
        assert(file.is_open());

        read(riff);
        assert(std::strncmp("RIFF", riff.chunk_id, 4) == 0);
        assert(std::strncmp("WAVE", riff.format, 4) == 0);

        read(fmt);
        assert(std::strncmp("fmt ", fmt.chunk_id, 4) == 0);
        assert(fmt.audio_format == 1);
        assert(fmt.channels == 1);
        assert(fmt.bits_per_sample == 8);

        read(data);
        assert(std::strncmp("data", data.chunk_id, 4) == 0);
    }

    // Iterator positioned on the next sample at the current file position.
    // Constructing it already consumes one byte from the file.
    const_iterator begin(){
        return {this};
    }

    // Past-the-end iterator; an iterator compares equal to it once a read fails.
    const_iterator end(){
        return {};
    }

    // Moves the file position to sample number `sample`, counted from the
    // first byte after the three headers.
    void rewind(unsigned int sample){
        const std::streamoff start = sizeof(riff) + sizeof(fmt) + sizeof(data);
        // Computed in std::streamoff so a large sample index cannot wrap
        // around in 32-bit unsigned arithmetic.
        const std::streamoff offset =
            static_cast<std::streamoff>(sample) * fmt.bits_per_sample / 8 * fmt.channels;
        file.pubseekpos(start + offset, std::ios_base::in);
    }

private:
    struct const_iterator : public boost::iterator_facade<const_iterator, double const, boost::forward_traversal_tag>{
        // Default-constructed iterators are the end iterator (null back_ptr).
        const_iterator() = default;

        // Reads the first sample immediately so that dereferencing is valid.
        const_iterator(handle* bp)
        : back_ptr(bp)
        {
            ++*this;
        }

    private:
        friend class boost::iterator_core_access;

        // Reads one byte from the owning handle's file and converts it to a
        // double; turns into the end iterator when no byte could be read.
        void increment() {
            uint8_t data = 0;
            if(back_ptr->file.sgetn(reinterpret_cast<char*>(&data), 1) > 0){
                value = data / 255.0 - 0.5;
            } else {
                back_ptr = nullptr;
            }
        }

        // Iterators are equal when they refer to the same handle (or both to none).
        bool equal(const_iterator const& other) const {
            return back_ptr == other.back_ptr;
        }

        double const & dereference() const {
            return value;
        }

        handle* back_ptr = nullptr;
        double value = 0.0;
    };

    // Fills `thing` with the next sizeof(T) bytes of the file. A short read
    // leaves the remaining bytes of `thing` unchanged.
    template<typename T>
    void read(T& thing){
        file.sgetn(reinterpret_cast<char*>(&thing), sizeof(T));
    }

    std::filebuf file;
    // Value-initialised so that a failed or short read yields zeroed headers
    // instead of indeterminate data.
    riff_chunk riff{};
    fmt_chunk fmt{};
    data_chunk data{};
};
}