Browse Source

Some more abstraction for libav* stuff.

New: mosaic with unique tiles.
master
Joshua Moerman 11 years ago
parent
commit
1d14e1f48e
  1. BIN
      lib/.DS_Store
  2. 71
      lib/av.hpp
  3. 21
      lib/av/av.cpp
  4. 45
      lib/av/av.hpp
  5. 45
      lib/av/av_base.hpp
  6. 28
      lib/av/sws.cpp
  7. 25
      lib/av/sws.hpp
  8. 2
      lib/fingerprint_traits.hpp
  9. 5
      lib/fingerprints.hpp
  10. 63
      lib/fingerprints/downscale.cpp
  11. 27
      lib/fingerprints/downscale.hpp
  12. 108
      lib/fingerprints/wavelet.cpp
  13. 47
      lib/fingerprints/wavelet.hpp
  14. 50
      lib/image_database.hpp
  15. 15
      lib/image_io.cpp
  16. 4
      lib/image_io.hpp
  17. 53
      lib/mozaic.hpp
  18. 4
      lib/read_database.hpp
  19. 3
      lib/wavelet.hpp
  20. 109
      lib/wavelet_2.hpp
  21. 37
      lib/wavelet_constants.hpp
  22. 2
      src/compress.cpp
  23. 104
      src/main.cpp

BIN
lib/.DS_Store

Binary file not shown.

71
lib/av.hpp

@ -1,71 +0,0 @@
#pragma once
extern "C" {
#include <libavutil/mem.h>
#include <libavutil/pixfmt.h>
typedef struct AVDictionary AVDictionary;
typedef struct AVFormatContext AVFormatContext;
typedef struct AVInputFormat AVInputFormat;
typedef struct AVCodecContext AVCodecContext;
typedef struct AVCodec AVCodec;
typedef struct AVPacket AVPacket;
typedef struct AVFrame AVFrame;
}
#include <memory>
#include <stdexcept>
namespace av {
// Generic error class
struct error : public std::runtime_error {
using std::runtime_error::runtime_error;
};
// Type of a free function (for unique_ptr)
template <typename T>
using deleter = void(*)(T*);
// AVFormatContext related
using format_context = std::unique_ptr<AVFormatContext, deleter<AVFormatContext>>;
format_context format_open_input(std::string const & filename, AVInputFormat* format, AVDictionary** options);
format_context format_alloc_context();
// AVCodec related
using open_codec = std::unique_ptr<AVCodecContext, deleter<AVCodecContext>>;
open_codec codec_open(AVCodecContext* ctx, AVCodec* codec, AVDictionary** options);
// AVPacket related (this is somewhat strange, but matches the usecase)
// I need to rethink this
using packet_buffer = AVPacket;
using packet = std::unique_ptr<AVPacket, deleter<AVPacket>>;
packet read_frame(format_context & ctx, packet_buffer & p);
// AVFrame related
using frame = std::unique_ptr<AVFrame, deleter<AVFrame>>;
frame frame_alloc();
frame frame_clone(frame const & f); // creates a clone with the *same* buffer
AVPixelFormat get_format(frame const & f);
// Allocator
template <typename T>
struct allocator {
using value_type = T;
using size_type = size_t;
T* allocate(size_type n) const {
auto ptr = av_malloc(n * sizeof(T));
if(!ptr) throw std::bad_alloc();
return static_cast<T*>(ptr);
}
void deallocate(T* ptr, size_type /*n*/) const noexcept {
av_free(ptr);
}
};
template <typename T, typename S>
bool operator==(allocator<T> const &, allocator<S> const &) noexcept { return true; }
template <typename T, typename S>
bool operator!=(allocator<T> const &, allocator<S> const &) noexcept { return false; }
}

21
lib/av.cpp → lib/av/av.cpp

@ -4,7 +4,6 @@ extern "C" {
#include <libavutil/frame.h>
#include <libavformat/avformat.h>
#include <libavcodec/avcodec.h>
#include <libswscale/swscale.h>
}
#include <string>
@ -61,4 +60,24 @@ namespace av {
return static_cast<AVPixelFormat>(f->format);
}
void crop(frame& f, int left, int top, int width, int height){
f = crop(std::move(f), left, top, width, height);
}
frame crop(frame&& f, int left, int top, int width, int height){
if(left + width > f->width || top + height > f->height) throw error("Crop sizes do not match");
auto ptr = reinterpret_cast<AVPicture*>(f.get());
auto ret = av_picture_crop(ptr, ptr, av::get_format(f), top, left);
if(ret < 0) throw error("Could not crop");
f->width = width;
f->height = height;
return std::move(f);
}
frame crop(const frame& f, int left, int top, int width, int height){
return crop(frame_clone(f), left, top, width, height);
}
}

45
lib/av/av.hpp

@ -0,0 +1,45 @@
#pragma once
#include "av_base.hpp"
extern "C" {
#include <libavutil/pixfmt.h>
typedef struct AVDictionary AVDictionary;
typedef struct AVFormatContext AVFormatContext;
typedef struct AVInputFormat AVInputFormat;
typedef struct AVCodecContext AVCodecContext;
typedef struct AVCodec AVCodec;
typedef struct AVPacket AVPacket;
typedef struct AVFrame AVFrame;
}
#include <memory>
namespace av {
// AVFormatContext related
using format_context = av::unique_ptr<AVFormatContext>;
format_context format_open_input(std::string const & filename, AVInputFormat* format, AVDictionary** options);
format_context format_alloc_context();
// AVCodec related
using open_codec = av::unique_ptr<AVCodecContext>;
open_codec codec_open(AVCodecContext* ctx, AVCodec* codec, AVDictionary** options);
// AVPacket related (this is somewhat strange, but matches the usecase)
// I need to rethink this
using packet_buffer = AVPacket;
using packet = av::unique_ptr<AVPacket>;
packet read_frame(format_context & ctx, packet_buffer & p);
// AVFrame related
using frame = av::unique_ptr<AVFrame>;
frame frame_alloc();
frame frame_clone(frame const & f); // creates a clone with the *same* buffer
AVPixelFormat get_format(frame const & f);
// Does not copy data, it acts as a view
void crop(frame & f, int left, int top, int width, int height);
frame crop(frame && f, int left, int top, int width, int height);
frame crop(frame const & f, int left, int top, int width, int height);
}

45
lib/av/av_base.hpp

@ -0,0 +1,45 @@
#pragma once
extern "C" {
#include <libavutil/mem.h>
}
#include <stdexcept>
#include <memory>
namespace av {
// Generic error class
struct error : public std::runtime_error {
using runtime_error::runtime_error;
};
// Type of a freeing function (for unique_ptr)
template <typename T>
using deleter = void(*)(T*);
// Often used type
template <typename T>
using unique_ptr = std::unique_ptr<T, deleter<T>>;
// Allocator
template <typename T>
struct allocator {
using value_type = T;
using size_type = size_t;
T* allocate(size_type n) const {
auto ptr = av_malloc(n * sizeof(T));
if(!ptr) throw std::bad_alloc();
return static_cast<T*>(ptr);
}
void deallocate(T* ptr, size_type /*n*/) const noexcept {
av_free(ptr);
}
};
template <typename T, typename S>
bool operator==(allocator<T> const &, allocator<S> const &) noexcept { return true; }
template <typename T, typename S>
bool operator!=(allocator<T> const &, allocator<S> const &) noexcept { return false; }
}

28
lib/av/sws.cpp

@ -0,0 +1,28 @@
#include "sws.hpp"
extern "C" {
#include <libavformat/avformat.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
}
//auto sws_context = sws_getContext(input->width, input->height, av::get_format(input), frame_part->width, frame_part->height, av::get_format(frame_part), 0, nullptr, nullptr, nullptr);
//if(!sws_context) throw std::runtime_error("boem sws context");
//sws_scale (sws_context, {input->data}, {input->linesize}, 0, input->height, {frame_part->data}, {frame_part->linesize});
//sws_freeContext(sws_context);
namespace sws {
context create_context(av::frame const & src, av::frame const & dest, int flags, SwsFilter * src_filter, SwsFilter * dest_filter, double * params){
auto ptr = sws_getContext(src->width, src->height, av::get_format(src), dest->width, dest->height, av::get_format(dest), flags, src_filter, dest_filter, params);
if(!ptr) throw error("boem getContext");
return {ptr, &sws_freeContext};
}
void scale(context const & c, av::frame const & src, av::frame const & dest){
auto ret = sws_scale(c.get(), {src->data}, {src->linesize}, 0, src->height, {dest->data}, {dest->linesize});
if(ret < 0) throw error("boem scale");
}
}

25
lib/av/sws.hpp

@ -0,0 +1,25 @@
#pragma once
#include "av.hpp"
extern "C" {
typedef struct SwsFilter SwsFilter;
typedef struct SwsContext SwsContext;
}
#include <memory>
#include <stdexcept>
#include <vector>
namespace sws{
template <typename T>
using deleter = av::deleter<T>;
struct error : std::runtime_error {
using runtime_error::runtime_error;
};
using context = std::unique_ptr<SwsContext, deleter<SwsContext>>;
context create_context(av::frame const & src, av::frame const & dest, int flags = 0, SwsFilter * src_filter = nullptr, SwsFilter * dest_filter = nullptr, double * params = nullptr);
void scale(context const & c, av::frame const & src, av::frame const & dest);
}

2
lib/fingerprint_traits.hpp

@ -1,6 +1,6 @@
#pragma once
#include"av.hpp"
#include <av/av.hpp>
#include <utility>
// A fingerprint is used to speed up comparison. However, this is done assymetrically. Only the

5
lib/fingerprints.hpp

@ -1,4 +1,5 @@
#pragma once
#include "fingerprints/wavelet.hpp"
#include "fingerprints/downscale.hpp"
#include "fingerprint_traits.hpp"
#include "fingerprints/rgb.hpp"
#include "fingerprints/wvlt_rgb.hpp"

63
lib/fingerprints/downscale.cpp

@ -1,63 +0,0 @@
#include "downscale.hpp"
#include <image_io.hpp>
extern "C" {
#include <libavutil/frame.h>
#include <libswscale/swscale.h>
}
#include <algorithm>
#include <iostream>
static raw_rgb_image downscale_step(AVFrame const * frame, int factor) {
raw_rgb_image image(frame->width / factor, frame->height / factor);
auto context = sws_getContext(frame->width, frame->height, static_cast<AVPixelFormat>(frame->format), image.width(), image.height(), image.format(), 0, nullptr, nullptr, nullptr);
if(!context) throw std::runtime_error("boem sws context");
sws_scale (context, {frame->data}, {frame->linesize}, 0, frame->height, {image.frame->data}, {image.frame->linesize});
sws_freeContext(context);
return image;
}
static raw_rgb_image downscale_to(const av::frame &frame, int w, int h){
// ffmpeg doesnt let us downscale all the way to 5 at once :(, so we do a loop
raw_rgb_image image;
auto new_frame = frame.get();
while(new_frame->width > 8*w && new_frame->height > 8*h){
image = downscale_step(new_frame, 4);
new_frame = image.frame.get();
}
return to_raw_rgb_image(image.frame, w, h);
}
downscale downscale::pre_calculate(av::frame const & frame){
auto const image = downscale_to(crop_to_square(frame), 5, 5);
downscale ret;
ret.data.assign(image.data.size(), 0);
std::copy(image.data.begin(), image.data.end(), ret.data.begin());
return ret;
}
downscale downscale::calculate(const av::frame& frame){
return pre_calculate(frame);
}
static double square(double x){
return x*x;
}
double downscale::distance_to(const downscale& fingerprint) const {
assert(data.size() == fingerprint.data.size());
double distance = 0;
for(size_t i = 0; i < data.size(); ++i){
distance += square(data[i] - fingerprint.data[i]);
}
return distance;
}

27
lib/fingerprints/downscale.hpp

@ -1,27 +0,0 @@
#pragma once
#include <av.hpp>
#include <fingerprint_traits.hpp>
#include <boost/serialization/access.hpp>
#include <boost/serialization/vector.hpp>
#include <iosfwd>
#include <vector>
struct downscale {
std::vector<uint8_t> data;
static downscale pre_calculate(av::frame const & frame);
static downscale calculate(av::frame const & frame);
double distance_to(downscale const & fingerprint) const;
private:
friend class boost::serialization::access;
template<class Archive>
void serialize(Archive & ar, const unsigned int /*version*/){
ar & data;
}
};
std::ostream& operator<<(std::ostream& out, downscale const & x);

108
lib/fingerprints/wavelet.cpp

@ -1,108 +0,0 @@
#include "wavelet.hpp"
#include <wavelet.hpp>
#include <utilities.hpp>
#include <image_io.hpp>
#include <ostream>
#include <cmath>
static const int size = 512;
rgb_wavelet_coefficients::pre_fingerprint rgb_wavelet_coefficients::pre_calculate(av::frame const & frame) {
auto const image = to_raw_rgb_image(crop_to_square(frame), size, size);
rgb_wavelet_coefficients::pre_fingerprint ret;
// for every color
for(unsigned int color = 0; color < 3; ++color){
auto & vector = ret[color];
vector.assign(make_u(image.width() * image.height()), 0);
for(unsigned int n = 0; n < make_u(image.width() * image.height()); ++n){
vector[n] = 2.0 * image.data[3*n + color] / double(255) - 1.0;
}
wvlt::wavelet_2D(vector.data(), make_u(image.width()), make_u(image.height()));
}
return ret;
}
rgb_wavelet_coefficients rgb_wavelet_coefficients::calculate(av::frame const & frame){
auto const image = to_raw_rgb_image(crop_to_square(frame), size, size);
rgb_wavelet_coefficients ret;
std::vector<double> vector(make_u(image.width() * image.height()), 0);
// for every color
for(unsigned int color = 0; color < 3; ++color){
auto& coefficient_array = color == 0 ? ret.reds : (color == 1 ? ret.greens : ret.blues);
unsigned int array_index = 0;
for(unsigned int n = 0; n < make_u(image.width() * image.height()); ++n){
vector[n] = 2.0 * image.data[3*n + color] / double(255) - 1.0;
}
wvlt::wavelet_2D(vector.data(), make_u(image.width()), make_u(image.height()));
auto copy = vector;
for(auto & x : copy) x = std::abs(x);
auto const n_coefficients = coefficient_array.size();
std::nth_element(copy.begin(), copy.begin() + n_coefficients, copy.end(), std::greater<double>());
auto const threshold = copy[n_coefficients-1];
for(unsigned int n = 0; n < vector.size(); ++n){
auto const x = vector[n];
if(std::abs(x) >= threshold) {
coefficient_array[array_index++] = std::make_pair(n, x);
}
if(array_index >= coefficient_array.size()) {
break;
}
}
}
return ret;
}
static double square(double x){
return x*x;
}
double rgb_wavelet_coefficients::distance_to(pre_fingerprint const & fingerprint) const {
double distance = 0;
for(unsigned int color = 0; color < 3; ++color){
auto const & coefficients = color == 0 ? reds : (color == 1 ? greens : blues);
for(auto&& p : coefficients){
auto const x = p.second;
auto const y = fingerprint[color][p.first];
distance += square(x - y) - square(y);
}
}
return distance;
}
namespace std {
template <typename U, typename V>
ostream& operator<<(ostream& out, pair<U, V> const & p){
return out << '(' << p.first << ", " << p.second << ')';
}
}
std::ostream& operator<<(std::ostream& out, rgb_wavelet_coefficients const & x){
out << "rgb_wavelet_coefficients" << std::endl;
for(int color = 0; color < 3; ++color){
auto const & coefficient_array = color == 0 ? x.reds : (color == 1 ? x.greens : x.blues);
out << '[' << coefficient_array[0];
for(unsigned int i = 1; i < coefficient_array.size(); ++i) {
out << ", " << coefficient_array[i];
}
out << ']' << std::endl;
}
return out;
}

47
lib/fingerprints/wavelet.hpp

@ -1,47 +0,0 @@
#pragma once
#include <av.hpp>
#include <fingerprint_traits.hpp>
#include <boost/serialization/access.hpp>
#include <boost/serialization/array.hpp>
#include <boost/serialization/utility.hpp>
#include <utility>
#include <array>
#include <iosfwd>
#include <vector>
namespace boost {
namespace serialization {
template<class Archive, class T, size_t N>
void serialize(Archive & ar, std::array<T,N> & a, const unsigned int /*version*/) {
ar & make_array(a.data(), a.size());
}
} // namespace serialization
} // namespace boost
struct rgb_wavelet_coefficients {
// a double for (x, y) location represented in a single int
using coefficient = std::pair<int, double>;
using pre_fingerprint = std::array<std::vector<double>, 3>;
std::array<coefficient, 20> reds;
std::array<coefficient, 20> greens;
std::array<coefficient, 20> blues;
static pre_fingerprint pre_calculate(av::frame const & frame);
static rgb_wavelet_coefficients calculate(av::frame const & frame);
double distance_to(pre_fingerprint const & fingerprint) const;
private:
friend class boost::serialization::access;
template<class Archive>
void serialize(Archive & ar, const unsigned int /*version*/){
ar & reds;
ar & greens;
ar & blues;
}
};
std::ostream& operator<<(std::ostream& out, rgb_wavelet_coefficients const & x);

50
lib/image_database.hpp

@ -1,50 +0,0 @@
#pragma once
#include <image_io.hpp>
#include <fingerprints.hpp>
#include <boost/serialization/access.hpp>
#include <boost/serialization/vector.hpp>
#include <vector>
#include <string>
template <typename Fingerprint, typename Traits = fingerprint_traits<Fingerprint>>
struct image_database {
using index = size_t;
auto filename(index i) const { return filenames[i]; }
auto fingerprint(index i) const { return fingerprints[i]; }
auto size() const { return fingerprints.size(); }
void add(std::string const & filename){
auto && image = open_image(filename);
auto && fingerprint = Traits::calculate(image);
filenames.push_back(filename);
fingerprints.push_back(fingerprint);
}
//! returns a list of distances along with their index
auto distances_for_image(av::frame const & image) const {
std::vector<std::pair<typename Traits::distance_type, index>> ret;
ret.reserve(size());
auto const pre_fingerprint = Traits::pre_calculate(image);
for(auto&& fingerprint : fingerprints){
ret.emplace_back(Traits::distance(fingerprint, pre_fingerprint), ret.size());
}
return ret;
}
private:
friend class boost::serialization::access;
template<class Archive>
void serialize(Archive & ar, const unsigned int /*version*/){
ar & filenames;
ar & fingerprints;
}
std::vector<std::string> filenames;
std::vector<Fingerprint> fingerprints;
};

15
lib/image_io.cpp

@ -9,10 +9,10 @@ extern "C" {
#include <libswscale/swscale.h>
}
#include <stdexcept>
#include <iostream>
#include <algorithm>
#include <cassert>
#include <iostream>
#include <stdexcept>
raw_rgb_image::raw_rgb_image()
@ -95,17 +95,12 @@ av::frame open_image(std::string const & filename){
void crop_to_square(av::frame& frame){
int diff = frame->height - frame->width;
int ret = 0;
int diff = frame->width - frame->height;
if(diff > 0) {
ret = av_picture_crop(reinterpret_cast<AVPicture*>(frame.get()), reinterpret_cast<AVPicture*>(frame.get()), av::get_format(frame), diff/2, 0);
frame->height = frame->width;
av::crop(frame, diff/2, 0, frame->height, frame->height);
} else if(diff < 0) {
ret = av_picture_crop(reinterpret_cast<AVPicture*>(frame.get()), reinterpret_cast<AVPicture*>(frame.get()), av::get_format(frame), 0, -diff/2);
frame->width = frame->height;
av::crop(frame, 0, -diff/2, frame->width, frame->width);
}
if(ret < 0) throw std::runtime_error("boem crop");
}
av::frame crop_to_square(const av::frame& frame){

4
lib/image_io.hpp

@ -1,11 +1,11 @@
#pragma once
#include "av.hpp"
#include <av/av.hpp>
#include <functional>
#include <memory>
#include <string>
#include <vector>
#include <functional>
// Basic image representation
// 3 bytes per pixel (rgb), so size of data is width*height*3

53
lib/mozaic.hpp

@ -1,9 +1,11 @@
#pragma once
#include <utilities.hpp>
#include <vector>
#include <string>
#include <algorithm>
#include <random>
#include <set>
#include <string>
#include <vector>
template <typename T>
struct Mozaic {
@ -39,13 +41,54 @@ auto mozaic_fmap(Mozaic<T> const & mozaic, F&& f){
//! Currently takes one of the (slack+1) best fitting images
template <typename Distance>
auto optimize(Mozaic<std::vector<Distance>> distances, int slack){
auto optimize(Mozaic<std::vector<Distance>> const & distances, int slack){
std::random_device rd;
std::uniform_int_distribution<int> dist(0, slack);
auto rand = [&]{ return dist(rd); };
return mozaic_fmap(distances, [&](auto d){
std::nth_element(begin(d), begin(d) + slack, end(d));
return (begin(d) + rand())->second;
auto o = rand();
std::nth_element(begin(d), begin(d) + o, end(d));
return (begin(d) + o)->second;
});
}
static double square(double x) { return x * x; }
//! Chooses best tiles left, starting from the center
template <typename Distance>
auto optimize_unique(Mozaic<std::vector<Distance>> const & distances){
std::vector<std::tuple<double, size_t, size_t>> queue;
queue.reserve(make_u(distances.h_tiles * distances.v_tiles));
for(auto r = 0; r < distances.v_tiles; ++r){
for(auto c = 0; c < distances.h_tiles; ++c){
auto const aspect = distances.h_tiles / double(distances.v_tiles);
auto const d = square(0.5*(distances.h_tiles) - c) + square(aspect*(0.5*(distances.v_tiles) - r));
queue.emplace_back(d, r, c);
}
}
std::random_device random_device;
std::mt19937 generator(random_device());
std::shuffle(begin(queue), end(queue), generator);
std::sort(begin(queue), end(queue));
using index = decltype(distances[0][0][0].second);
std::set<index> used;
Mozaic<index> ret(distances.h_tiles, distances.v_tiles);
for(auto&& t : queue){
auto const r = std::get<1>(t);
auto const c = std::get<2>(t);
auto v = distances[r][c];
std::partial_sort(begin(v), begin(v) + used.size() + 1, end(v));
for(auto&& p : v){
if(used.count(p.second)) continue;
used.insert(p.second);
ret[r][c] = p.second;
break;
}
}
return ret;
}

4
lib/read_database.hpp

@ -0,0 +1,4 @@
#ifndef READ_DATABASE_HPP
#define READ_DATABASE_HPP
#endif // READ_DATABASE_HPP

3
lib/wavelet.hpp

@ -1,3 +0,0 @@
#pragma once
#include "wavelet_2.hpp"

109
lib/wavelet_2.hpp

@ -1,109 +0,0 @@
#pragma once
#include <cassert>
#include "utilities.hpp"
#include "wavelet_constants.hpp"
/* Rewrite of the basic functions
* This will make the adaption for the parallel case easier,
* because we can explicitly pass the two elements which are out of range
* (these are normally wrap-around values)
*
* These are also faster (testcase: size = 8, stride = 1, iterations = 100000)
* V2 0.00377901
* V1 0.0345114
*
* But also less abstract (which can be both a good thing and bad thing)
*
* wavelet function does not shuffle!
*/
namespace wvlt{
inline namespace V2 {
inline double inner_product(double* x, double const* coef, unsigned int stride){
return x[0] * coef[0] + x[stride] * coef[1] + x[2*stride] * coef[2] + x[3*stride] * coef[3];
}
// will calculate part of wavelete transform (in place!)
// size is size of vector x (so x[size-1] is valid)
// does not calculate "last two" elements (it does not assume periodicity)
// calculates size/stride - 2 elements of the output
inline void wavelet_mul_base(double* x, unsigned int size, unsigned int stride){
assert(x && is_even(size) && is_pow_of_two(stride) && 4*stride <= size);
for(unsigned int i = 0; i < size - 2*stride; i += 2*stride){
double y1 = inner_product(x + i, evn_coef, stride);
double y2 = inner_product(x + i, odd_coef, stride);
x[i] = y1;
x[i+stride] = y2;
}
}
// x1 and x2 are next elements, or wrap around
// calculates size/stride elements of the output
inline void wavelet_mul(double* x, double x1, double x2, unsigned int size, unsigned int stride){
assert(x && is_even(size) && is_pow_of_two(stride) && 2*stride <= size);
if(4*stride <= size)
wavelet_mul_base(x, size, stride);
unsigned int i = size - 2*stride;
double y1 = x[i] * evn_coef[0] + x[i+stride] * evn_coef[1] + x1 * evn_coef[2] + x2 * evn_coef[3];
double y2 = x[i] * odd_coef[0] + x[i+stride] * odd_coef[1] + x1 * odd_coef[2] + x2 * odd_coef[3];
x[i] = y1;
x[i+stride] = y2;
}
// will overwrite x, x2 and x1 are previous elements, or wrap around
// size is size of vector x (so x[size-1] is valid)
inline void wavelet_inv(double* x, double x1, double x2, unsigned int size, unsigned int stride){
assert(x && is_even(size) && is_pow_of_two(stride) && 4*stride <= size);
for(unsigned int i = size - 2*stride; i >= 2*stride; i -= 2*stride){
double y1 = inner_product(x + i - 2*stride, evn_coef_inv, stride);
double y2 = inner_product(x + i - 2*stride, odd_coef_inv, stride);
x[i] = y1;
x[i+stride] = y2;
}
unsigned int i = 0;
double y1 = x2 * evn_coef_inv[0] + x1 * evn_coef_inv[1] + x[i] * evn_coef_inv[2] + x[i+stride] * evn_coef_inv[3];
double y2 = x2 * odd_coef_inv[0] + x1 * odd_coef_inv[1] + x[i] * odd_coef_inv[2] + x[i+stride] * odd_coef_inv[3];
x[i] = y1;
x[i+stride] = y2;
}
// size indicates number of elements to process (so this is different from above!)
inline void wavelet(double* x, unsigned int size, unsigned int stride){
assert(x && is_pow_of_two(size) && size >= 4);
auto full_size = stride*size;
for(unsigned int i = 1; i <= size / 4; i <<= 1){
auto j = stride * i;
wavelet_mul(x, x[0], x[j], full_size, j);
}
}
inline void wavelet_2D(double* in, unsigned int width, unsigned int height){
for(unsigned int y = 0; y < height; ++y)
wavelet(in + y*width, width, 1);
for(unsigned int x = 0; x < width; ++x)
wavelet(in + x, height, width);
}
// size indicates number of elements to process (so this is different from above!)
inline void unwavelet(double* x, unsigned int size, unsigned int stride){
assert(x && is_pow_of_two(size) && size >= 4);
auto full_size = stride*size;
for(unsigned int i = size / 4; i >= 1; i >>= 1){
auto j = stride * i;
wavelet_inv(x, x[full_size-j], x[full_size-2*j], full_size, j);
}
}
inline void unwavelet_2D(double* in, unsigned int width, unsigned int height){
for(unsigned int x = 0; x < width; ++x)
unwavelet(in + x, height, width);
for(unsigned int y = 0; y < height; ++y)
unwavelet(in + y*width, width, 1);
}
}
}

37
lib/wavelet_constants.hpp

@ -1,37 +0,0 @@
#pragma once
#include <cmath>
namespace wvlt {
// first row of the matrix Wn
static double const evn_coef[] = {
(1.0 + std::sqrt(3.0))/(std::sqrt(32.0)),
(3.0 + std::sqrt(3.0))/(std::sqrt(32.0)),
(3.0 - std::sqrt(3.0))/(std::sqrt(32.0)),
(1.0 - std::sqrt(3.0))/(std::sqrt(32.0))
};
// second row of the matrix Wn
static double const odd_coef[] = {
evn_coef[3],
-evn_coef[2],
evn_coef[1],
-evn_coef[0]
};
// first (shifted) row of the matrix Wn^-1
static double const evn_coef_inv[] = {
evn_coef[2],
evn_coef[1],
evn_coef[0],
evn_coef[3]
};
// second (shifted) row of the matrix Wn^-1
static double const odd_coef_inv[] = {
evn_coef[3],
-evn_coef[0],
evn_coef[1],
-evn_coef[2]
};
}

2
src/compress.cpp

@ -5,7 +5,7 @@
*/
#include <image_io.hpp>
#include <wavelet.hpp>
#include <wvlt/wavelet.hpp>
#include <boost/filesystem.hpp>

104
src/main.cpp

@ -1,12 +1,12 @@
#include <image_io.hpp>
#include <wavelet.hpp>
#include <image_database.hpp>
#include <database.hpp>
#include <fingerprints.hpp>
#include <image_io.hpp>
#include <mozaic.hpp>
#include <read_database.hpp>
#include <utilities.hpp>
#include <av/sws.hpp>
#include <boost/filesystem.hpp>
#include <boost/archive/binary_oarchive.hpp>
#include <boost/archive/binary_iarchive.hpp>
extern "C" {
#include <libavformat/avformat.h>
@ -15,50 +15,17 @@ extern "C" {
#include <libswscale/swscale.h>
}
#include <iostream>
#include <fstream>
#include <algorithm>
#include <cmath>
#include <map>
#include <set>
#include <iostream>
#include <utility>
static const int tile_width = 128;
static const int tile_height = 128;
static const int tile_width = 500;
static const int tile_height = 500;
using namespace std;
namespace fs = boost::filesystem;
namespace ar = boost::archive;
using Database = image_database<downscale>;
using Mozaiq = Mozaic<string>;
static Database read_database(string const & database_directory){
Database db;
auto const database_file = database_directory + ".db";
if (!boost::filesystem::exists(database_file)){
fs::path const directory(database_directory);
fs::directory_iterator eod;
for(fs::directory_iterator it(directory); it != eod; ++it){
auto const path = it->path();
auto const ext = path.extension();
if(ext != ".png" && ext != ".jpg") continue;
cout << colors::green("adding: ") << path.string() << endl;
db.add(path.string());
}
ofstream file(database_file);
ar::binary_oarchive archive(file);
archive << db;
} else {
ifstream file(database_file);
ar::binary_iarchive archive(file);
archive >> db;
}
return db;
}
using Metric = downscale;
using Database = image_database<Metric>;
static auto calculate_all_distances(Database const & db, string const & filename, int h_tiles, int v_tiles){
using return_type = decltype(db.distances_for_image(std::declval<av::frame const &>()));
@ -71,50 +38,33 @@ static auto calculate_all_distances(Database const & db, string const & filename
return mozaic;
}
template <typename Mozaiq>
static void save_mozaic(Mozaiq const & mozaic, string filename){
auto const pix_fmt = AV_PIX_FMT_YUVJ444P;
auto const codec_id= AV_CODEC_ID_MJPEG;
// Open all files we need
map<string, av::frame> frames;
for(auto&& y : mozaic.tiles){
for(auto&& x : y){
// only open a file once
if(frames.count(x) > 0) continue;
frames.emplace(x, crop_to_square(open_image(x)));
}
}
auto const total_width = mozaic.h_tiles * tile_width;
auto const total_height = mozaic.v_tiles * tile_height;
// Create output frame
std::vector<uint8_t, av::allocator<uint8_t>> data(make_u(avpicture_get_size(pix_fmt, total_width, total_height)), 0);
av::frame frame = av::frame_alloc();
const av::frame frame = av::frame_alloc();
avpicture_fill(reinterpret_cast<AVPicture*>(frame.get()), data.data(), pix_fmt, total_width, total_height);
frame->width = total_width;
frame->height = total_height;
frame->format = pix_fmt;
// For each tile: get the part, copy input to it
av::frame frame_part = av::frame_clone(frame);
for(int r = 0; r < mozaic.v_tiles; ++r) {
for(int c = 0; c < mozaic.h_tiles; ++c){
av_picture_crop(reinterpret_cast<AVPicture*>(frame_part.get()), reinterpret_cast<AVPicture*>(frame.get()), av::get_format(frame), r * tile_height, c * tile_width);
frame_part->width = tile_width;
frame_part->height = tile_height;
auto&& input = frames.at(mozaic[r][c]);
auto sws_context = sws_getContext(input->width, input->height, av::get_format(input), frame_part->width, frame_part->height, av::get_format(frame_part), 0, nullptr, nullptr, nullptr);
if(!sws_context) throw std::runtime_error("boem sws context");
sws_scale (sws_context, {input->data}, {input->linesize}, 0, input->height, {frame_part->data}, {frame_part->linesize});
sws_freeContext(sws_context);
auto frame_part = av::crop(frame, c * tile_width, r * tile_height, tile_width, tile_height);
auto input = crop_to_square(open_image(mozaic[r][c]));
auto context = sws::create_context(input, frame_part);
sws::scale(context, input, frame_part);
}
}
// Done with the input
frames.clear();
// Encode
auto codec = avcodec_find_encoder(codec_id);
if(!codec) throw av::error("Could not find codec");
@ -143,21 +93,19 @@ int main(){
av_log_set_level(AV_LOG_QUIET);
// TODO: use boost::program_options
string const database_directory = "database";
string const filename = "image.jpg";
string const output = "output.jpg";
int const h_tiles = 4 * 7;
int const v_tiles = 3 * 7;
// NOTE: we could almost do the rest on one line ;D
// TODO: make every step parallel (this should be easy, except for the IO steps)
auto const db = read_database(database_directory);
string const database_directory = "vakantie";
string const filename = "vakantie.jpg";
string const output = "output/vakantie.jpg";
int const h_tiles = 4*4;
int const v_tiles = 3*4;
auto const db = read_database<Metric>(database_directory);
cout << colors::green("Read database: ") << db.size() << endl;
auto const distances = calculate_all_distances(db, filename, h_tiles, v_tiles);
cout << colors::green("Calculated distances for ") << filename << endl;
auto const best_indices = optimize(distances, 1);
auto const best_indices = optimize_unique(distances);
cout << colors::green("Picked best distances") << endl;
auto const mozaic = mozaic_fmap(best_indices, [&](auto i){ return db.filename(i); });

Loading…
Cancel
Save