From 328dcbc47151dfc98b82d503a285ae0e7381221b Mon Sep 17 00:00:00 2001 From: Joshua Moerman Date: Mon, 18 Aug 2014 20:05:38 +0200 Subject: [PATCH] Woah, first iteration (with DAU4 wavelet metric) --- .gitignore | 6 ++ CMakeLists.txt | 15 ++++ lib/.DS_Store | Bin 0 -> 6148 bytes lib/CMakeLists.txt | 9 +++ lib/av.cpp | 64 +++++++++++++++ lib/av.hpp | 71 +++++++++++++++++ lib/fingerprint.cpp | 105 ++++++++++++++++++++++++ lib/fingerprint.hpp | 55 +++++++++++++ lib/image_database.cpp | 2 + lib/image_database.hpp | 63 +++++++++++++++ lib/image_io.cpp | 150 +++++++++++++++++++++++++++++++++++ lib/image_io.hpp | 42 ++++++++++ lib/utilities.hpp | 114 +++++++++++++++++++++++++++ lib/wavelet.hpp | 3 + lib/wavelet_2.hpp | 109 +++++++++++++++++++++++++ lib/wavelet_constants.hpp | 37 +++++++++ src/CMakeLists.txt | 12 +++ src/compress.cpp | 55 +++++++++++++ src/fingerprint_test.cpp | 19 +++++ src/main.cpp | 162 ++++++++++++++++++++++++++++++++++++++ src/needle.cpp | 47 +++++++++++ src/writer.cpp | 75 ++++++++++++++++++ 22 files changed, 1215 insertions(+) create mode 100644 .gitignore create mode 100644 CMakeLists.txt create mode 100644 lib/.DS_Store create mode 100644 lib/CMakeLists.txt create mode 100644 lib/av.cpp create mode 100644 lib/av.hpp create mode 100644 lib/fingerprint.cpp create mode 100644 lib/fingerprint.hpp create mode 100644 lib/image_database.cpp create mode 100644 lib/image_database.hpp create mode 100644 lib/image_io.cpp create mode 100644 lib/image_io.hpp create mode 100644 lib/utilities.hpp create mode 100644 lib/wavelet.hpp create mode 100644 lib/wavelet_2.hpp create mode 100644 lib/wavelet_constants.hpp create mode 100644 src/CMakeLists.txt create mode 100644 src/compress.cpp create mode 100644 src/fingerprint_test.cpp create mode 100644 src/main.cpp create mode 100644 src/needle.cpp create mode 100644 src/writer.cpp diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5d28424 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +.DS_Store +build +database* +*.user +*.jpg + diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..77ad062 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,15 @@ +project(Mozaic) +cmake_minimum_required(VERSION 2.8) + +add_definitions(-std=c++1y) + +find_package(Boost REQUIRED COMPONENTS program_options filesystem system serialization) +include_directories(SYSTEM ${Boost_INCLUDE_DIRS}) +set(libs ${libs} ${Boost_LIBRARIES}) + +# add_subdirectory("contrib") +add_subdirectory("lib") +add_subdirectory("src") + +# file(GLOB resources "resources/*") +# file(COPY ${resources} DESTINATION ${CMAKE_BINARY_DIR}) diff --git a/lib/.DS_Store b/lib/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 GIT binary patch literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0 +#include +#include +#include +} + +#include +#include + +namespace av { + + format_context format_open_input(const std::string& filename, AVInputFormat* format, AVDictionary** options){ + AVFormatContext * ctx = nullptr; + avformat_open_input(&ctx, filename.c_str(), format, options); + if(!ctx) throw error("Unable to open input " + filename); + // This might be optional, but AFAIK people always do this after opening input + avformat_find_stream_info(ctx, options); + return {ctx, [](auto x){ avformat_close_input(&x); }}; + } + + format_context format_alloc_context(){ + auto ptr = avformat_alloc_context(); + if(!ptr) throw error("Could not allocate AVFormatContext"); + return {avformat_alloc_context(), &avformat_free_context}; + } + + + open_codec codec_open(AVCodecContext* ctx, AVCodec* codec, AVDictionary** options){ + if(!ctx) throw error("Invalid codec context"); + if(!codec) throw error("Invalid codec"); + if(avcodec_open2(ctx, codec, options) < 0) throw error("Could not open codec"); + return {ctx, [](auto x){ avcodec_close(x); }}; + } + + + packet read_frame(format_context& ctx, packet_buffer& p){ + if(!av_read_frame(ctx.get(), &p)){ + return {nullptr, &av_free_packet}; + } else { + return {&p, &av_free_packet}; + } + } + + + frame frame_alloc() { + auto ptr = av_frame_alloc(); + if(!ptr) throw error("Could not allocate AVFrame"); + return {ptr, [](auto x){ av_frame_free(&x); }}; + } + + frame frame_clone(const frame& f){ + auto ptr = av_frame_clone(f.get()); + if(!ptr) throw error("Could not clone AVFrame"); + return {ptr, [](auto x){ av_frame_free(&x); }}; + } + + AVPixelFormat get_format(const frame& f){ + return static_cast(f->format); + } + +} diff --git a/lib/av.hpp b/lib/av.hpp new file mode 100644 index 0000000..55ef0cc --- /dev/null +++ b/lib/av.hpp @@ -0,0 +1,71 @@ +#pragma once + +extern "C" { +#include +#include + +typedef struct AVDictionary AVDictionary; +typedef struct AVFormatContext AVFormatContext; +typedef struct AVInputFormat AVInputFormat; +typedef struct AVCodecContext AVCodecContext; +typedef struct AVCodec AVCodec; +typedef struct AVPacket AVPacket; +typedef struct AVFrame AVFrame; +} + +#include +#include + +namespace av { + // Generic error class + struct error : public std::runtime_error { + using std::runtime_error::runtime_error; + }; + + // Type of a free function (for unique_ptr) + template + using deleter = void(*)(T*); + + // AVFormatContext related + using format_context = std::unique_ptr>; + format_context format_open_input(std::string const & filename, AVInputFormat* format, AVDictionary** options); + format_context format_alloc_context(); + + // AVCodec related + using open_codec = std::unique_ptr>; + open_codec codec_open(AVCodecContext* ctx, AVCodec* codec, AVDictionary** options); + + // AVPacket related (this is somewhat strange, but matches the usecase) + // I need to rethink this + using packet_buffer = AVPacket; + using packet = std::unique_ptr>; + packet read_frame(format_context & ctx, packet_buffer & p); + + // AVFrame related + using frame = std::unique_ptr>; + frame frame_alloc(); + frame frame_clone(frame const & f); + AVPixelFormat get_format(frame const & f); + + // Allocator + template + struct allocator { + using value_type = T; + using size_type = size_t; + + T* allocate(size_type n) const { + auto ptr = av_malloc(n * sizeof(T)); + if(!ptr) throw std::bad_alloc(); + return static_cast(ptr); + } + + void deallocate(T* ptr, size_type /*n*/) const noexcept { + av_free(ptr); + } + }; + + template + bool operator==(allocator const &, allocator const &) noexcept { return true; } + template + bool operator!=(allocator const &, allocator const &) noexcept { return false; } +} diff --git a/lib/fingerprint.cpp b/lib/fingerprint.cpp new file mode 100644 index 0000000..0c33728 --- /dev/null +++ b/lib/fingerprint.cpp @@ -0,0 +1,105 @@ +#include "fingerprint.hpp" +#include "wavelet.hpp" +#include "utilities.hpp" + +#include +#include + +rgb_wavelet_coefficients rgb_wavelet_coefficients::calculate(const raw_rgb_image& image){ + rgb_wavelet_coefficients ret; + + std::vector vector(make_u(image.width() * image.height())); + + // for every color + for(unsigned int color = 0; color < 3; ++color){ + auto& coefficient_array = color == 0 ? ret.reds : (color == 1 ? ret.greens : ret.blues); + unsigned int array_index = 0; + + for(unsigned int n = 0; n < make_u(image.width() * image.height()); ++n){ + vector[n] = 2.0 * image.data[3*n + color] / double(255) - 1.0; + } + + wvlt::wavelet_2D(vector.data(), make_u(image.width()), make_u(image.height())); + + auto copy = vector; + for(auto & x : copy) x = std::abs(x); + + auto n_coefficients = coefficient_array.size(); + std::nth_element(copy.begin(), copy.begin() + n_coefficients, copy.end(), std::greater()); + double threshold = copy[n_coefficients-1]; + + for(unsigned int n = 0; n < vector.size(); ++n){ + auto x = vector[n]; + if(std::abs(x) >= threshold) { + coefficient_array[array_index++] = std::make_pair(n, x); + } + if(array_index >= coefficient_array.size()) { + break; + } + } + } + + return ret; +} + +static double square(double x){ + return x*x; +} + +double rgb_wavelet_coefficients::distance_to(const rgb_wavelet_coefficients& y) const { + double distance = 0; + + for(unsigned int color = 0; color < 3; ++color){ + unsigned int i = 0, j = 0; + auto& x_array = color == 0 ? reds : (color == 1 ? greens : blues); + auto& y_array = color == 0 ? y.reds : (color == 1 ? y.greens : y.blues); + + // "merge" + while(i < x_array.size() && j < y_array.size()){ + auto x_pair = x_array[i]; + auto y_pair = y_array[j]; + + if(x_pair.first == y_pair.first) { + distance += square(y_pair.second - x_pair.second); + ++i; + ++j; + } else if(x_pair.first < y_pair.first) { + distance += square(x_pair.second); + ++i; + } else { + distance += square(y_pair.second); + ++j; + } + } + + // remaining part, either x or y + for(; i < x_array.size(); ++i){ + distance += square(x_array[i].second); + } + for(; j < y_array.size(); ++j){ + distance += square(y_array[j].second); + } + } + + return distance; +} + +namespace std { + template + ostream& operator<<(ostream& out, pair const & p){ + return out << '(' << p.first << ", " << p.second << ')'; + } +} + +std::ostream& operator<<(std::ostream& out, rgb_wavelet_coefficients const & x){ + out << "rgb_wavelet_coefficients" << std::endl; + + for(int color = 0; color < 3; ++color){ + auto& coefficient_array = color == 0 ? x.reds : (color == 1 ? x.greens : x.blues); + + out << '[' << coefficient_array[0]; + for(unsigned int i = 1; i < coefficient_array.size(); ++i) out << ", " << coefficient_array[i]; + out << ']' << std::endl; + } + return out; +} diff --git a/lib/fingerprint.hpp b/lib/fingerprint.hpp new file mode 100644 index 0000000..0c6c16e --- /dev/null +++ b/lib/fingerprint.hpp @@ -0,0 +1,55 @@ +#pragma once + +#include "image_io.hpp" + +#include +#include +#include + +#include +#include +#include + +namespace boost { + namespace serialization { + template + void serialize(Archive & ar, std::array & a, const unsigned int /*version*/) { + ar & make_array(a.data(), a.size()); + } + } // namespace serialization +} // namespace boost + +// Default implementation +template +struct fingerprint_traits { + static Fingerprint calculate(raw_rgb_image const & image) { + return Fingerprint::calculate(image); + } + + static auto distance(Fingerprint const & x, Fingerprint const & y) { + return x.distance_to(y); + } +}; + +struct rgb_wavelet_coefficients { + // a double for (x, y) location represented in a single int + using coefficient = std::pair; + + std::array reds; + std::array greens; + std::array blues; + + static rgb_wavelet_coefficients calculate(raw_rgb_image const & image); + double distance_to(rgb_wavelet_coefficients const & y) const; + +private: + friend class boost::serialization::access; + template + void serialize(Archive & ar, const unsigned int /*version*/){ + ar & reds; + ar & greens; + ar & blues; + } +}; + +std::ostream& operator<<(std::ostream& out, rgb_wavelet_coefficients const & x); diff --git a/lib/image_database.cpp b/lib/image_database.cpp new file mode 100644 index 0000000..ebefd50 --- /dev/null +++ b/lib/image_database.cpp @@ -0,0 +1,2 @@ +#include "image_database.hpp" + diff --git a/lib/image_database.hpp b/lib/image_database.hpp new file mode 100644 index 0000000..61642fc --- /dev/null +++ b/lib/image_database.hpp @@ -0,0 +1,63 @@ +#pragma once + +#include +#include + +#include +#include + +#include +#include + +template > +struct image_database { + using index = size_t; + + void add(std::string filename){ + auto image = open_as_rgb(filename); + auto fingerprint = Traits::calculate(image); + + filenames.push_back(filename); + fingerprints.push_back(fingerprint); + } + + std::string filename(index i) const { + return filenames.at(i); + } + + Fingerprint fingerprint(index i) const { + return fingerprints.at(i); + } + + auto size() const { + return fingerprints.size(); + } + + index nearest_image(raw_rgb_image const & image) const { + auto fingerprint = Traits::calculate(image); + + index best_index = 0; + auto best_distance = Traits::distance(fingerprint, fingerprints[0]); + + for(index i = 1; i < fingerprints.size(); ++i){ + auto distance = Traits::distance(fingerprint, fingerprints[i]); + if(distance < best_distance) { + best_distance = distance; + best_index = i; + } + } + + return best_index; + } + +private: + friend class boost::serialization::access; + template + void serialize(Archive & ar, const unsigned int /*version*/){ + ar & filenames; + ar & fingerprints; + } + + std::vector filenames; + std::vector fingerprints; +}; diff --git a/lib/image_io.cpp b/lib/image_io.cpp new file mode 100644 index 0000000..13c8004 --- /dev/null +++ b/lib/image_io.cpp @@ -0,0 +1,150 @@ +#include "image_io.hpp" +#include "utilities.hpp" + +extern "C" { +#include +#include +#include +#include +#include +} + +#include +#include +#include +#include + + +raw_rgb_image::raw_rgb_image(int W, int H) +: data(make_u(avpicture_get_size(AV_PIX_FMT_RGB24, W, H))) +, frame(av::frame_alloc()) +{ + avpicture_fill(reinterpret_cast(frame.get()), data.data(), AV_PIX_FMT_RGB24, W, H); + frame->width = W; + frame->height = H; + frame->format = AV_PIX_FMT_RGB24; +} + +int raw_rgb_image::width() const { return frame->width; } +int raw_rgb_image::height() const { return frame->height; } +AVPixelFormat raw_rgb_image::format() const { return av::get_format(frame); } + + +void save_as_ppm(raw_rgb_image const & image, std::string const & filename) { + // Open file + FILE* file = fopen(filename.c_str(), "wb"); + if(!file) throw std::runtime_error("cannot save"); + + // Write header + fprintf(stderr, "P6\n%d %d\n255\n", image.width(), image.height()); + fprintf(file, "P6\n%d %d\n255\n", image.width(), image.height()); + + // Write pixel data + for(int y = 0; y < image.height(); y++) + fwrite(image.data.data() + 3*y*image.width(), 1, make_u(3*image.width()), file); + + // Close file + fclose(file); +} + + +av::frame open_image(std::string const & filename){ + // Open the file + auto format_context = av::format_open_input(filename, nullptr, nullptr); + + // Get the codec and let us own the buffers + auto codec_context = format_context->streams[0]->codec; + auto codec = avcodec_find_decoder(codec_context->codec_id); + codec_context->refcounted_frames = 1; + + // Open the codec + auto opened_codec = av::codec_open(codec_context, codec, nullptr); + + // Allocate frame + av::frame frame = av::frame_alloc(); + + // things to read and decode it + av::packet_buffer empty_packet; + int finished = 0; + while(auto packet = av::read_frame(format_context, empty_packet)) { + if(packet->stream_index != 0) continue; + + int ret = avcodec_decode_video2(opened_codec.get(), frame.get(), &finished, packet.get()); + if (ret <= 0) { + printf("Error [%d] while decoding frame: %s\n", ret, strerror(AVERROR(ret))); + throw std::runtime_error("boem packet"); + } + + // we only need the first frame + if(finished) break; + } + + // some decoders need extra passes + while(!finished) { + avcodec_decode_video2(opened_codec.get(), frame.get(), &finished, &empty_packet); + av_free_packet(&empty_packet); + } + + return frame; +} + + +void crop_to_square(av::frame& frame){ + int diff = frame->height - frame->width; + int ret = 0; + if(diff > 0) { + ret = av_picture_crop(reinterpret_cast(frame.get()), reinterpret_cast(frame.get()), av::get_format(frame), diff/2, 0); + frame->height = frame->width; + } else if(diff < 0) { + ret = av_picture_crop(reinterpret_cast(frame.get()), reinterpret_cast(frame.get()), av::get_format(frame), 0, -diff/2); + frame->width = frame->height; + } + + if(ret < 0) throw std::runtime_error("boem crop"); +} + +av::frame crop_to_square(av::frame && frame){ + crop_to_square(frame); + return std::move(frame); +} + +raw_rgb_image to_raw_rgb_image(av::frame const & frame, int new_width, int new_height){ + raw_rgb_image image(new_width, new_height); + + auto c = sws_getContext(frame->width, frame->height, av::get_format(frame), image.width(), image.height(), image.format(), 0, nullptr, nullptr, nullptr); + if(!c) throw std::runtime_error("boem sws context"); + sws_scale (c, {frame->data}, {frame->linesize}, 0, frame->height, {image.frame->data}, {image.frame->linesize}); + sws_freeContext(c); + + return image; +} + + +void apply_to_tiles(const std::string& filename, int h_tiles, int v_tiles, std::function fun){ + auto org_frame = open_image(filename); + + // create clone to crop + av::frame cropped_frame = av::frame_clone(org_frame); + + // create raw buffer for the callback + raw_rgb_image image(512, 512); + + // create the tiles + cropped_frame->width = org_frame->width / h_tiles; + cropped_frame->height = org_frame->height / v_tiles; + for(int r = 0; r < v_tiles; ++r){ + for(int c = 0; c < h_tiles; ++c){ + int x_crop = c * cropped_frame->width; + int y_crop = r * cropped_frame->height; + //std::cout << "crop " << x_crop << ", " << y_crop << std::endl; + av_picture_crop(reinterpret_cast(cropped_frame.get()), reinterpret_cast(org_frame.get()), av::get_format(org_frame), y_crop, x_crop); + + auto context = sws_getContext(cropped_frame->width, cropped_frame->height, av::get_format(org_frame), image.width(), image.height(), image.format(), 0, nullptr, nullptr, nullptr); + if(!context) throw std::runtime_error("boem sws context"); + sws_scale (context, {cropped_frame->data}, {cropped_frame->linesize}, 0, cropped_frame->height, {image.frame->data}, {image.frame->linesize}); + sws_freeContext(context); + + fun(c, r, image); + } + } +} diff --git a/lib/image_io.hpp b/lib/image_io.hpp new file mode 100644 index 0000000..7c67bf1 --- /dev/null +++ b/lib/image_io.hpp @@ -0,0 +1,42 @@ +#pragma once + +#include "av.hpp" + +#include +#include +#include +#include + +// Basic image representation +// 3 bytes per pixel (rgb), so size of data is width*height*3 +struct raw_rgb_image { + std::vector> data; + av::frame frame; + + raw_rgb_image(int W, int H); + + int width() const; + int height() const; + AVPixelFormat format() const; +}; + +// dumps image in ppm format +void save_as_ppm(raw_rgb_image const & image, std::string const & filename); + +// opens an image in its own format +av::frame open_image(std::string const & filename); + +// crops to the bottom right square (cheap operation) +void crop_to_square(av::frame & frame); +av::frame crop_to_square(av::frame && frame); + +// converts and resizes +raw_rgb_image to_raw_rgb_image(av::frame const & frame, int new_width, int new_height); + +// Legacy +inline raw_rgb_image open_as_rgb(const std::string &filename){ + return to_raw_rgb_image(crop_to_square(open_image(filename)), 512, 512); +} + +// apply function to every tile, fun :: Column, Row, Image -> Void +void apply_to_tiles(std::string const & filename, int h_tiles, int v_tiles, std::function fun); diff --git a/lib/utilities.hpp b/lib/utilities.hpp new file mode 100644 index 0000000..e3e8846 --- /dev/null +++ b/lib/utilities.hpp @@ -0,0 +1,114 @@ +#pragma once + +#include +#include +#include +#include +#include + +template +bool is_pow_of_two(Int n){ + return n && !(n & (n - 1)); +} + +template +bool is_even(Int n){ + return (n & 1) == 0; +} + +// Used to silence warnings, will assert in debug build +inline unsigned int make_u(int x){ + assert(x >= 0); + return static_cast(x); +} + +// calculates integer 2-log such that: +// 2^(two_log(x)) >= x > 2^(two_log(x) - 1) +inline unsigned int two_log(unsigned int x){ + if(x <= 1) return 0; + return 8*sizeof(unsigned int) - unsigned(__builtin_clz(x-1)); +} + +// calculates 2^x (by squaring) +inline unsigned int pow_two(unsigned int x){ + unsigned int base = 2; + unsigned int y = 1; + while(x){ + if(x & 1) y *= base; + x >>= 1; + base *= base; + } + return y; +} + +inline uint8_t to_uint8_t(double x){ + if(x >= 1) return 255; + if(x <= 0) return 0; + return static_cast(255*x); +} + +// Makes numbers human-readable with one decimal +// eg 2350000 becomes 2.3M +template +inline std::string human_string(Int n, std::string suffix = ""){ + static const std::string names [] = {"", "K", "M", "G"}; + unsigned int i = 0; + Int m = 10*n; + while(n > 1000 && i < sizeof(names)){ + n /= 1000; + m /= 1000; + ++i; + } + // cast is to make the old gcc 4.4 happy (it doesn't have all overloads of to_string) + return std::to_string(n) + "." + std::to_string(m % 10) + names[i] + suffix; +} + +inline std::string field(std::string const & str){ + const int length = 12; + if(str.size() > length) return str + ":\t"; + auto add = length - str.size(); + return str + ":" + std::string(add, ' ') + "\t"; +} + +// Prints a vector with brackets and commas +// Does not work recursively! +template +void print_vec(std::vector const & v){ + auto it = v.begin(), end = v.end(); + std::cout << "{" << *it++; + while(it != end) std::cout << ", " << *it++; + std::cout << "}\n"; +} + +// RAII struct for timing +struct timer{ + typedef std::chrono::high_resolution_clock clock; + typedef std::chrono::time_point time; + typedef std::chrono::duration seconds; + + std::string name; + time begin; + + timer(std::string name_) + : name(name_) + , begin(clock::now()) + {} + + ~timer(){ + time end = clock::now(); + std::cout << name << "\t" << from_dur(end - begin) << std::endl; + } + + static double from_dur(seconds s){ + return s.count(); + } +}; + +namespace colors { + inline std::string red(std::string s){ + return "\x1b[31m" + s + "\x1b[39m"; + } + inline std::string green(std::string s){ + return "\x1b[32m" + s + "\x1b[39m"; + } +} diff --git a/lib/wavelet.hpp b/lib/wavelet.hpp new file mode 100644 index 0000000..900c981 --- /dev/null +++ b/lib/wavelet.hpp @@ -0,0 +1,3 @@ +#pragma once + +#include "wavelet_2.hpp" diff --git a/lib/wavelet_2.hpp b/lib/wavelet_2.hpp new file mode 100644 index 0000000..b4f987a --- /dev/null +++ b/lib/wavelet_2.hpp @@ -0,0 +1,109 @@ +#pragma once + +#include +#include "utilities.hpp" +#include "wavelet_constants.hpp" + +/* Rewrite of the basic functions + * This will make the adaption for the parallel case easier, + * because we can explicitly pass the two elements which are out of range + * (these are normally wrap-around values) + * + * These are also faster (testcase: size = 8, stride = 1, iterations = 100000) + * V2 0.00377901 + * V1 0.0345114 + * + * But also less abstract (which can be both a good thing and bad thing) + * + * wavelet function does not shuffle! + */ + +namespace wvlt{ + inline namespace V2 { + inline double inner_product(double* x, double const* coef, unsigned int stride){ + return x[0] * coef[0] + x[stride] * coef[1] + x[2*stride] * coef[2] + x[3*stride] * coef[3]; + } + + // will calculate part of wavelete transform (in place!) + // size is size of vector x (so x[size-1] is valid) + // does not calculate "last two" elements (it does not assume periodicity) + // calculates size/stride - 2 elements of the output + inline void wavelet_mul_base(double* x, unsigned int size, unsigned int stride){ + assert(x && is_even(size) && is_pow_of_two(stride) && 4*stride <= size); + + for(unsigned int i = 0; i < size - 2*stride; i += 2*stride){ + double y1 = inner_product(x + i, evn_coef, stride); + double y2 = inner_product(x + i, odd_coef, stride); + x[i] = y1; + x[i+stride] = y2; + } + } + + // x1 and x2 are next elements, or wrap around + // calculates size/stride elements of the output + inline void wavelet_mul(double* x, double x1, double x2, unsigned int size, unsigned int stride){ + assert(x && is_even(size) && is_pow_of_two(stride) && 2*stride <= size); + if(4*stride <= size) + wavelet_mul_base(x, size, stride); + + unsigned int i = size - 2*stride; + double y1 = x[i] * evn_coef[0] + x[i+stride] * evn_coef[1] + x1 * evn_coef[2] + x2 * evn_coef[3]; + double y2 = x[i] * odd_coef[0] + x[i+stride] * odd_coef[1] + x1 * odd_coef[2] + x2 * odd_coef[3]; + x[i] = y1; + x[i+stride] = y2; + } + + // will overwrite x, x2 and x1 are previous elements, or wrap around + // size is size of vector x (so x[size-1] is valid) + inline void wavelet_inv(double* x, double x1, double x2, unsigned int size, unsigned int stride){ + assert(x && is_even(size) && is_pow_of_two(stride) && 4*stride <= size); + + for(unsigned int i = size - 2*stride; i >= 2*stride; i -= 2*stride){ + double y1 = inner_product(x + i - 2*stride, evn_coef_inv, stride); + double y2 = inner_product(x + i - 2*stride, odd_coef_inv, stride); + x[i] = y1; + x[i+stride] = y2; + } + + unsigned int i = 0; + double y1 = x2 * evn_coef_inv[0] + x1 * evn_coef_inv[1] + x[i] * evn_coef_inv[2] + x[i+stride] * evn_coef_inv[3]; + double y2 = x2 * odd_coef_inv[0] + x1 * odd_coef_inv[1] + x[i] * odd_coef_inv[2] + x[i+stride] * odd_coef_inv[3]; + x[i] = y1; + x[i+stride] = y2; + } + + // size indicates number of elements to process (so this is different from above!) + inline void wavelet(double* x, unsigned int size, unsigned int stride){ + assert(x && is_pow_of_two(size) && size >= 4); + auto full_size = stride*size; + for(unsigned int i = 1; i <= size / 4; i <<= 1){ + auto j = stride * i; + wavelet_mul(x, x[0], x[j], full_size, j); + } + } + + inline void wavelet_2D(double* in, unsigned int width, unsigned int height){ + for(unsigned int y = 0; y < height; ++y) + wavelet(in + y*width, width, 1); + for(unsigned int x = 0; x < width; ++x) + wavelet(in + x, height, width); + } + + // size indicates number of elements to process (so this is different from above!) + inline void unwavelet(double* x, unsigned int size, unsigned int stride){ + assert(x && is_pow_of_two(size) && size >= 4); + auto full_size = stride*size; + for(unsigned int i = size / 4; i >= 1; i >>= 1){ + auto j = stride * i; + wavelet_inv(x, x[full_size-j], x[full_size-2*j], full_size, j); + } + } + + inline void unwavelet_2D(double* in, unsigned int width, unsigned int height){ + for(unsigned int x = 0; x < width; ++x) + unwavelet(in + x, height, width); + for(unsigned int y = 0; y < height; ++y) + unwavelet(in + y*width, width, 1); + } + } +} diff --git a/lib/wavelet_constants.hpp b/lib/wavelet_constants.hpp new file mode 100644 index 0000000..4647816 --- /dev/null +++ b/lib/wavelet_constants.hpp @@ -0,0 +1,37 @@ +#pragma once + +#include + +namespace wvlt { + // first row of the matrix Wn + static double const evn_coef[] = { + (1.0 + std::sqrt(3.0))/(std::sqrt(32.0)), + (3.0 + std::sqrt(3.0))/(std::sqrt(32.0)), + (3.0 - std::sqrt(3.0))/(std::sqrt(32.0)), + (1.0 - std::sqrt(3.0))/(std::sqrt(32.0)) + }; + + // second row of the matrix Wn + static double const odd_coef[] = { + evn_coef[3], + -evn_coef[2], + evn_coef[1], + -evn_coef[0] + }; + + // first (shifted) row of the matrix Wn^-1 + static double const evn_coef_inv[] = { + evn_coef[2], + evn_coef[1], + evn_coef[0], + evn_coef[3] + }; + + // second (shifted) row of the matrix Wn^-1 + static double const odd_coef_inv[] = { + evn_coef[3], + -evn_coef[0], + evn_coef[1], + -evn_coef[2] + }; +} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000..1848345 --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,12 @@ + +set (CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +file(GLOB sources "*.cpp") + +set(libs common avformat avcodec avutil swscale ${Boost_LIBRARIES}) + +foreach(source ${sources}) + get_filename_component(exec ${source} NAME_WE) + add_executable(${exec} ${source}) + target_link_libraries(${exec} ${libs}) +endforeach() diff --git a/src/compress.cpp b/src/compress.cpp new file mode 100644 index 0000000..159dffd --- /dev/null +++ b/src/compress.cpp @@ -0,0 +1,55 @@ +/* + * This does not literally comrpess the image + * But it shows how the image would look like after compressing and decomressing it, useful + * to see how different algorithms afect the image. + */ + +#include +#include + +#include + +extern "C" { +#include +} + +#include +#include + +using namespace std; + +int main(){ + av_register_all(); + + auto image = open_as_rgb("image.jpg"); + + std::vector vector(make_u(image.width() * image.height())); + + // for every color + for(unsigned int i = 0; i < 3; ++i){ + for(unsigned int n = 0; n < make_u(image.width() * image.height()); ++n){ + vector[n] = 2.0 * image.data[3*n + i] / double(255) - 1.0; + } + + wvlt::wavelet_2D(vector.data(), make_u(image.width()), make_u(image.height())); + + auto copy = vector; + for(auto & x : copy) x = std::abs(x); + + unsigned int n_coefficients[] = {20, 20, 20}; + std::nth_element(copy.begin(), copy.begin() + n_coefficients[i], copy.end(), std::greater()); + + for(auto & x : vector){ + if(std::abs(x) < copy[n_coefficients[i]]) x = 0; + } + + wvlt::unwavelet_2D(vector.data(), make_u(image.width()), make_u(image.height())); + + for(unsigned int n = 0; n < make_u(image.width() * image.height()); ++n){ + image.data[3*n + i] = to_uint8_t(0.5 * vector[n] + 0.5); + } + } + + save_as_ppm(image, "output.ppm"); +} + diff --git a/src/fingerprint_test.cpp b/src/fingerprint_test.cpp new file mode 100644 index 0000000..85626d0 --- /dev/null +++ b/src/fingerprint_test.cpp @@ -0,0 +1,19 @@ +#include + +extern "C" { +#include +} + +#include + +using namespace std; + +int main(){ + av_register_all(); + auto image = open_as_rgb("test.jpg"); + + rgb_wavelet_coefficients x = rgb_wavelet_coefficients::calculate(image); + + cout << x << endl; + cout << x.distance_to(x) << endl; +} diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 0000000..c161bd8 --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,162 @@ +#include +#include +#include +#include + +#include +#include +#include + +extern "C" { +#include +#include +#include +#include +} + +#include +#include +#include +#include +#include +#include + +using namespace std; +namespace fs = boost::filesystem; +namespace ar = boost::archive; + +using Database = image_database; +using Mozaic = map, string>; + +Database read_database(string const & database_directory){ + image_database db; + auto database_file = database_directory + ".db"; + + if (!boost::filesystem::exists(database_file)){ + fs::path directory(database_directory); + fs::directory_iterator eod; + for(fs::directory_iterator it(directory); it != eod; ++it){ + auto && path = it->path(); + auto ext = path.extension(); + if(ext != ".png" && ext != ".jpg") continue; + + cout << colors::green("adding: ") << path.string() << endl; + db.add(path.string()); + } + + ofstream file(database_file); + ar::binary_oarchive archive(file); + archive << db; + } else { + ifstream file(database_file); + ar::binary_iarchive archive(file); + // read class state from archive + archive >> db; + } + + cout << colors::green("read database: ") << db.size() << endl; + return db; +} + +Mozaic create_mozaic(Database const & db, string const & filename, int h_tiles, int v_tiles){ + map, string> mozaic; + + apply_to_tiles("image.jpg", h_tiles, v_tiles, [&](int c, int r, raw_rgb_image const & image){ + auto index = db.nearest_image(image); + cout << colors::red("tile ") << c << ", " << r << ": " << db.filename(index) << endl; + mozaic[make_pair(c, r)] = db.filename(index); + }); + + return mozaic; +} + +void save_mozaic(Mozaic const & mozaic, string filename, int h_tiles, int v_tiles){ + const auto pix_fmt = AV_PIX_FMT_YUVJ444P; + const auto codec_id= AV_CODEC_ID_MJPEG; + + int tile_width = 128; + int tile_height = 128; + + // Open all files we need + map frames; + for(auto&& x : mozaic){ + // only open a file once + if(frames.count(x.second) > 0) continue; + frames.emplace(x.second, crop_to_square(open_image(x.second))); + } + + auto total_width = h_tiles * tile_width; + auto total_height = v_tiles * tile_height; + + // Create output frame + std::vector> data(make_u(avpicture_get_size(pix_fmt, total_width, total_height)), 0); + av::frame frame = av::frame_alloc(); + avpicture_fill(reinterpret_cast(frame.get()), data.data(), pix_fmt, total_width, total_height); + frame->width = total_width; + frame->height = total_height; + frame->format = pix_fmt; + + // For each tile: get the part, copy input to it + av::frame frame_part = av::frame_clone(frame); + for(int r = 0; r < v_tiles; ++r) { + for(int c = 0; c < h_tiles; ++c){ + av_picture_crop(reinterpret_cast(frame_part.get()), reinterpret_cast(frame.get()), av::get_format(frame), r * tile_height, c * tile_width); + frame_part->width = tile_width; + frame_part->height = tile_height; + + auto&& input = frames.at(mozaic.at(make_pair(c, r))); + auto sws_context = sws_getContext(input->width, input->height, av::get_format(input), frame_part->width, frame_part->height, av::get_format(frame_part), 0, nullptr, nullptr, nullptr); + if(!sws_context) throw std::runtime_error("boem sws context"); + sws_scale (sws_context, {input->data}, {input->linesize}, 0, input->height, {frame_part->data}, {frame_part->linesize}); + sws_freeContext(sws_context); + } + } + + // Done with the input + frames.clear(); + + // Encode + auto codec = avcodec_find_encoder(codec_id); + if(!codec) throw av::error("Could not find codec"); + + auto codec_ctx = std::unique_ptr>(avcodec_alloc_context3(codec), [](auto x){ avcodec_free_context(&x); }); + if(!codec_ctx) throw av::error("Could not allocate codec context"); + + codec_ctx->pix_fmt = pix_fmt; + codec_ctx->width = frame->width; + codec_ctx->height = frame->height; + codec_ctx->time_base = av_make_q(1, 1); + auto opened_codec = av::codec_open(codec_ctx.get(), codec, nullptr); + + const auto buffer_size = avpicture_get_size(pix_fmt, codec_ctx->width, codec_ctx->height); + std::vector buffer(make_u(buffer_size), 0); + auto output_size = avcodec_encode_video(codec_ctx.get(), buffer.data(), buffer_size, frame.get()); + assert(output_size <= buffer_size); + cout << "output size" << output_size << endl; + + auto file = fopen(filename.c_str(), "wb"); + fwrite(buffer.data(), 1, make_u(output_size), file); + fclose(file); +} + +int main(){ + av_register_all(); + + string database_directory = "database"; + string filename = "image.jpg"; + string output = "output.jpg"; + int h_tiles = 4 * 13; + int v_tiles = 3 * 13; + + const auto db = read_database(database_directory); + const auto mozaic = create_mozaic(db, filename, h_tiles, v_tiles); + save_mozaic(mozaic, output, h_tiles, v_tiles); + + // debugging + for(int r = 0; r < v_tiles; ++r){ + for(int c = 0; c < h_tiles; ++c){ + cout << mozaic.at(make_pair(c, r)) << "\t"; + } + cout << endl; + } +} diff --git a/src/needle.cpp b/src/needle.cpp new file mode 100644 index 0000000..e229328 --- /dev/null +++ b/src/needle.cpp @@ -0,0 +1,47 @@ +#include +#include +#include +#include + +#include + +extern "C" { +#include +} + +#include +#include +#include + +using namespace std; +namespace fs = boost::filesystem; + +int main(){ + av_register_all(); + + string database_directory = "database"; + string filename = "needle.jpg"; + + image_database db; + + fs::path directory(database_directory); + fs::directory_iterator eod; + for(fs::directory_iterator it(directory); it != eod; ++it){ + auto && path = it->path(); + auto ext = path.extension(); + if(ext != ".png" && ext != ".jpg") continue; + + cout << colors::green("adding: ") << path.string() << endl; + db.add(path.string()); + } + + while(true){ + cout << "****************" << endl; + cout << colors::green("database: ") << db.size() << endl; + cout << colors::green("press enter to search match for: ") << filename << endl; + cin.ignore(); + auto index = db.nearest_image(open_as_rgb(filename)); + cout << colors::green("match: ") << db.filename(index) << endl; + } +} + diff --git a/src/writer.cpp b/src/writer.cpp new file mode 100644 index 0000000..79cf292 --- /dev/null +++ b/src/writer.cpp @@ -0,0 +1,75 @@ +#include +#include + +extern "C" { +#include +#include +#include +} + +using namespace std; + + +static void save_as_jpg(av::frame const & frame, std::string const & filename){ + const auto pix_fmt = AV_PIX_FMT_YUVJ444P; + const auto codec_id= AV_CODEC_ID_MJPEG; + + // Convert + int tile_width = 800; + int tile_height = 600; + + int h_tiles = 8; + int v_tiles = 6; + + std::vector> data(make_u(avpicture_get_size(pix_fmt, h_tiles * tile_width, v_tiles * tile_height)), 0); + av::frame converted_frame = av::frame_alloc(); + avpicture_fill(reinterpret_cast(converted_frame.get()), data.data(), pix_fmt, h_tiles * tile_width, v_tiles * tile_height); + converted_frame->width = h_tiles * tile_width; + converted_frame->height = v_tiles * tile_height; + converted_frame->format = pix_fmt; + + auto sws_context = sws_getContext(frame->width, frame->height, av::get_format(frame), tile_width, tile_height, av::get_format(converted_frame), 0, nullptr, nullptr, nullptr); + if(!sws_context) throw std::runtime_error("boem sws context"); + + av::frame cropped_frame = av::frame_clone(converted_frame); + for(int r = 0; r < v_tiles; ++r) { + for(int c = 0; c < h_tiles; ++c){ + av_picture_crop(reinterpret_cast(cropped_frame.get()), reinterpret_cast(converted_frame.get()), av::get_format(converted_frame), r * tile_height, c * tile_width); + sws_scale (sws_context, {frame->data}, {frame->linesize}, 0, frame->height, {cropped_frame->data}, {cropped_frame->linesize}); + } + } + + sws_freeContext(sws_context); + + // Encode + auto codec = avcodec_find_encoder(codec_id); + if(!codec) throw av::error("Could not find codec"); + + auto codec_ctx = std::unique_ptr>(avcodec_alloc_context3(codec), [](auto x){ avcodec_free_context(&x); }); + if(!codec_ctx) throw av::error("Could not allocate codec context"); + + codec_ctx->pix_fmt = pix_fmt; + codec_ctx->width = converted_frame->width; + codec_ctx->height = converted_frame->height; + codec_ctx->time_base = av_make_q(1, 1); + auto opened_codec = av::codec_open(codec_ctx.get(), codec, nullptr); + + const auto buffer_size = avpicture_get_size(pix_fmt, codec_ctx->width, codec_ctx->height); + std::vector buffer(make_u(buffer_size), 0); + auto output_size = avcodec_encode_video(codec_ctx.get(), buffer.data(), buffer_size, converted_frame.get()); + assert(output_size <= buffer_size); + cout << "output size" << output_size << endl; + + auto file = fopen(filename.c_str(), "wb"); + fwrite(buffer.data(), 1, make_u(output_size), file); + fclose(file); +} + +int main(){ + av_register_all(); + + while(true){ + auto image = open_image("needle.png"); + save_as_jpg(image, "output.jpg"); + } +}