diff --git a/lib/av/av.cpp b/lib/av/av.cpp index e13d72e..687ffa9 100644 --- a/lib/av/av.cpp +++ b/lib/av/av.cpp @@ -27,11 +27,28 @@ namespace av { } - open_codec codec_open(AVCodecContext* ctx, AVCodec* codec, AVDictionary** options){ + codec find_encoder(AVCodecID codec_id){ + auto ptr = avcodec_find_encoder(codec_id); + if(!ptr) throw error("Could not find codec"); + return {ptr, nullptr}; + } + + codec_context context_alloc(codec const & codec){ + auto ptr = avcodec_alloc_context3(codec.get()); + if(!ptr) throw av::error("Could not allocate codec context"); + return {ptr, [](auto x){ avcodec_free_context(&x); }}; + } + + codec_context context_from_stream(format_context const & ctx, size_t i){ + return {ctx->streams[i]->codec, nullptr}; + } + + + open_guard codec_open(codec_context & ctx, codec const & codec, AVDictionary** options){ if(!ctx) throw error("Invalid codec context"); if(!codec) throw error("Invalid codec"); - if(avcodec_open2(ctx, codec, options) < 0) throw error("Could not open codec"); - return {ctx, [](auto x){ avcodec_close(x); }}; + if(avcodec_open2(ctx.get(), codec.get(), options) < 0) throw error("Could not open codec"); + return {ctx.get(), [](auto x){ avcodec_close(x); }}; } @@ -65,7 +82,7 @@ namespace av { } frame crop(frame&& f, int left, int top, int width, int height){ - if(left + width > f->width || top + height > f->height) throw error("Crop sizes do not match"); + // if(left + width > f->width || top + height > f->height) throw error("Crop sizes do not match"); auto ptr = reinterpret_cast(f.get()); auto ret = av_picture_crop(ptr, ptr, av::get_format(f), top, left); diff --git a/lib/av/av.hpp b/lib/av/av.hpp index 2545c10..cbcf1c3 100644 --- a/lib/av/av.hpp +++ b/lib/av/av.hpp @@ -4,6 +4,7 @@ extern "C" { #include +#include // only needed for AVCodecID typedef struct AVDictionary AVDictionary; typedef struct AVFormatContext AVFormatContext; @@ -18,22 +19,29 @@ typedef struct AVFrame AVFrame; namespace av { // AVFormatContext related - using format_context = av::unique_ptr; + using format_context = wrapper; format_context format_open_input(std::string const & filename, AVInputFormat* format, AVDictionary** options); format_context format_alloc_context(); // AVCodec related - using open_codec = av::unique_ptr; - open_codec codec_open(AVCodecContext* ctx, AVCodec* codec, AVDictionary** options); + using codec = wrapper; + codec find_encoder(AVCodecID codec_id); + + using codec_context = wrapper; + codec_context context_alloc(codec const & codec); + codec_context context_from_stream(format_context const & ctx, size_t i); + + using open_guard = wrapper; + open_guard codec_open(codec_context & ctx, codec const & codec, AVDictionary** options); // AVPacket related (this is somewhat strange, but matches the usecase) // I need to rethink this using packet_buffer = AVPacket; - using packet = av::unique_ptr; + using packet = wrapper; packet read_frame(format_context & ctx, packet_buffer & p); // AVFrame related - using frame = av::unique_ptr; + using frame = wrapper; frame frame_alloc(); frame frame_clone(frame const & f); // creates a clone with the *same* buffer AVPixelFormat get_format(frame const & f); diff --git a/lib/av/av_base.hpp b/lib/av/av_base.hpp index 9fe69cd..d7be969 100644 --- a/lib/av/av_base.hpp +++ b/lib/av/av_base.hpp @@ -13,13 +13,49 @@ namespace av { using runtime_error::runtime_error; }; - // Type of a freeing function (for unique_ptr) + // Type of a freeing function (for wrapper) template using deleter = void(*)(T*); - // Often used type - template - using unique_ptr = std::unique_ptr>; + // Kind of unique_ptr, but with const-semantics + template > + struct wrapper { + wrapper(T * data_ = nullptr, D deleter_ = D()) + : data(data_) + , deleter(deleter_) + {} + + wrapper(wrapper && o) + : data(o.data) + , deleter(o.deleter) + { o.release(); } + + wrapper & operator=(wrapper && o) { + data = o.data; + deleter = o.deleter; + o.release(); + return *this; + } + + wrapper(wrapper const & o) = delete; + wrapper & operator=(wrapper const & o) = delete; + + ~wrapper(){ reset(); } + + T * get() { return data; } + T const * get() const { return data; } + T & operator*() { return *get(); } + T const & operator*() const { return *get(); } + T * operator->() { return get(); } + T const * operator->() const { return get(); } + + T * release() { auto t = data; data = nullptr; return t; } + void reset() { if(data && deleter) deleter(data); data = nullptr; } + operator bool() const { return get(); } + private: + T * data; + D deleter; + }; // Allocator template diff --git a/lib/av/sws.cpp b/lib/av/sws.cpp index 62e6b0b..cb87dec 100644 --- a/lib/av/sws.cpp +++ b/lib/av/sws.cpp @@ -20,7 +20,7 @@ namespace sws { return {ptr, &sws_freeContext}; } - void scale(context const & c, av::frame const & src, av::frame const & dest){ + void scale(context & c, av::frame const & src, av::frame & dest){ auto ret = sws_scale(c.get(), {src->data}, {src->linesize}, 0, src->height, {dest->data}, {dest->linesize}); if(ret < 0) throw error("boem scale"); } diff --git a/lib/av/sws.hpp b/lib/av/sws.hpp index a48bbc2..ffcfd29 100644 --- a/lib/av/sws.hpp +++ b/lib/av/sws.hpp @@ -11,15 +11,10 @@ typedef struct SwsContext SwsContext; #include namespace sws{ - template - using deleter = av::deleter; + using namespace av; - struct error : std::runtime_error { - using runtime_error::runtime_error; - }; - - using context = std::unique_ptr>; + using context = wrapper; context create_context(av::frame const & src, av::frame const & dest, int flags = 0, SwsFilter * src_filter = nullptr, SwsFilter * dest_filter = nullptr, double * params = nullptr); - void scale(context const & c, av::frame const & src, av::frame const & dest); + void scale(context & c, av::frame const & src, av::frame & dest); } diff --git a/lib/fingerprints/math.hpp b/lib/fingerprints/math.hpp new file mode 100644 index 0000000..99cfaaf --- /dev/null +++ b/lib/fingerprints/math.hpp @@ -0,0 +1,6 @@ +#pragma once + +//! best function ever +inline double square(double x){ + return x*x; +} diff --git a/lib/fingerprints/rgb.cpp b/lib/fingerprints/rgb.cpp new file mode 100644 index 0000000..15fd0e6 --- /dev/null +++ b/lib/fingerprints/rgb.cpp @@ -0,0 +1,58 @@ +#include "rgb.hpp" +#include "math.hpp" + +#include +#include + +#include +#include + +namespace fingerprints { + +static raw_rgb_image downscale_step(av::frame const & frame, int factor) { + raw_rgb_image image(frame->width / factor, frame->height / factor); + + auto context = sws::create_context(frame, image.frame); + sws::scale(context, frame, image.frame); + + return image; +} + +static raw_rgb_image downscale_to(av::frame const & frame, int w, int h){ + // ffmpeg doesnt let us downscale all the way to 5 at once :(, so we do a loop + raw_rgb_image image; + auto* new_frame = &frame; + while((*new_frame)->width > 8*w && (*new_frame)->height > 8*h){ + image = downscale_step(*new_frame, 4); + new_frame = &image.frame; + } + return to_raw_rgb_image(image.frame, w, h); +} + +rgb rgb::pre_calculate(av::frame const & frame){ + auto const image = downscale_to(crop_to_square(frame), 5, 5); + + rgb ret; + ret.data.assign(image.data.size(), 0); + + std::copy(image.data.begin(), image.data.end(), ret.data.begin()); + + return ret; +} + +rgb rgb::calculate(const av::frame& frame){ + return pre_calculate(frame); +} + +double rgb::distance_to(const rgb& fingerprint) const { + assert(data.size() == fingerprint.data.size()); + + double distance = 0; + for(size_t i = 0; i < data.size(); ++i){ + distance += square(data[i] - fingerprint.data[i]); + } + + return distance; +} + +} // namespace fingerprints diff --git a/lib/fingerprints/rgb.hpp b/lib/fingerprints/rgb.hpp new file mode 100644 index 0000000..5c06980 --- /dev/null +++ b/lib/fingerprints/rgb.hpp @@ -0,0 +1,30 @@ +#pragma once + +#include +#include + +#include +#include + +#include +#include + + +namespace fingerprints { +struct rgb { + std::vector data; + + static rgb pre_calculate(av::frame const & frame); + static rgb calculate(av::frame const & frame); + double distance_to(rgb const & fingerprint) const; + + static std::string name(){ return "rgb-25-25-25"; } + +private: + friend class boost::serialization::access; + template + void serialize(Archive & ar, const unsigned int /*version*/){ + ar & data; + } +}; +} // namespace fingerprints diff --git a/lib/fingerprints/wvlt_rgb.cpp b/lib/fingerprints/wvlt_rgb.cpp new file mode 100644 index 0000000..059e2de --- /dev/null +++ b/lib/fingerprints/wvlt_rgb.cpp @@ -0,0 +1,88 @@ +#include "wvlt_rgb.hpp" +#include "math.hpp" + +#include +#include +#include + +#include +#include + +namespace fingerprints { + +static const int size = 512; + +wvlt_rgb::pre_fingerprint wvlt_rgb::pre_calculate(av::frame const & frame) { + auto const image = to_raw_rgb_image(crop_to_square(frame), size, size); + wvlt_rgb::pre_fingerprint ret; + + // for every color + for(unsigned int color = 0; color < 3; ++color){ + auto & vector = ret[color]; + vector.assign(make_u(image.width() * image.height()), 0); + + for(unsigned int n = 0; n < make_u(image.width() * image.height()); ++n){ + vector[n] = 2.0 * image.data[3*n + color] / double(255) - 1.0; + } + + wvlt::wavelet_2D(vector.data(), make_u(image.width()), make_u(image.height())); + } + + return ret; +} + +wvlt_rgb wvlt_rgb::calculate(av::frame const & frame){ + auto const image = to_raw_rgb_image(crop_to_square(frame), size, size); + wvlt_rgb ret; + + std::vector vector(make_u(image.width() * image.height()), 0); + + // for every color + for(unsigned int color = 0; color < 3; ++color){ + auto& coefficient_array = color == 0 ? ret.reds : (color == 1 ? ret.greens : ret.blues); + unsigned int array_index = 0; + + for(unsigned int n = 0; n < make_u(image.width() * image.height()); ++n){ + vector[n] = 2.0 * image.data[3*n + color] / double(255) - 1.0; + } + + wvlt::wavelet_2D(vector.data(), make_u(image.width()), make_u(image.height())); + + auto copy = vector; + for(auto & x : copy) x = std::abs(x); + + auto const n_coefficients = coefficient_array.size(); + std::nth_element(copy.begin(), copy.begin() + n_coefficients, copy.end(), std::greater()); + auto const threshold = copy[n_coefficients-1]; + + for(unsigned int n = 0; n < vector.size(); ++n){ + auto const x = vector[n]; + if(std::abs(x) >= threshold) { + coefficient_array[array_index++] = std::make_pair(n, x); + } + if(array_index >= coefficient_array.size()) { + break; + } + } + } + + return ret; +} + +double wvlt_rgb::distance_to(pre_fingerprint const & fingerprint) const { + double distance = 0; + + for(unsigned int color = 0; color < 3; ++color){ + auto const & coefficients = color == 0 ? reds : (color == 1 ? greens : blues); + + for(auto&& p : coefficients){ + auto const x = p.second; + auto const y = fingerprint[color][p.first]; + distance += square(x - y) - square(y); + } + } + + return distance; +} + +} // namespace fingerprints diff --git a/lib/fingerprints/wvlt_rgb.hpp b/lib/fingerprints/wvlt_rgb.hpp new file mode 100644 index 0000000..d202c28 --- /dev/null +++ b/lib/fingerprints/wvlt_rgb.hpp @@ -0,0 +1,50 @@ +#pragma once + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +namespace boost { + namespace serialization { + template + void serialize(Archive & ar, std::array & a, const unsigned int /*version*/) { + ar & make_array(a.data(), a.size()); + } + } // namespace serialization +} // namespace boost + + +namespace fingerprints { +struct wvlt_rgb { + // a double for (x, y) location represented in a single int + using coefficient = std::pair; + using pre_fingerprint = std::array, 3>; + + std::array reds; + std::array greens; + std::array blues; + + static pre_fingerprint pre_calculate(av::frame const & frame); + static wvlt_rgb calculate(av::frame const & frame); + double distance_to(pre_fingerprint const & fingerprint) const; + + static std::string name(){ return "wvlt-rgb-20x20x20"; } + +private: + friend class boost::serialization::access; + template + void serialize(Archive & ar, const unsigned int /*version*/){ + ar & reds; + ar & greens; + ar & blues; + } +}; +} // namespace fingerprints diff --git a/lib/image_io.cpp b/lib/image_io.cpp index 571e639..5a78618 100644 --- a/lib/image_io.cpp +++ b/lib/image_io.cpp @@ -1,6 +1,8 @@ #include "image_io.hpp" #include "utilities.hpp" +#include + extern "C" { #include #include @@ -11,6 +13,7 @@ extern "C" { #include #include +#include #include #include @@ -35,35 +38,17 @@ int raw_rgb_image::height() const { return frame->height; } AVPixelFormat raw_rgb_image::format() const { return av::get_format(frame); } -void save_as_ppm(raw_rgb_image const & image, std::string const & filename) { - // Open file - FILE* file = fopen(filename.c_str(), "wb"); - if(!file) throw std::runtime_error("cannot save"); - - // Write header - fprintf(stderr, "P6\n%d %d\n255\n", image.width(), image.height()); - fprintf(file, "P6\n%d %d\n255\n", image.width(), image.height()); - - // Write pixel data - for(int y = 0; y < image.height(); y++) - fwrite(image.data.data() + 3*y*image.width(), 1, make_u(3*image.width()), file); - - // Close file - fclose(file); -} - - av::frame open_image(std::string const & filename){ // Open the file auto format_context = av::format_open_input(filename, nullptr, nullptr); // Get the codec and let us own the buffers - auto codec_context = format_context->streams[0]->codec; - auto codec = avcodec_find_decoder(codec_context->codec_id); + auto codec_context = av::context_from_stream(format_context, 0); + const auto codec = avcodec_find_decoder(codec_context->codec_id); codec_context->refcounted_frames = 1; // Open the codec - auto opened_codec = av::codec_open(codec_context, codec, nullptr); + const auto opened_codec = av::codec_open(codec_context, codec, nullptr); // Allocate frame av::frame frame = av::frame_alloc(); @@ -74,7 +59,7 @@ av::frame open_image(std::string const & filename){ while(auto packet = av::read_frame(format_context, empty_packet)) { if(packet->stream_index != 0) continue; - int ret = avcodec_decode_video2(opened_codec.get(), frame.get(), &finished, packet.get()); + int ret = avcodec_decode_video2(codec_context.get(), frame.get(), &finished, packet.get()); if (ret <= 0) { printf("Error [%d] while decoding frame: %s\n", ret, strerror(AVERROR(ret))); throw std::runtime_error("boem packet"); @@ -86,7 +71,7 @@ av::frame open_image(std::string const & filename){ // some decoders need extra passes while(!finished) { - avcodec_decode_video2(opened_codec.get(), frame.get(), &finished, &empty_packet); + avcodec_decode_video2(codec_context.get(), frame.get(), &finished, &empty_packet); av_free_packet(&empty_packet); } @@ -115,40 +100,31 @@ av::frame crop_to_square(av::frame && frame){ raw_rgb_image to_raw_rgb_image(av::frame const & frame, int new_width, int new_height){ raw_rgb_image image(new_width, new_height); - auto c = sws_getContext(frame->width, frame->height, av::get_format(frame), image.width(), image.height(), image.format(), 0, nullptr, nullptr, nullptr); - if(!c) throw std::runtime_error("boem sws context"); - sws_scale (c, {frame->data}, {frame->linesize}, 0, frame->height, {image.frame->data}, {image.frame->linesize}); - sws_freeContext(c); + auto context = sws::create_context(frame, image.frame); + sws::scale(context, frame, image.frame); return image; } void apply_to_tiles(std::string const & filename, int h_tiles, int v_tiles, std::function fun) { - auto org_frame = open_image(filename); - - // create clone to crop - av::frame cropped_frame = av::frame_clone(org_frame); + const auto org_frame = open_image(filename); // create raw buffer for the callback // TODO: do not scale the cropped region raw_rgb_image image(512, 512); // create the tiles - cropped_frame->width = org_frame->width / h_tiles; - cropped_frame->height = org_frame->height / v_tiles; + const int width = org_frame->width / h_tiles; + const int height = org_frame->height / v_tiles; for(int r = 0; r < v_tiles; ++r){ for(int c = 0; c < h_tiles; ++c){ - int x_crop = c * cropped_frame->width; - int y_crop = r * cropped_frame->height; - //std::cout << "crop " << x_crop << ", " << y_crop << std::endl; - av_picture_crop(reinterpret_cast(cropped_frame.get()), reinterpret_cast(org_frame.get()), av::get_format(org_frame), y_crop, x_crop); - - auto context = sws_getContext(cropped_frame->width, cropped_frame->height, av::get_format(org_frame), image.width(), image.height(), image.format(), 0, nullptr, nullptr, nullptr); - if(!context) throw std::runtime_error("boem sws context"); - sws_scale (context, {cropped_frame->data}, {cropped_frame->linesize}, 0, cropped_frame->height, {image.frame->data}, {image.frame->linesize}); - sws_freeContext(context); + const int x_crop = c * width; + const int y_crop = r * height; + const auto cropped_frame = av::crop(org_frame, x_crop, y_crop, width, height); + auto context = sws::create_context(cropped_frame, image.frame); + sws::scale(context, cropped_frame, image.frame); fun(c, r, image.frame); } } @@ -156,54 +132,41 @@ void apply_to_tiles(std::string const & filename, int h_tiles, int v_tiles, std: void save_as_jpg(av::frame const & frame, std::string const & filename){ const auto pix_fmt = AV_PIX_FMT_YUVJ444P; - const auto codec_id= AV_CODEC_ID_MJPEG; - // Convert - int tile_width = 800; - int tile_height = 600; - - int h_tiles = 8; - int v_tiles = 6; - - std::vector> data(make_u(avpicture_get_size(pix_fmt, h_tiles * tile_width, v_tiles * tile_height)), 0); + std::vector> data(make_u(avpicture_get_size(pix_fmt, frame->width, + frame->height)), 0); av::frame converted_frame = av::frame_alloc(); - avpicture_fill(reinterpret_cast(converted_frame.get()), data.data(), pix_fmt, h_tiles * tile_width, v_tiles * tile_height); - converted_frame->width = h_tiles * tile_width; - converted_frame->height = v_tiles * tile_height; + avpicture_fill(reinterpret_cast(converted_frame.get()), data.data(), pix_fmt, frame->width, frame->height); + converted_frame->width = frame->width; + converted_frame->height = frame->height; converted_frame->format = pix_fmt; - auto const sws_context = sws_getContext(frame->width, frame->height, av::get_format(frame), tile_width, tile_height, av::get_format(converted_frame), 0, nullptr, nullptr, nullptr); - if(!sws_context) throw std::runtime_error("boem sws context"); - - av::frame cropped_frame = av::frame_clone(converted_frame); - for(int r = 0; r < v_tiles; ++r) { - for(int c = 0; c < h_tiles; ++c){ - av_picture_crop(reinterpret_cast(cropped_frame.get()), reinterpret_cast(converted_frame.get()), av::get_format(converted_frame), r * tile_height, c * tile_width); - sws_scale (sws_context, {frame->data}, {frame->linesize}, 0, frame->height, {cropped_frame->data}, {cropped_frame->linesize}); - } + { + auto sws_context = sws::create_context(frame, converted_frame); + sws::scale(sws_context, frame, converted_frame); } - sws_freeContext(sws_context); - - // Encode - auto const codec = avcodec_find_encoder(codec_id); - if(!codec) throw av::error("Could not find codec"); + encode_as_jpg(converted_frame, filename); +} - auto codec_ctx = std::unique_ptr>(avcodec_alloc_context3(codec), [](auto x){ avcodec_free_context(&x); }); - if(!codec_ctx) throw av::error("Could not allocate codec context"); +void encode_as_jpg(const av::frame& frame, const std::string& filename){ + auto const codec_id= AV_CODEC_ID_MJPEG; + const auto pix_fmt = av::get_format(frame); + const auto codec = av::find_encoder(codec_id); + auto codec_ctx = av::context_alloc(codec); codec_ctx->pix_fmt = pix_fmt; - codec_ctx->width = converted_frame->width; - codec_ctx->height = converted_frame->height; + codec_ctx->width = frame->width; + codec_ctx->height = frame->height; codec_ctx->time_base = av_make_q(1, 1); - auto const opened_codec = av::codec_open(codec_ctx.get(), codec, nullptr); - auto const buffer_size = avpicture_get_size(pix_fmt, codec_ctx->width, codec_ctx->height); + const auto opened_codec = av::codec_open(codec_ctx, codec, nullptr); + + const auto buffer_size = avpicture_get_size(pix_fmt, codec_ctx->width, codec_ctx->height); std::vector buffer(make_u(buffer_size), 0); - auto const output_size = avcodec_encode_video(codec_ctx.get(), buffer.data(), buffer_size, converted_frame.get()); + const auto output_size = avcodec_encode_video(codec_ctx.get(), buffer.data(), buffer_size, frame.get()); assert(output_size <= buffer_size); - auto const file = fopen(filename.c_str(), "wb"); - fwrite(buffer.data(), 1, make_u(output_size), file); - fclose(file); + std::ofstream file(filename); + file.write(reinterpret_cast(buffer.data()), output_size); } diff --git a/lib/image_io.hpp b/lib/image_io.hpp index e0c3b97..002d970 100644 --- a/lib/image_io.hpp +++ b/lib/image_io.hpp @@ -21,9 +21,6 @@ struct raw_rgb_image { AVPixelFormat format() const; }; -// dumps image in ppm format -void save_as_ppm(raw_rgb_image const & image, std::string const & filename); - // opens an image in its own format av::frame open_image(std::string const & filename); @@ -35,13 +32,11 @@ av::frame crop_to_square(av::frame && frame); // converts and resizes raw_rgb_image to_raw_rgb_image(av::frame const & frame, int new_width, int new_height); -// Legacy -inline raw_rgb_image open_as_rgb(const std::string &filename){ - return to_raw_rgb_image(crop_to_square(open_image(filename)), 512, 512); -} - // apply function to every tile, fun :: Column, Row, Image -> Void void apply_to_tiles(std::string const & filename, int h_tiles, int v_tiles, std::function fun); // does what you think it does void save_as_jpg(av::frame const & frame, std::string const & filename); + +// encodes an av::frame with yuv pixelformat (is used by save_as_jpg()). +void encode_as_jpg(av::frame const & frame, std::string const & filename); diff --git a/lib/read_database.hpp b/lib/read_database.hpp index 03a4acd..2be3cf3 100644 --- a/lib/read_database.hpp +++ b/lib/read_database.hpp @@ -1,4 +1,43 @@ -#ifndef READ_DATABASE_HPP -#define READ_DATABASE_HPP +#pragma once -#endif // READ_DATABASE_HPP +#include +#include + +#include +#include +#include + +#include +#include +#include + +template +auto read_database(std::string const & database_directory, bool output_files = false){ + namespace fs = boost::filesystem; + namespace ar = boost::archive; + + image_database db; + auto const database_file = database_directory + "-" + db.fingerprint_name() + ".db"; + + if (!boost::filesystem::exists(database_file)){ + fs::path const directory(database_directory); + fs::directory_iterator eod; + for(fs::directory_iterator it(directory); it != eod; ++it){ + auto const path = it->path(); + auto const ext = to_lower(path.extension().string()); + if(ext != ".png" && ext != ".jpg" && ext != ".jpeg" && ext != ".gif") continue; + if(output_files) std::cout << path << std::endl; + db.add(path.string()); + } + + std::ofstream file(database_file); + ar::binary_oarchive archive(file); + archive << db; + } else { + std::ifstream file(database_file); + ar::binary_iarchive archive(file); + archive >> db; + } + + return db; +} diff --git a/lib/utilities.hpp b/lib/utilities.hpp index e3e8846..0f33562 100644 --- a/lib/utilities.hpp +++ b/lib/utilities.hpp @@ -1,20 +1,10 @@ #pragma once -#include +#include +#include #include -#include #include -#include - -template -bool is_pow_of_two(Int n){ - return n && !(n & (n - 1)); -} - -template -bool is_even(Int n){ - return (n & 1) == 0; -} +#include // Used to silence warnings, will assert in debug build inline unsigned int make_u(int x){ @@ -22,25 +12,6 @@ inline unsigned int make_u(int x){ return static_cast(x); } -// calculates integer 2-log such that: -// 2^(two_log(x)) >= x > 2^(two_log(x) - 1) -inline unsigned int two_log(unsigned int x){ - if(x <= 1) return 0; - return 8*sizeof(unsigned int) - unsigned(__builtin_clz(x-1)); -} - -// calculates 2^x (by squaring) -inline unsigned int pow_two(unsigned int x){ - unsigned int base = 2; - unsigned int y = 1; - while(x){ - if(x & 1) y *= base; - x >>= 1; - base *= base; - } - return y; -} - inline uint8_t to_uint8_t(double x){ if(x >= 1) return 255; if(x <= 0) return 0; @@ -70,14 +41,9 @@ inline std::string field(std::string const & str){ return str + ":" + std::string(add, ' ') + "\t"; } -// Prints a vector with brackets and commas -// Does not work recursively! -template -void print_vec(std::vector const & v){ - auto it = v.begin(), end = v.end(); - std::cout << "{" << *it++; - while(it != end) std::cout << ", " << *it++; - std::cout << "}\n"; +inline std::string to_lower(std::string in){ + std::transform(in.begin(), in.end(), in.begin(), ::tolower); + return in; } // RAII struct for timing diff --git a/lib/wvlt/wavelet.hpp b/lib/wvlt/wavelet.hpp new file mode 100644 index 0000000..900c981 --- /dev/null +++ b/lib/wvlt/wavelet.hpp @@ -0,0 +1,3 @@ +#pragma once + +#include "wavelet_2.hpp" diff --git a/lib/wvlt/wavelet_2.hpp b/lib/wvlt/wavelet_2.hpp new file mode 100644 index 0000000..11d9c66 --- /dev/null +++ b/lib/wvlt/wavelet_2.hpp @@ -0,0 +1,109 @@ +#pragma once + +#include +#include "wavelet_utilities.hpp" +#include "wavelet_constants.hpp" + +/* Rewrite of the basic functions + * This will make the adaption for the parallel case easier, + * because we can explicitly pass the two elements which are out of range + * (these are normally wrap-around values) + * + * These are also faster (testcase: size = 8, stride = 1, iterations = 100000) + * V2 0.00377901 + * V1 0.0345114 + * + * But also less abstract (which can be both a good thing and bad thing) + * + * wavelet function does not shuffle! + */ + +namespace wvlt{ + inline namespace V2 { + inline double inner_product(double* x, double const* coef, unsigned int stride){ + return x[0] * coef[0] + x[stride] * coef[1] + x[2*stride] * coef[2] + x[3*stride] * coef[3]; + } + + // will calculate part of wavelete transform (in place!) + // size is size of vector x (so x[size-1] is valid) + // does not calculate "last two" elements (it does not assume periodicity) + // calculates size/stride - 2 elements of the output + inline void wavelet_mul_base(double* x, unsigned int size, unsigned int stride){ + assert(x && is_even(size) && is_pow_of_two(stride) && 4*stride <= size); + + for(unsigned int i = 0; i < size - 2*stride; i += 2*stride){ + double y1 = inner_product(x + i, evn_coef, stride); + double y2 = inner_product(x + i, odd_coef, stride); + x[i] = y1; + x[i+stride] = y2; + } + } + + // x1 and x2 are next elements, or wrap around + // calculates size/stride elements of the output + inline void wavelet_mul(double* x, double x1, double x2, unsigned int size, unsigned int stride){ + assert(x && is_even(size) && is_pow_of_two(stride) && 2*stride <= size); + if(4*stride <= size) + wavelet_mul_base(x, size, stride); + + unsigned int i = size - 2*stride; + double y1 = x[i] * evn_coef[0] + x[i+stride] * evn_coef[1] + x1 * evn_coef[2] + x2 * evn_coef[3]; + double y2 = x[i] * odd_coef[0] + x[i+stride] * odd_coef[1] + x1 * odd_coef[2] + x2 * odd_coef[3]; + x[i] = y1; + x[i+stride] = y2; + } + + // will overwrite x, x2 and x1 are previous elements, or wrap around + // size is size of vector x (so x[size-1] is valid) + inline void wavelet_inv(double* x, double x1, double x2, unsigned int size, unsigned int stride){ + assert(x && is_even(size) && is_pow_of_two(stride) && 4*stride <= size); + + for(unsigned int i = size - 2*stride; i >= 2*stride; i -= 2*stride){ + double y1 = inner_product(x + i - 2*stride, evn_coef_inv, stride); + double y2 = inner_product(x + i - 2*stride, odd_coef_inv, stride); + x[i] = y1; + x[i+stride] = y2; + } + + unsigned int i = 0; + double y1 = x2 * evn_coef_inv[0] + x1 * evn_coef_inv[1] + x[i] * evn_coef_inv[2] + x[i+stride] * evn_coef_inv[3]; + double y2 = x2 * odd_coef_inv[0] + x1 * odd_coef_inv[1] + x[i] * odd_coef_inv[2] + x[i+stride] * odd_coef_inv[3]; + x[i] = y1; + x[i+stride] = y2; + } + + // size indicates number of elements to process (so this is different from above!) + inline void wavelet(double* x, unsigned int size, unsigned int stride){ + assert(x && is_pow_of_two(size) && size >= 4); + auto full_size = stride*size; + for(unsigned int i = 1; i <= size / 4; i <<= 1){ + auto j = stride * i; + wavelet_mul(x, x[0], x[j], full_size, j); + } + } + + inline void wavelet_2D(double* in, unsigned int width, unsigned int height){ + for(unsigned int y = 0; y < height; ++y) + wavelet(in + y*width, width, 1); + for(unsigned int x = 0; x < width; ++x) + wavelet(in + x, height, width); + } + + // size indicates number of elements to process (so this is different from above!) + inline void unwavelet(double* x, unsigned int size, unsigned int stride){ + assert(x && is_pow_of_two(size) && size >= 4); + auto full_size = stride*size; + for(unsigned int i = size / 4; i >= 1; i >>= 1){ + auto j = stride * i; + wavelet_inv(x, x[full_size-j], x[full_size-2*j], full_size, j); + } + } + + inline void unwavelet_2D(double* in, unsigned int width, unsigned int height){ + for(unsigned int x = 0; x < width; ++x) + unwavelet(in + x, height, width); + for(unsigned int y = 0; y < height; ++y) + unwavelet(in + y*width, width, 1); + } + } +} diff --git a/lib/wvlt/wavelet_constants.hpp b/lib/wvlt/wavelet_constants.hpp new file mode 100644 index 0000000..4647816 --- /dev/null +++ b/lib/wvlt/wavelet_constants.hpp @@ -0,0 +1,37 @@ +#pragma once + +#include + +namespace wvlt { + // first row of the matrix Wn + static double const evn_coef[] = { + (1.0 + std::sqrt(3.0))/(std::sqrt(32.0)), + (3.0 + std::sqrt(3.0))/(std::sqrt(32.0)), + (3.0 - std::sqrt(3.0))/(std::sqrt(32.0)), + (1.0 - std::sqrt(3.0))/(std::sqrt(32.0)) + }; + + // second row of the matrix Wn + static double const odd_coef[] = { + evn_coef[3], + -evn_coef[2], + evn_coef[1], + -evn_coef[0] + }; + + // first (shifted) row of the matrix Wn^-1 + static double const evn_coef_inv[] = { + evn_coef[2], + evn_coef[1], + evn_coef[0], + evn_coef[3] + }; + + // second (shifted) row of the matrix Wn^-1 + static double const odd_coef_inv[] = { + evn_coef[3], + -evn_coef[0], + evn_coef[1], + -evn_coef[2] + }; +} diff --git a/lib/wvlt/wavelet_utilities.hpp b/lib/wvlt/wavelet_utilities.hpp new file mode 100644 index 0000000..6e57edc --- /dev/null +++ b/lib/wvlt/wavelet_utilities.hpp @@ -0,0 +1,19 @@ +#pragma once + +template +bool is_pow_of_two(Int n){ + return n && !(n & (n - 1)); +} + +template +bool is_even(Int n){ + return (n & 1) == 0; +} + +// calculates integer 2-log such that: +// 2^(two_log(x)) >= x > 2^(two_log(x) - 1) +inline unsigned int two_log(unsigned int x){ + if(x <= 1) return 0; + return 8*sizeof(unsigned int) - unsigned(__builtin_clz(x-1)); +} + diff --git a/src/compress.cpp b/src/compress.cpp index 018b814..f73d5d2 100644 --- a/src/compress.cpp +++ b/src/compress.cpp @@ -5,6 +5,7 @@ */ #include +#include #include #include @@ -21,7 +22,7 @@ using namespace std; int main(){ av_register_all(); - auto image = open_as_rgb("image.jpg"); + auto image = to_raw_rgb_image(crop_to_square(open_image("image.jpg")), 512, 512); std::vector vector(make_u(image.width() * image.height())); @@ -50,7 +51,6 @@ int main(){ } } - // TODO: save as jpg - save_as_jpg(image.frame, "output.ppm"); + save_as_jpg(image.frame, "output.jpg"); } diff --git a/src/fingerprint_test.cpp b/src/fingerprint_test.cpp deleted file mode 100644 index d5fe8d9..0000000 --- a/src/fingerprint_test.cpp +++ /dev/null @@ -1,21 +0,0 @@ -#include -#include - -extern "C" { -#include -} - -#include - -using namespace std; - -int main(){ - av_register_all(); - auto const image = open_image("test.jpg"); - - auto const x = rgb_wavelet_coefficients::calculate(image); - auto const y = rgb_wavelet_coefficients::pre_calculate(image); - - cout << x << endl; - cout << x.distance_to(y) << endl; -} diff --git a/src/main.cpp b/src/main.cpp index 8fcedd6..9597206 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -12,10 +13,9 @@ extern "C" { #include #include #include -#include } -#include +#include #include #include @@ -24,7 +24,7 @@ static const int tile_height = 500; using namespace std; -using Metric = downscale; +using Metric = fingerprints::rgb; using Database = image_database; static auto calculate_all_distances(Database const & db, string const & filename, int h_tiles, int v_tiles){ @@ -41,51 +41,37 @@ static auto calculate_all_distances(Database const & db, string const & filename template static void save_mozaic(Mozaiq const & mozaic, string filename){ auto const pix_fmt = AV_PIX_FMT_YUVJ444P; - auto const codec_id= AV_CODEC_ID_MJPEG; auto const total_width = mozaic.h_tiles * tile_width; auto const total_height = mozaic.v_tiles * tile_height; // Create output frame - std::vector> data(make_u(avpicture_get_size(pix_fmt, total_width, total_height)), 0); - const av::frame frame = av::frame_alloc(); - avpicture_fill(reinterpret_cast(frame.get()), data.data(), pix_fmt, total_width, total_height); - frame->width = total_width; - frame->height = total_height; - frame->format = pix_fmt; + const auto frame_data = [=]{ + std::vector> data(make_u(avpicture_get_size(pix_fmt, total_width, total_height)), 0); + auto frame = av::frame_alloc(); + avpicture_fill(reinterpret_cast(frame.get()), data.data(), pix_fmt, total_width, total_height); + frame->width = total_width; + frame->height = total_height; + frame->format = pix_fmt; + return std::make_pair(std::move(frame), std::move(data)); + }(); + const auto & frame = frame_data.first; // For each tile: get the part, copy input to it + av::frame frame_part = av::frame_clone(frame); + frame_part->width = tile_width; + frame_part->height = tile_height; for(int r = 0; r < mozaic.v_tiles; ++r) { for(int c = 0; c < mozaic.h_tiles; ++c){ - auto frame_part = av::crop(frame, c * tile_width, r * tile_height, tile_width, tile_height); - auto input = crop_to_square(open_image(mozaic[r][c])); + av_picture_crop(reinterpret_cast(frame_part.get()), reinterpret_cast(frame.get()), av::get_format(frame), r * tile_height, c * tile_width); + auto const input = crop_to_square(open_image(mozaic[r][c])); auto context = sws::create_context(input, frame_part); sws::scale(context, input, frame_part); } } - // Encode - auto codec = avcodec_find_encoder(codec_id); - if(!codec) throw av::error("Could not find codec"); - - auto codec_ctx = std::unique_ptr>(avcodec_alloc_context3(codec), [](auto x){ avcodec_free_context(&x); }); - if(!codec_ctx) throw av::error("Could not allocate codec context"); - - codec_ctx->pix_fmt = pix_fmt; - codec_ctx->width = frame->width; - codec_ctx->height = frame->height; - codec_ctx->time_base = av_make_q(1, 1); - auto opened_codec = av::codec_open(codec_ctx.get(), codec, nullptr); - - auto const buffer_size = avpicture_get_size(pix_fmt, codec_ctx->width, codec_ctx->height); - std::vector buffer(make_u(buffer_size), 0); - auto const output_size = avcodec_encode_video(codec_ctx.get(), buffer.data(), buffer_size, frame.get()); - assert(output_size <= buffer_size); - - auto file = fopen(filename.c_str(), "wb"); - fwrite(buffer.data(), 1, make_u(output_size), file); - fclose(file); + encode_as_jpg(frame, filename); } int main(){ @@ -93,9 +79,9 @@ int main(){ av_log_set_level(AV_LOG_QUIET); // TODO: use boost::program_options - string const database_directory = "vakantie"; - string const filename = "vakantie.jpg"; - string const output = "output/vakantie.jpg"; + string const database_directory = "basbram"; + string const filename = "basbram.jpg"; + string const output = "output/basbram.jpg"; int const h_tiles = 4*4; int const v_tiles = 3*4;