mozaic/lib/image_io.cpp

#include "image_io.hpp"
#include "utilities.hpp"

extern "C" {
#include <libavutil/error.h>
#include <libavutil/frame.h>
#include <libavutil/mem.h>
#include <libavformat/avformat.h>
#include <libavcodec/avcodec.h>
#include <libswscale/swscale.h>
}

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdio>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>


// Creates an empty image: `data` is default-constructed and `frame` holds a
// null pointer. The frame handle still gets a deleter that forwards the
// pointer to av_frame_free.
raw_rgb_image::raw_rgb_image()
: data()
, frame(nullptr, [](auto raw_frame){
	av_frame_free(&raw_frame);
})
{}

// Creates a W x H image in AV_PIX_FMT_RGB24.
// `data` is sized with avpicture_get_size for that format and those
// dimensions; avpicture_fill is then handed the freshly allocated frame
// together with that storage, and the frame's width/height/format are set.
raw_rgb_image::raw_rgb_image(int W, int H)
: data(make_u(avpicture_get_size(AV_PIX_FMT_RGB24, W, H)))
, frame(av::frame_alloc())
{
	auto const pixel_format = AV_PIX_FMT_RGB24;
	auto * const picture = reinterpret_cast<AVPicture*>(frame.get());

	avpicture_fill(picture, data.data(), pixel_format, W, H);

	// describe the buffer on the frame itself
	frame->format = pixel_format;
	frame->width = W;
	frame->height = H;
}

// Accessors that read straight from the underlying frame.
// NOTE: a default-constructed raw_rgb_image holds a null frame, so these
// dereference a null pointer on such an object.

// Width as stored on the frame.
int raw_rgb_image::width() const {
	return frame->width;
}

// Height as stored on the frame.
int raw_rgb_image::height() const {
	return frame->height;
}

// Pixel format of the frame, as reported by av::get_format.
AVPixelFormat raw_rgb_image::format() const {
	return av::get_format(frame);
}


// Writes `image` to `filename` as a binary PPM ("P6", maximum value 255).
//
// The header is also echoed to stderr (diagnostic output kept as it was).
// Pixel rows are taken from image.data, reading 3 * width bytes per row at
// offset 3 * y * width, i.e. the buffer is treated as tightly packed.
//
// Throws std::runtime_error when the file cannot be opened or when writing
// or closing it fails.
void save_as_ppm(raw_rgb_image const & image, std::string const & filename) {
	// Open file; the handle is closed on every exit path, including throws
	std::unique_ptr<FILE, decltype(&fclose)> file(fopen(filename.c_str(), "wb"), &fclose);
	if(!file) throw std::runtime_error("cannot save");

	int const width = image.width();
	int const height = image.height();

	// Write header
	fprintf(stderr, "P6\n%d %d\n255\n", width, height);
	if(fprintf(file.get(), "P6\n%d %d\n255\n", width, height) < 0)
		throw std::runtime_error("cannot save: write failed");

	// Write pixel data
	// Offsets are computed in size_t so large images cannot overflow int.
	if(width > 0) {
		std::size_t const row_bytes = 3 * static_cast<std::size_t>(width);
		for(int y = 0; y < height; y++) {
			auto const row = image.data.data() + row_bytes * static_cast<std::size_t>(y);
			if(fwrite(row, 1, row_bytes, file.get()) != row_bytes)
				throw std::runtime_error("cannot save: write failed");
		}
	}

	// Close file explicitly: buffered write errors only surface here
	if(fclose(file.release()) != 0)
		throw std::runtime_error("cannot save: write failed");
}


// Opens `filename` and returns the first frame decoded from stream 0.
//
// Packets belonging to other streams are skipped. If reading packets ends
// before a frame is complete, the decoder is drained with `empty_packet`.
//
// Throws std::runtime_error when the file has no streams, no decoder is
// found, decoding a packet fails, or no frame can be obtained at all.
// The av:: helpers used here may throw on their own failures as well
// (not visible from this file).
av::frame open_image(std::string const & filename){
	// libav error codes are not errno values, so strerror cannot describe
	// them; av_strerror handles both kinds.
	auto const describe = [](int code) {
		char text[128] = {0};
		av_strerror(code, text, sizeof text);
		return std::string(text);
	};

	// Open the file
	auto format_context = av::format_open_input(filename, nullptr, nullptr);
	if(format_context->nb_streams == 0)
		throw std::runtime_error("no streams in " + filename);

	// Get the codec and let us own the buffers
	auto codec_context = format_context->streams[0]->codec;
	auto codec = avcodec_find_decoder(codec_context->codec_id);
	if(!codec) throw std::runtime_error("no decoder for " + filename);
	codec_context->refcounted_frames = 1;

	// Open the codec
	auto opened_codec = av::codec_open(codec_context, codec, nullptr);

	// Allocate frame
	av::frame frame = av::frame_alloc();

	// things to read and decode it
	av::packet_buffer empty_packet;
	int finished = 0;
	while(auto packet = av::read_frame(format_context, empty_packet)) {
		if(packet->stream_index != 0) continue;

		int const ret = avcodec_decode_video2(opened_codec.get(), frame.get(), &finished, packet.get());
		if (ret <= 0) {
			// negative: libav error code; zero: nothing was decoded from this packet
			auto const reason = ret < 0 ? describe(ret) : std::string("no frame could be decompressed");
			printf("Error [%d] while decoding frame: %s\n", ret, reason.c_str());
			throw std::runtime_error("boem packet");
		}

		// we only need the first frame
		if(finished) break;
	}

	// some decoders need extra passes
	while(!finished) {
		int const ret = avcodec_decode_video2(opened_codec.get(), frame.get(), &finished, &empty_packet);
		av_free_packet(&empty_packet);

		if(ret < 0) {
			printf("Error [%d] while decoding frame: %s\n", ret, describe(ret).c_str());
			throw std::runtime_error("boem packet");
		}

		// Nothing consumed and still no picture: the decoder has nothing
		// left to give, so looping again would never terminate.
		if(ret == 0 && !finished)
			throw std::runtime_error("no frame decoded from " + filename);
	}

	return frame;
}


// Crops `frame` in place so that width and height become equal to the
// shorter of the two. Half of the surplus along the longer axis is passed to
// av::crop as the offset (presumably x, y, width, height - confirm against
// av::crop). A frame that is already square is left untouched.
void crop_to_square(av::frame& frame){
	int const w = frame->width;
	int const h = frame->height;

	if(w == h) return;

	if(w > h) {
		// landscape: trim left and right
		av::crop(frame, (w - h)/2, 0, h, h);
	} else {
		// portrait: trim top and bottom
		av::crop(frame, 0, (h - w)/2, w, w);
	}
}

// Non-mutating variant: clones `frame`, crops the clone to a square and
// returns it; the argument itself is not modified.
av::frame crop_to_square(const av::frame& frame){
	av::frame copy = av::frame_clone(frame);
	crop_to_square(copy);
	return copy;
}

// Rvalue variant: crops the passed-in frame via the in-place overload and
// hands it back to the caller by move.
av::frame crop_to_square(av::frame && frame){
	av::frame & target = frame;
	crop_to_square(target);
	return std::move(target);
}

// Converts `frame` into a new raw_rgb_image of new_width x new_height.
// A swscale context is set up from the frame's size/format to the image's
// size/format, the whole frame is run through sws_scale into the image's
// frame, and the context is released again.
// Throws std::runtime_error when the context cannot be created.
raw_rgb_image to_raw_rgb_image(av::frame const & frame, int new_width, int new_height){
	raw_rgb_image result(new_width, new_height);

	auto const source_format = av::get_format(frame);
	auto const scaler = sws_getContext(
		frame->width, frame->height, source_format,
		result.width(), result.height(), result.format(),
		0, nullptr, nullptr, nullptr);
	if(scaler == nullptr) throw std::runtime_error("boem sws context");

	sws_scale(scaler, {frame->data}, {frame->linesize}, 0, frame->height, {result.frame->data}, {result.frame->linesize});
	sws_freeContext(scaler);

	return result;
}


void apply_to_tiles(std::string const & filename, int h_tiles, int v_tiles, std::function<void(int, int, av::frame const &)> fun) {
	auto org_frame = open_image(filename);

	// create clone to crop
	av::frame cropped_frame = av::frame_clone(org_frame);

	// create raw buffer for the callback
	// TODO: do not scale the cropped region
	raw_rgb_image image(512, 512);

	// create the tiles
	cropped_frame->width = org_frame->width / h_tiles;
	cropped_frame->height = org_frame->height / v_tiles;
	for(int r = 0; r < v_tiles; ++r){
		for(int c = 0; c < h_tiles; ++c){
			int x_crop = c * cropped_frame->width;
			int y_crop = r * cropped_frame->height;
			//std::cout << "crop " << x_crop << ", " << y_crop << std::endl;
			av_picture_crop(reinterpret_cast<AVPicture*>(cropped_frame.get()), reinterpret_cast<AVPicture*>(org_frame.get()), av::get_format(org_frame), y_crop, x_crop);

			auto context = sws_getContext(cropped_frame->width, cropped_frame->height, av::get_format(org_frame), image.width(), image.height(), image.format(), 0, nullptr, nullptr, nullptr);
			if(!context) throw std::runtime_error("boem sws context");
			sws_scale (context, {cropped_frame->data}, {cropped_frame->linesize}, 0, cropped_frame->height, {image.frame->data}, {image.frame->linesize});
			sws_freeContext(context);

			fun(c, r, image.frame);
		}
	}
}

// Encodes a JPEG (MJPEG codec, YUVJ444P) and writes it to `filename`.
//
// The output canvas is a fixed grid of 8 x 6 tiles of 800 x 600 pixels;
// `frame` is scaled to tile size and written into every tile of the grid.
//
// Throws std::runtime_error when the scaler cannot be created or the file
// cannot be opened or written, and av::error when the encoder cannot be
// found, allocated or fails to encode.
// NOTE(review): avcodec_encode_video is a legacy libavcodec entry point -
// confirm it exists in the library version this project builds against.
void save_as_jpg(av::frame const & frame, std::string const & filename){
	const auto pix_fmt = AV_PIX_FMT_YUVJ444P;
	const auto codec_id= AV_CODEC_ID_MJPEG;

	// Convert
	int const tile_width = 800;
	int const tile_height = 600;

	int const h_tiles = 8;
	int const v_tiles = 6;

	int const canvas_width = h_tiles * tile_width;
	int const canvas_height = v_tiles * tile_height;

	std::vector<uint8_t, av::allocator<uint8_t>> data(make_u(avpicture_get_size(pix_fmt, canvas_width, canvas_height)), 0);
	av::frame converted_frame = av::frame_alloc();
	avpicture_fill(reinterpret_cast<AVPicture*>(converted_frame.get()), data.data(), pix_fmt, canvas_width, canvas_height);
	converted_frame->width = canvas_width;
	converted_frame->height = canvas_height;
	converted_frame->format = pix_fmt;

	// Owned scaler: released even if something below throws
	std::unique_ptr<SwsContext, decltype(&sws_freeContext)> sws_context(
		sws_getContext(frame->width, frame->height, av::get_format(frame), tile_width, tile_height, av::get_format(converted_frame), 0, nullptr, nullptr, nullptr),
		&sws_freeContext);
	if(!sws_context) throw std::runtime_error("boem sws context");

	av::frame cropped_frame = av::frame_clone(converted_frame);
	for(int r = 0; r < v_tiles; ++r) {
		for(int c = 0; c < h_tiles; ++c){
			// aim cropped_frame at tile (c, r) of the canvas, then scale the input into it
			av_picture_crop(reinterpret_cast<AVPicture*>(cropped_frame.get()), reinterpret_cast<AVPicture*>(converted_frame.get()), av::get_format(converted_frame), r * tile_height, c * tile_width);
			sws_scale (sws_context.get(), {frame->data}, {frame->linesize}, 0, frame->height, {cropped_frame->data}, {cropped_frame->linesize});
		}
	}

	sws_context.reset();

	// Encode
	auto const codec = avcodec_find_encoder(codec_id);
	if(!codec) throw av::error("Could not find codec");

	auto codec_ctx = std::unique_ptr<AVCodecContext, av::deleter<AVCodecContext>>(avcodec_alloc_context3(codec), [](auto x){ avcodec_free_context(&x); });
	if(!codec_ctx) throw av::error("Could not allocate codec context");

	codec_ctx->pix_fmt = pix_fmt;
	codec_ctx->width = converted_frame->width;
	codec_ctx->height = converted_frame->height;
	codec_ctx->time_base = av_make_q(1, 1);
	auto const opened_codec = av::codec_open(codec_ctx.get(), codec, nullptr);

	auto const buffer_size = avpicture_get_size(pix_fmt, codec_ctx->width, codec_ctx->height);
	std::vector<uint8_t> buffer(make_u(buffer_size), 0);
	auto const output_size = avcodec_encode_video(codec_ctx.get(), buffer.data(), buffer_size, converted_frame.get());
	// A negative result is an encoder error; it must not be used as a byte count.
	if(output_size < 0) throw av::error("Could not encode frame");
	assert(output_size <= buffer_size);

	// Write the encoded bytes; the handle is closed on every exit path
	std::unique_ptr<FILE, decltype(&fclose)> file(fopen(filename.c_str(), "wb"), &fclose);
	if(!file) throw std::runtime_error("cannot save");

	auto const to_write = static_cast<std::size_t>(output_size);
	if(fwrite(buffer.data(), 1, to_write, file.get()) != to_write)
		throw std::runtime_error("cannot save: write failed");

	// buffered write errors only surface at close
	if(fclose(file.release()) != 0)
		throw std::runtime_error("cannot save: write failed");
}
Woah, first iteration (with DAU4 wavelet metric) 10 years ago			`#include "image_io.hpp"`
			`#include "utilities.hpp"`

			`extern "C" {`
			`#include <libavutil/frame.h>`
			`#include <libavutil/mem.h>`
			`#include <libavformat/avformat.h>`
			`#include <libavcodec/avcodec.h>`
			`#include <libswscale/swscale.h>`
			`}`

			`#include <algorithm>`
			`#include <cassert>`
Some more abstraction for libav* stuff. New: mosaic with unique tiles. 10 years ago			`#include <iostream>`
			`#include <stdexcept>`
Woah, first iteration (with DAU4 wavelet metric) 10 years ago

Fixed downscaling metric 10 years ago			`raw_rgb_image::raw_rgb_image()`
			`: data()`
			`, frame(nullptr, [](auto x){ av_frame_free(&x); })`
			`{}`

Woah, first iteration (with DAU4 wavelet metric) 10 years ago			`raw_rgb_image::raw_rgb_image(int W, int H)`
			`: data(make_u(avpicture_get_size(AV_PIX_FMT_RGB24, W, H)))`
			`, frame(av::frame_alloc())`
			`{`
			`avpicture_fill(reinterpret_cast<AVPicture*>(frame.get()), data.data(), AV_PIX_FMT_RGB24, W, H);`
			`frame->width = W;`
			`frame->height = H;`
			`frame->format = AV_PIX_FMT_RGB24;`
			`}`

			`int raw_rgb_image::width() const { return frame->width; }`
			`int raw_rgb_image::height() const { return frame->height; }`
			`AVPixelFormat raw_rgb_image::format() const { return av::get_format(frame); }`


			`void save_as_ppm(raw_rgb_image const & image, std::string const & filename) {`
			`// Open file`
			`FILE* file = fopen(filename.c_str(), "wb");`
			`if(!file) throw std::runtime_error("cannot save");`

			`// Write header`
			`fprintf(stderr, "P6\n%d %d\n255\n", image.width(), image.height());`
			`fprintf(file, "P6\n%d %d\n255\n", image.width(), image.height());`

			`// Write pixel data`
			`for(int y = 0; y < image.height(); y++)`
			`fwrite(image.data.data() + 3*y*image.width(), 1, make_u(3*image.width()), file);`

			`// Close file`
			`fclose(file);`
			`}`


			`av::frame open_image(std::string const & filename){`
			`// Open the file`
			`auto format_context = av::format_open_input(filename, nullptr, nullptr);`

			`// Get the codec and let us own the buffers`
			`auto codec_context = format_context->streams[0]->codec;`
			`auto codec = avcodec_find_decoder(codec_context->codec_id);`
			`codec_context->refcounted_frames = 1;`

			`// Open the codec`
			`auto opened_codec = av::codec_open(codec_context, codec, nullptr);`

			`// Allocate frame`
			`av::frame frame = av::frame_alloc();`

			`// things to read and decode it`
			`av::packet_buffer empty_packet;`
			`int finished = 0;`
			`while(auto packet = av::read_frame(format_context, empty_packet)) {`
			`if(packet->stream_index != 0) continue;`

			`int ret = avcodec_decode_video2(opened_codec.get(), frame.get(), &finished, packet.get());`
			`if (ret <= 0) {`
			`printf("Error [%d] while decoding frame: %s\n", ret, strerror(AVERROR(ret)));`
			`throw std::runtime_error("boem packet");`
			`}`

			`// we only need the first frame`
			`if(finished) break;`
			`}`

			`// some decoders need extra passes`
			`while(!finished) {`
			`avcodec_decode_video2(opened_codec.get(), frame.get(), &finished, &empty_packet);`
			`av_free_packet(&empty_packet);`
			`}`

			`return frame;`
			`}`


			`void crop_to_square(av::frame& frame){`
Some more abstraction for libav* stuff. New: mosaic with unique tiles. 10 years ago			`int diff = frame->width - frame->height;`
Woah, first iteration (with DAU4 wavelet metric) 10 years ago			`if(diff > 0) {`
Some more abstraction for libav* stuff. New: mosaic with unique tiles. 10 years ago			`av::crop(frame, diff/2, 0, frame->height, frame->height);`
Woah, first iteration (with DAU4 wavelet metric) 10 years ago			`} else if(diff < 0) {`
Some more abstraction for libav* stuff. New: mosaic with unique tiles. 10 years ago			`av::crop(frame, 0, -diff/2, frame->width, frame->width);`
Woah, first iteration (with DAU4 wavelet metric) 10 years ago			`}`
			`}`

Adds possibility to do asymmetric fingerprint comparison Adds another metric (doesn't work yet) Adds more constness 10 years ago			`av::frame crop_to_square(const av::frame& frame){`
			`return crop_to_square(av::frame_clone(frame));`
			`}`

Woah, first iteration (with DAU4 wavelet metric) 10 years ago			`av::frame crop_to_square(av::frame && frame){`
			`crop_to_square(frame);`
			`return std::move(frame);`
			`}`

			`raw_rgb_image to_raw_rgb_image(av::frame const & frame, int new_width, int new_height){`
			`raw_rgb_image image(new_width, new_height);`

			`auto c = sws_getContext(frame->width, frame->height, av::get_format(frame), image.width(), image.height(), image.format(), 0, nullptr, nullptr, nullptr);`
			`if(!c) throw std::runtime_error("boem sws context");`
			`sws_scale (c, {frame->data}, {frame->linesize}, 0, frame->height, {image.frame->data}, {image.frame->linesize});`
			`sws_freeContext(c);`

			`return image;`
			`}`


Adds possibility to do asymmetric fingerprint comparison Adds another metric (doesn't work yet) Adds more constness 10 years ago			`void apply_to_tiles(std::string const & filename, int h_tiles, int v_tiles, std::function<void(int, int, av::frame const &)> fun) {`
Woah, first iteration (with DAU4 wavelet metric) 10 years ago			`auto org_frame = open_image(filename);`

			`// create clone to crop`
			`av::frame cropped_frame = av::frame_clone(org_frame);`

			`// create raw buffer for the callback`
Adds possibility to do asymmetric fingerprint comparison Adds another metric (doesn't work yet) Adds more constness 10 years ago			`// TODO: do not scale the cropped region`
Woah, first iteration (with DAU4 wavelet metric) 10 years ago			`raw_rgb_image image(512, 512);`

			`// create the tiles`
			`cropped_frame->width = org_frame->width / h_tiles;`
			`cropped_frame->height = org_frame->height / v_tiles;`
			`for(int r = 0; r < v_tiles; ++r){`
			`for(int c = 0; c < h_tiles; ++c){`
			`int x_crop = c * cropped_frame->width;`
			`int y_crop = r * cropped_frame->height;`
			`//std::cout << "crop " << x_crop << ", " << y_crop << std::endl;`
			`av_picture_crop(reinterpret_cast<AVPicture*>(cropped_frame.get()), reinterpret_cast<AVPicture*>(org_frame.get()), av::get_format(org_frame), y_crop, x_crop);`

			`auto context = sws_getContext(cropped_frame->width, cropped_frame->height, av::get_format(org_frame), image.width(), image.height(), image.format(), 0, nullptr, nullptr, nullptr);`
			`if(!context) throw std::runtime_error("boem sws context");`
			`sws_scale (context, {cropped_frame->data}, {cropped_frame->linesize}, 0, cropped_frame->height, {image.frame->data}, {image.frame->linesize});`
			`sws_freeContext(context);`

Adds possibility to do asymmetric fingerprint comparison Adds another metric (doesn't work yet) Adds more constness 10 years ago			`fun(c, r, image.frame);`
Woah, first iteration (with DAU4 wavelet metric) 10 years ago			`}`
			`}`
			`}`
Adds possibility to do asymmetric fingerprint comparison Adds another metric (doesn't work yet) Adds more constness 10 years ago
			`void save_as_jpg(av::frame const & frame, std::string const & filename){`
			`const auto pix_fmt = AV_PIX_FMT_YUVJ444P;`
			`const auto codec_id= AV_CODEC_ID_MJPEG;`

			`// Convert`
			`int tile_width = 800;`
			`int tile_height = 600;`

			`int h_tiles = 8;`
			`int v_tiles = 6;`

			`std::vector<uint8_t, av::allocator<uint8_t>> data(make_u(avpicture_get_size(pix_fmt, h_tiles * tile_width, v_tiles * tile_height)), 0);`
			`av::frame converted_frame = av::frame_alloc();`
			`avpicture_fill(reinterpret_cast<AVPicture*>(converted_frame.get()), data.data(), pix_fmt, h_tiles * tile_width, v_tiles * tile_height);`
			`converted_frame->width = h_tiles * tile_width;`
			`converted_frame->height = v_tiles * tile_height;`
			`converted_frame->format = pix_fmt;`

			`auto const sws_context = sws_getContext(frame->width, frame->height, av::get_format(frame), tile_width, tile_height, av::get_format(converted_frame), 0, nullptr, nullptr, nullptr);`
			`if(!sws_context) throw std::runtime_error("boem sws context");`

			`av::frame cropped_frame = av::frame_clone(converted_frame);`
			`for(int r = 0; r < v_tiles; ++r) {`
			`for(int c = 0; c < h_tiles; ++c){`
			`av_picture_crop(reinterpret_cast<AVPicture*>(cropped_frame.get()), reinterpret_cast<AVPicture*>(converted_frame.get()), av::get_format(converted_frame), r * tile_height, c * tile_width);`
			`sws_scale (sws_context, {frame->data}, {frame->linesize}, 0, frame->height, {cropped_frame->data}, {cropped_frame->linesize});`
			`}`
			`}`

			`sws_freeContext(sws_context);`

			`// Encode`
			`auto const codec = avcodec_find_encoder(codec_id);`
			`if(!codec) throw av::error("Could not find codec");`

			`auto codec_ctx = std::unique_ptr<AVCodecContext, av::deleter<AVCodecContext>>(avcodec_alloc_context3(codec), [](auto x){ avcodec_free_context(&x); });`
			`if(!codec_ctx) throw av::error("Could not allocate codec context");`

			`codec_ctx->pix_fmt = pix_fmt;`
			`codec_ctx->width = converted_frame->width;`
			`codec_ctx->height = converted_frame->height;`
			`codec_ctx->time_base = av_make_q(1, 1);`
			`auto const opened_codec = av::codec_open(codec_ctx.get(), codec, nullptr);`

			`auto const buffer_size = avpicture_get_size(pix_fmt, codec_ctx->width, codec_ctx->height);`
			`std::vector<uint8_t> buffer(make_u(buffer_size), 0);`
			`auto const output_size = avcodec_encode_video(codec_ctx.get(), buffer.data(), buffer_size, converted_frame.get());`
			`assert(output_size <= buffer_size);`

			`auto const file = fopen(filename.c_str(), "wb");`
			`fwrite(buffer.data(), 1, make_u(output_size), file);`
			`fclose(file);`
			`}`