// Aegisub/FFmpegSource2/indexing.cpp

// Copyright (c) 2007-2008 Fredrik Mellbin
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include <iostream>
#include <fstream>
#include <set>
#include <algorithm>
#include <memory>
#include <errno.h>
#include "indexing.h"
#include "wave64writer.h"
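
// Per-track helper objects used while indexing. The two audio context classes
// hold the decoder state and the Wave64Writer used when a track is dumped to
// disk; their destructors close the codec context, destroy the Matroska
// compressed stream and delete the writer, so the indexing functions can bail
// out early without leaking.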
class MatroskaAudioContext {
public:
	Wave64Writer *W64W;
	AVCodecContext *CTX;
	CompressedStream *CS;
	int64_t CurrentSample;

	MatroskaAudioContext() {
		W64W = NULL;
		CTX = NULL;
		CS = NULL;
		CurrentSample = 0;
	}

	~MatroskaAudioContext() {
		delete W64W;
		if (CTX) {
			avcodec_close(CTX);
			av_free(CTX);
		}
		if (CS)
			cs_Destroy(CS);
	}
};

class FFAudioContext {
public:
	Wave64Writer *W64W;
	AVCodecContext *CTX;
	int64_t CurrentSample;

	FFAudioContext() {
		W64W = NULL;
		CTX = NULL;
		CurrentSample = 0;
	}

	~FFAudioContext() {
		delete W64W;
		if (CTX)
			avcodec_close(CTX);
	}
};

class MatroskaIndexMemory {
private:
	int16_t *DecodingBuffer;
	MatroskaAudioContext *AudioContexts;
public:
	MatroskaIndexMemory(int Tracks, int16_t *&DecodingBuffer, MatroskaAudioContext *&AudioContexts) {
		DecodingBuffer = new int16_t[AVCODEC_MAX_AUDIO_FRAME_SIZE*10];
		AudioContexts = new MatroskaAudioContext[Tracks];
		this->DecodingBuffer = DecodingBuffer;
		this->AudioContexts = AudioContexts;
	}

	~MatroskaIndexMemory() {
		delete [] DecodingBuffer;
		delete [] AudioContexts;
	}
};

class FFIndexMemory {
private:
	int16_t *DecodingBuffer;
	FFAudioContext *AudioContexts;
public:
	FFIndexMemory(int Tracks, int16_t *&DecodingBuffer, FFAudioContext *&AudioContexts) {
		DecodingBuffer = new int16_t[AVCODEC_MAX_AUDIO_FRAME_SIZE*10];
		AudioContexts = new FFAudioContext[Tracks];
		this->DecodingBuffer = DecodingBuffer;
		this->AudioContexts = AudioContexts;
	}

	~FFIndexMemory() {
		delete [] DecodingBuffer;
		delete [] AudioContexts;
	}
};

class MatroskaMemory {
private:
	MatroskaFile *MF;
	MatroskaReaderContext *MC;
public:
	MatroskaMemory(MatroskaFile *MF, MatroskaReaderContext *MC) {
		this->MF = MF;
		this->MC = MC;
	}

	~MatroskaMemory() {
		mkv_Close(MF);
		fclose(MC->ST.fp);
	}
};
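
// Frames are recorded in the order they appear in the file; each track is
// sorted by DTS afterwards so later lookups by timestamp can assume ascending
// order.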
static bool DTSComparison(FrameInfo FI1, FrameInfo FI2) {
	return FI1.DTS < FI2.DTS;
}

static void SortTrackIndices(FrameIndex *TrackIndices) {
	for (FrameIndex::iterator Cur = TrackIndices->begin(); Cur != TrackIndices->end(); Cur++)
		std::sort(Cur->begin(), Cur->end(), DTSComparison);
}
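
// Index file layout: an IndexHeader, followed for every track by its type,
// timebase numerator and denominator, the number of frame records, and then
// the raw FrameInfo records themselves.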
int WriteIndex(const char *IndexFile, FrameIndex *TrackIndices, char *ErrorMsg, unsigned MsgSize) {
	std::ofstream Index(IndexFile, std::ios::out | std::ios::binary | std::ios::trunc);

	if (!Index.is_open()) {
		_snprintf(ErrorMsg, MsgSize, "Failed to open '%s' for writing", IndexFile);
		return 1;
	}

	// Write the index file header
	IndexHeader IH;
	IH.Id = INDEXID;
	IH.Version = INDEXVERSION;
	IH.Tracks = TrackIndices->size();
	IH.Decoder = TrackIndices->Decoder;
	Index.write(reinterpret_cast<char *>(&IH), sizeof(IH));

	for (unsigned int i = 0; i < IH.Tracks; i++) {
		int TT = (*TrackIndices)[i].TT;
		Index.write(reinterpret_cast<char *>(&TT), sizeof(TT));
		int Num = (*TrackIndices)[i].TB.Num;
		Index.write(reinterpret_cast<char *>(&Num), sizeof(Num));
		int Den = (*TrackIndices)[i].TB.Den;
		Index.write(reinterpret_cast<char *>(&Den), sizeof(Den));
		size_t Frames = (*TrackIndices)[i].size();
		Index.write(reinterpret_cast<char *>(&Frames), sizeof(Frames));

		for (size_t j = 0; j < Frames; j++)
			Index.write(reinterpret_cast<char *>(&(TrackIndices->at(i)[j])), sizeof(FrameInfo));
	}

	return 0;
}
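
// Matroska-specific indexing path (uses the mkv_* parser API rather than
// libavformat). Video tracks only get their timestamps and keyframe flags
// recorded; audio tracks selected by IndexMask are fully decoded so the exact
// sample position of every packet is known, and tracks in DumpMask are
// additionally written out as Wave64 files named
// "<AudioFile>.<track>.delay.<offset>.w64".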
static FrameIndex *MakeMatroskaIndex(const char *SourceFile, int IndexMask, int DumpMask, const char *AudioFile, bool IgnoreDecodeErrors, IndexCallback IP, void *Private, char *ErrorMsg, unsigned MsgSize) {
	MatroskaFile *MF;
	char ErrorMessage[256];
	MatroskaReaderContext MC;
	MC.Buffer = NULL;
	MC.BufferSize = 0;

	InitStdIoStream(&MC.ST);
	MC.ST.fp = fopen(SourceFile, "rb");
	if (MC.ST.fp == NULL) {
		_snprintf(ErrorMsg, MsgSize, "Can't open '%s': %s", SourceFile, strerror(errno));
		return NULL;
	}

	setvbuf(MC.ST.fp, NULL, _IOFBF, CACHESIZE);

	MF = mkv_OpenEx(&MC.ST.base, 0, 0, ErrorMessage, sizeof(ErrorMessage));
	if (MF == NULL) {
		fclose(MC.ST.fp);
		_snprintf(ErrorMsg, MsgSize, "Can't parse Matroska file: %s", ErrorMessage);
		return NULL;
	}

	MatroskaMemory MM = MatroskaMemory(MF, &MC);

	// Audio stuff
	int16_t *db;
	MatroskaAudioContext *AudioContexts;
	MatroskaIndexMemory IM = MatroskaIndexMemory(mkv_GetNumTracks(MF), db, AudioContexts);

	for (unsigned int i = 0; i < mkv_GetNumTracks(MF); i++) {
		if (IndexMask & (1 << i) && mkv_GetTrackInfo(MF, i)->Type == TT_AUDIO) {
			AVCodecContext *AudioCodecContext = avcodec_alloc_context();
			AudioCodecContext->extradata = (uint8_t *)mkv_GetTrackInfo(MF, i)->CodecPrivate;
			AudioCodecContext->extradata_size = mkv_GetTrackInfo(MF, i)->CodecPrivateSize;
			AudioContexts[i].CTX = AudioCodecContext;

			if (mkv_GetTrackInfo(MF, i)->CompEnabled) {
				AudioContexts[i].CS = cs_Create(MF, i, ErrorMessage, sizeof(ErrorMessage));
				if (AudioContexts[i].CS == NULL) {
					_snprintf(ErrorMsg, MsgSize, "Can't create decompressor: %s", ErrorMessage);
					return NULL;
				}
			}

			AVCodec *AudioCodec = avcodec_find_decoder(MatroskaToFFCodecID(mkv_GetTrackInfo(MF, i)));
			if (AudioCodec == NULL) {
				_snprintf(ErrorMsg, MsgSize, "Audio codec not found");
				return NULL;
			}

			if (avcodec_open(AudioCodecContext, AudioCodec) < 0) {
				_snprintf(ErrorMsg, MsgSize, "Could not open audio codec");
				return NULL;
			}
		} else {
			IndexMask &= ~(1 << i);
		}
	}

	//
	int64_t CurrentPos = _ftelli64(MC.ST.fp);
	_fseeki64(MC.ST.fp, 0, SEEK_END);
	int64_t SourceSize = _ftelli64(MC.ST.fp);
	_fseeki64(MC.ST.fp, CurrentPos, SEEK_SET);

	FrameIndex *TrackIndices = new FrameIndex();
	TrackIndices->Decoder = 1;

	for (unsigned int i = 0; i < mkv_GetNumTracks(MF); i++)
		TrackIndices->push_back(FrameInfoVector(mkv_TruncFloat(mkv_GetTrackInfo(MF, i)->TimecodeScale), 1000000, mkv_GetTrackInfo(MF, i)->Type - 1));

	ulonglong StartTime, EndTime, FilePos;
	unsigned int Track, FrameFlags, FrameSize;

	while (mkv_ReadFrame(MF, 0, &Track, &StartTime, &EndTime, &FilePos, &FrameSize, &FrameFlags) == 0) {
		// Update progress
		if (IP) {
			if ((*IP)(0, _ftelli64(MC.ST.fp), SourceSize, Private)) {
				_snprintf(ErrorMsg, MsgSize, "Cancelled by user");
				delete TrackIndices;
				return NULL;
			}
		}

		// Only create index entries for video for now to save space
		if (mkv_GetTrackInfo(MF, Track)->Type == TT_VIDEO) {
			(*TrackIndices)[Track].push_back(FrameInfo(StartTime, (FrameFlags & FRAME_KF) != 0));
		} else if (mkv_GetTrackInfo(MF, Track)->Type == TT_AUDIO && (IndexMask & (1 << Track))) {
			(*TrackIndices)[Track].push_back(FrameInfo(AudioContexts[Track].CurrentSample, FilePos, FrameSize, (FrameFlags & FRAME_KF) != 0));
			ReadFrame(FilePos, FrameSize, AudioContexts[Track].CS, MC, ErrorMsg, MsgSize);

			int Size = FrameSize;
			uint8_t *Data = MC.Buffer;
			AVCodecContext *AudioCodecContext = AudioContexts[Track].CTX;

			while (Size > 0) {
				int dbsize = AVCODEC_MAX_AUDIO_FRAME_SIZE*10;
				int Ret = avcodec_decode_audio2(AudioCodecContext, db, &dbsize, Data, Size);
				if (Ret < 0) {
					if (IgnoreDecodeErrors) {
						(*TrackIndices)[Track].clear();
						IndexMask &= ~(1 << Track);
						break;
					} else {
						_snprintf(ErrorMsg, MsgSize, "Audio decoding error");
						delete TrackIndices;
						return NULL;
					}
				}

				if (Ret > 0) {
					Size -= Ret;
					Data += Ret;
				}

				if (dbsize > 0)
					AudioContexts[Track].CurrentSample += (dbsize * 8) / (av_get_bits_per_sample_format(AudioCodecContext->sample_fmt) * AudioCodecContext->channels);

				if (dbsize > 0 && (DumpMask & (1 << Track))) {
					// Delay writer creation until after an audio frame has been decoded. This ensures that all parameters are known when writing the headers.
					if (!AudioContexts[Track].W64W) {
						char ABuf[50];
						std::string WN(AudioFile);
						int Offset = StartTime * mkv_TruncFloat(mkv_GetTrackInfo(MF, Track)->TimecodeScale) / (double)1000000;
						_snprintf(ABuf, sizeof(ABuf), ".%02d.delay.%d.w64", Track, Offset);
						WN += ABuf;

						AudioContexts[Track].W64W = new Wave64Writer(WN.c_str(), av_get_bits_per_sample_format(AudioCodecContext->sample_fmt),
							AudioCodecContext->channels, AudioCodecContext->sample_rate, AudioFMTIsFloat(AudioCodecContext->sample_fmt));
					}

					AudioContexts[Track].W64W->WriteData(db, dbsize);
				}
			}
		}
	}

	SortTrackIndices(TrackIndices);
	return TrackIndices;
}
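
// Generic libavformat indexing path. Matroska files are detected up front and
// handed over to MakeMatroskaIndex; everything else is demuxed with
// av_read_frame, recording DTS and keyframe flags for video packets and
// decoding audio packets to track sample positions (and optionally dumping
// them to Wave64, as above).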
FrameIndex *MakeIndex(const char *SourceFile, int IndexMask, int DumpMask, const char *AudioFile, bool IgnoreDecodeErrors, IndexCallback IP, void *Private, char *ErrorMsg, unsigned MsgSize) {
	AVFormatContext *FormatContext = NULL;
	IndexMask |= DumpMask;

	if (av_open_input_file(&FormatContext, SourceFile, NULL, 0, NULL) != 0) {
		_snprintf(ErrorMsg, MsgSize, "Can't open '%s'", SourceFile);
		return NULL;
	}

	// Do matroska indexing instead?
	if (!strcmp(FormatContext->iformat->name, "matroska")) {
		av_close_input_file(FormatContext);
		return MakeMatroskaIndex(SourceFile, IndexMask, DumpMask, AudioFile, IgnoreDecodeErrors, IP, Private, ErrorMsg, MsgSize);
	}

	if (av_find_stream_info(FormatContext) < 0) {
		av_close_input_file(FormatContext);
		_snprintf(ErrorMsg, MsgSize, "Couldn't find stream information");
		return NULL;
	}

	// Audio stuff
	int16_t *db;
	FFAudioContext *AudioContexts;
	FFIndexMemory IM = FFIndexMemory(FormatContext->nb_streams, db, AudioContexts);

	for (unsigned int i = 0; i < FormatContext->nb_streams; i++) {
		if (IndexMask & (1 << i) && FormatContext->streams[i]->codec->codec_type == CODEC_TYPE_AUDIO) {
			AVCodecContext *AudioCodecContext = FormatContext->streams[i]->codec;

			AVCodec *AudioCodec = avcodec_find_decoder(AudioCodecContext->codec_id);
			if (AudioCodec == NULL) {
				_snprintf(ErrorMsg, MsgSize, "Audio codec not found");
				return NULL;
			}

			if (avcodec_open(AudioCodecContext, AudioCodec) < 0) {
				_snprintf(ErrorMsg, MsgSize, "Could not open audio codec");
				return NULL;
			}
		} else {
			IndexMask &= ~(1 << i);
		}
	}

	//
	FrameIndex *TrackIndices = new FrameIndex();
	TrackIndices->Decoder = 0;

	for (unsigned int i = 0; i < FormatContext->nb_streams; i++)
		TrackIndices->push_back(FrameInfoVector(FormatContext->streams[i]->time_base.num * 1000,
			FormatContext->streams[i]->time_base.den,
			FormatContext->streams[i]->codec->codec_type));

	AVPacket Packet;
	while (av_read_frame(FormatContext, &Packet) >= 0) {
		// Update progress
		if (IP) {
			if ((*IP)(0, FormatContext->pb->pos, FormatContext->file_size, Private)) {
				_snprintf(ErrorMsg, MsgSize, "Cancelled by user");
				delete TrackIndices;
				return NULL;
			}
		}

		// Only create index entries for video for now to save space
		if (FormatContext->streams[Packet.stream_index]->codec->codec_type == CODEC_TYPE_VIDEO) {
			(*TrackIndices)[Packet.stream_index].push_back(FrameInfo(Packet.dts, (Packet.flags & PKT_FLAG_KEY) ? 1 : 0));
		} else if (FormatContext->streams[Packet.stream_index]->codec->codec_type == CODEC_TYPE_AUDIO && (IndexMask & (1 << Packet.stream_index))) {
			(*TrackIndices)[Packet.stream_index].push_back(FrameInfo(Packet.dts, AudioContexts[Packet.stream_index].CurrentSample, (Packet.flags & PKT_FLAG_KEY) ? 1 : 0));
			AVCodecContext *AudioCodecContext = FormatContext->streams[Packet.stream_index]->codec;
			int Size = Packet.size;
			uint8_t *Data = Packet.data;

			while (Size > 0) {
				int dbsize = AVCODEC_MAX_AUDIO_FRAME_SIZE*10;
				int Ret = avcodec_decode_audio2(AudioCodecContext, db, &dbsize, Data, Size);
				if (Ret < 0) {
					if (IgnoreDecodeErrors) {
						(*TrackIndices)[Packet.stream_index].clear();
						IndexMask &= ~(1 << Packet.stream_index);
						break;
					} else {
						_snprintf(ErrorMsg, MsgSize, "Audio decoding error");
						delete TrackIndices;
						return NULL;
					}
				}

				if (Ret > 0) {
					Size -= Ret;
					Data += Ret;
				}

				if (dbsize > 0)
					AudioContexts[Packet.stream_index].CurrentSample += (dbsize * 8) / (av_get_bits_per_sample_format(AudioCodecContext->sample_fmt) * AudioCodecContext->channels);

				if (dbsize > 0 && (DumpMask & (1 << Packet.stream_index))) {
					// Delay writer creation until after an audio frame has been decoded. This ensures that all parameters are known when writing the headers.
					if (!AudioContexts[Packet.stream_index].W64W) {
						char ABuf[50];
						std::string WN(AudioFile);
						int Offset = (Packet.dts * FormatContext->streams[Packet.stream_index]->time_base.num)
							/ (double)(FormatContext->streams[Packet.stream_index]->time_base.den * 1000);
						_snprintf(ABuf, sizeof(ABuf), ".%02d.delay.%d.w64", Packet.stream_index, Offset);
						WN += ABuf;

						AudioContexts[Packet.stream_index].W64W = new Wave64Writer(WN.c_str(), av_get_bits_per_sample_format(AudioCodecContext->sample_fmt),
							AudioCodecContext->channels, AudioCodecContext->sample_rate, AudioFMTIsFloat(AudioCodecContext->sample_fmt));
					}

					AudioContexts[Packet.stream_index].W64W->WriteData(db, dbsize);
				}
			}
		}

		av_free_packet(&Packet);
	}

	av_close_input_file(FormatContext);

	SortTrackIndices(TrackIndices);
	return TrackIndices;
}
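
// Loads an index previously written by WriteIndex. The id and version fields
// are checked so stale or foreign files are rejected instead of being
// misinterpreted.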
FrameIndex *ReadIndex(const char *IndexFile, char *ErrorMsg, unsigned MsgSize) {
	std::ifstream Index(IndexFile, std::ios::in | std::ios::binary);

	if (!Index.is_open()) {
		_snprintf(ErrorMsg, MsgSize, "Failed to open '%s' for reading", IndexFile);
		return NULL;
	}

	// Read the index file header
	IndexHeader IH;
	Index.read(reinterpret_cast<char *>(&IH), sizeof(IH));
	if (IH.Id != INDEXID) {
		_snprintf(ErrorMsg, MsgSize, "'%s' is not a valid index file", IndexFile);
		return NULL;
	}

	if (IH.Version != INDEXVERSION) {
		_snprintf(ErrorMsg, MsgSize, "'%s' is not the expected index version", IndexFile);
		return NULL;
	}

	FrameIndex *TrackIndices = new FrameIndex();

	try {
		TrackIndices->Decoder = IH.Decoder;

		for (unsigned int i = 0; i < IH.Tracks; i++) {
			// Read the per-track header: track type, timebase and how many records belong to the current stream
			int TT;
			Index.read(reinterpret_cast<char *>(&TT), sizeof(TT));
			int Num;
			Index.read(reinterpret_cast<char *>(&Num), sizeof(Num));
			int Den;
			Index.read(reinterpret_cast<char *>(&Den), sizeof(Den));
			size_t Frames;
			Index.read(reinterpret_cast<char *>(&Frames), sizeof(Frames));
			TrackIndices->push_back(FrameInfoVector(Num, Den, TT));

			FrameInfo FI(0, false);
			for (size_t j = 0; j < Frames; j++) {
				Index.read(reinterpret_cast<char *>(&FI), sizeof(FrameInfo));
				TrackIndices->at(i).push_back(FI);
			}
		}
	} catch (...) {
		delete TrackIndices;
		_snprintf(ErrorMsg, MsgSize, "Unknown error while reading index information in '%s'", IndexFile);
		return NULL;
	}

	return TrackIndices;
}
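
// Writes a "timecode format v2" file: one timestamp per frame, in
// milliseconds, computed from the stored DTS and the track timebase.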
int FrameInfoVector::WriteTimecodes(const char *TimecodeFile, char *ErrorMsg, unsigned MsgSize) {
	std::ofstream Timecodes(TimecodeFile, std::ios::out | std::ios::trunc);

	if (!Timecodes.is_open()) {
		_snprintf(ErrorMsg, MsgSize, "Failed to open '%s' for writing", TimecodeFile);
		return 1;
	}

	Timecodes << "# timecode format v2\n";

	for (iterator Cur = begin(); Cur != end(); Cur++)
		Timecodes << (int64_t)((Cur->DTS * TB.Num) / (double)TB.Den) << "\n";

	return 0;
}
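
// Exact DTS lookup; returns -1 when no frame has the requested timestamp.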
int FrameInfoVector::FrameFromDTS(int64_t DTS) {
	for (int i = 0; i < static_cast<int>(size()); i++)
		if (at(i).DTS == DTS)
			return i;
	return -1;
}
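
// Linear scan for the frame whose DTS is nearest to the requested timestamp.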
int FrameInfoVector::ClosestFrameFromDTS(int64_t DTS) {
	int Frame = 0;
	int64_t BestDiff = 0xFFFFFFFFFFFFFFLL; // big number

	for (int i = 0; i < static_cast<int>(size()); i++) {
		int64_t CurrentDiff = FFABS(at(i).DTS - DTS);
		if (CurrentDiff < BestDiff) {
			BestDiff = CurrentDiff;
			Frame = i;
		}
	}

	return Frame;
}
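
// Walks backwards from the given frame to the nearest preceding keyframe;
// frame 0 is returned as a last resort.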
int FrameInfoVector::FindClosestKeyFrame(int Frame) {
	Frame = FFMIN(FFMAX(Frame, 0), size() - 1);

	for (int i = Frame; i > 0; i--)
		if (at(i).KeyFrame)
			return i;

	return 0;
}

FrameInfoVector::FrameInfoVector() {
	this->TT = 0;
	this->TB.Num = 0;
	this->TB.Den = 0;
}

FrameInfoVector::FrameInfoVector(int Num, int Den, int TT) {
	this->TT = TT;
	this->TB.Num = Num;
	this->TB.Den = Den;
}