Aegisub/aegisub/libffms/src/core/audiosource.cpp

//  Copyright (c) 2010 Thomas Goyne <tgoyne@gmail.com>
//
//  Permission is hereby granted, free of charge, to any person obtaining a copy
//  of this software and associated documentation files (the "Software"), to deal
//  in the Software without restriction, including without limitation the rights
//  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
//  copies of the Software, and to permit persons to whom the Software is
//  furnished to do so, subject to the following conditions:
//
//  The above copyright notice and this permission notice shall be included in
//  all copies or substantial portions of the Software.
//
//  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
//  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
//  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
//  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
//  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
//  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
//  THE SOFTWARE.

#include "audiosource.h"

#include <algorithm>
#include <cassert>

// Validates the requested track against the index and takes a copy of its
// frame list.
//
// SourceFile: path of the media file the index is supposed to describe
// Index:      index to pull the track from
// Track:      number of the audio track within Index
//
// Throws FFMS_Exception if Track is out of range, is not an audio track, has
// no frames, or if the index's file signature does not match SourceFile.
FFMS_AudioSource::FFMS_AudioSource(const char *SourceFile, FFMS_Index &Index, int Track)
: Delay(0)
, MaxCacheBlocks(50)
, BytesPerSample(0)
, Decoded(0)
, CurrentSample(-1)
, PacketNumber(0)
, CurrentFrame(NULL)
, TrackNumber(Track)
, SeekOffset(0)
, DecodingBuffer(AVCODEC_MAX_AUDIO_FRAME_SIZE * 10)
{
	const int TrackCount = static_cast<int>(Index.size());
	if (Track < 0 || Track >= TrackCount) {
		throw FFMS_Exception(FFMS_ERROR_INDEX, FFMS_ERROR_INVALID_ARGUMENT,
			"Out of bounds track index selected");
	}

	// Track is known to be in range from here on
	const FFMS_Track &Selected = Index[Track];

	if (Selected.TT != FFMS_TYPE_AUDIO) {
		throw FFMS_Exception(FFMS_ERROR_INDEX, FFMS_ERROR_INVALID_ARGUMENT,
			"Not an audio track");
	}

	if (Selected.empty()) {
		throw FFMS_Exception(FFMS_ERROR_INDEX, FFMS_ERROR_INVALID_ARGUMENT,
			"Audio track contains no audio frames");
	}

	Frames = Selected;

	// The signature check comes last, after the frame list has been copied
	if (!Index.CompareFileSignature(SourceFile)) {
		throw FFMS_Exception(FFMS_ERROR_INDEX, FFMS_ERROR_FILE_MISMATCH,
			"The index does not match the source file");
	}
}
// Pre-decodes and permanently caches the first blocks of the track, reads the
// audio properties, and computes the delay applied to requested sample
// positions.
//
// Index:     index the track came from; consulted for the video track used
//            as the delay reference
// DelayMode: one of the FFMS_DELAY_* constants, or the number of a video
//            track in Index to measure the delay against
//
// Throws FFMS_Exception if the codec reports a non-positive sample rate or
// bit depth, if DelayMode is below FFMS_DELAY_NO_SHIFT, if it is not a valid
// track number, or if it names a track that is not video.
void FFMS_AudioSource::Init(FFMS_Index &Index, int DelayMode) {
	// The first packet after a seek is often decoded incorrectly, which
	// makes it impossible to ever correctly seek back to the beginning, so
	// store the first block now

	// In addition, anything with the same PTS as the first packet can't be
	// distinguished from the first packet and so can't be seeked to, so
	// store those as well

	// Some of LAVF's splitters don't like to seek to the beginning of the
	// file (at least ts, possibly others), so cache a few blocks even if
	// PTSes are unique. Packet 7 is the last packet I've had be unseekable
	// to, so cache up to 10 for a bit of an extra buffer
	CacheIterator end = Cache.end();
	while (PacketNumber < Frames.size() &&
		((Frames[0].PTS != ffms_av_nopts_value && Frames[PacketNumber].PTS == Frames[0].PTS) ||
		 Cache.size() < 10)) {

		DecodeNextBlock();
		if (Decoded)
			CacheBlock(end, CurrentSample, Decoded, &DecodingBuffer[0]);
	}
	// Store the iterator to the last element of the cache which is used for
	// correctness rather than speed, so that when looking for one to delete
	// we know how much to skip
	CacheNoDelete = Cache.end();
	--CacheNoDelete;

	// Read properties of the audio which may not be available until the first
	// frame has been decoded
	FillAP(AP, CodecContext, Frames);

	if (AP.SampleRate <= 0 || AP.BitsPerSample <= 0)
		throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_CODEC,
			"Codec returned zero size audio");

	if (DelayMode < FFMS_DELAY_NO_SHIFT)
		throw FFMS_Exception(FFMS_ERROR_INDEX, FFMS_ERROR_INVALID_ARGUMENT,
			"Bad audio delay compensation mode");

	if (DelayMode == FFMS_DELAY_NO_SHIFT) return;

	// Valid track numbers are 0 .. size-1, so a mode equal to size() must be
	// rejected too; otherwise Index[DelayMode] below reads past the end
	if (DelayMode >= static_cast<int>(Index.size()))
		throw FFMS_Exception(FFMS_ERROR_INDEX, FFMS_ERROR_INVALID_ARGUMENT,
			"Out of bounds track index selected for audio delay compensation");

	if (DelayMode >= 0 && Index[DelayMode].TT != FFMS_TYPE_VIDEO)
		throw FFMS_Exception(FFMS_ERROR_INDEX, FFMS_ERROR_INVALID_ARGUMENT,
			"Audio delay compensation must be relative to a video track");

	double AdjustRelative = 0;
	if (DelayMode != FFMS_DELAY_TIME_ZERO) {
		if (DelayMode == FFMS_DELAY_FIRST_VIDEO_TRACK) {
			// Resolve the symbolic mode to the first video track, if there
			// is one; otherwise DelayMode is left as it was
			for (size_t i = 0; i < Index.size(); ++i) {
				if (Index[i].TT == FFMS_TYPE_VIDEO) {
					DelayMode = static_cast<int>(i);
					break;
				}
			}
		}

		if (DelayMode >= 0) {
			// Start time of the reference video track, scaled by its timebase
			const FFMS_Track &VTrack = Index[DelayMode];
			AdjustRelative = VTrack[0].PTS * VTrack.TB.Num / (double)VTrack.TB.Den / 1000.;
		}
	}

	// NOTE(review): Frames[0].PTS is used raw here, while the video PTS above
	// is scaled by its timebase - confirm both are meant to be in the same unit
	Delay = static_cast<int64_t>((AdjustRelative - Frames[0].PTS) * AP.SampleRate + .5);
	AP.NumSamples -= Delay;
}

// Inserts a decoded block into the cache in front of pos and, once the cache
// holds MaxCacheBlocks entries or more, evicts the block with the lowest Age
// among those after CacheNoDelete.
//
// pos:     insertion point; advanced by one if the block it refers to is the
//          one evicted, so that it stays usable for the caller
// Start:   first sample number of the block
// Samples: number of samples in the block
// SrcData: decoded audio, Samples * BytesPerSample bytes
void FFMS_AudioSource::CacheBlock(CacheIterator &pos, int64_t Start, size_t Samples, uint8_t *SrcData) {
	Cache.insert(pos, AudioBlock(Start, Samples, SrcData, Samples * BytesPerSample));

	// Still below the limit, so there is nothing to throw away
	if (Cache.size() < MaxCacheBlocks) return;

	// Everything up to and including CacheNoDelete is kept forever: the first
	// packet decoded after a seek is often decoded incorrectly and we can't
	// seek to before the first one, so those blocks could never be recreated
	CacheIterator Victim = CacheNoDelete;
	++Victim;

	// Pick the evictable block with the smallest Age
	CacheIterator Candidate = Victim;
	while (Candidate != Cache.end()) {
		if (Candidate->Age < Victim->Age)
			Victim = Candidate;
		++Candidate;
	}

	// Don't leave the caller holding an iterator to the erased block
	if (Victim == pos)
		++pos;
	Cache.erase(Victim);
}

void FFMS_AudioSource::DecodeNextBlock() {
	if (BytesPerSample == 0) BytesPerSample = (av_get_bits_per_sample_fmt(CodecContext->sample_fmt) * CodecContext->channels) / 8;

	CurrentFrame = &Frames[PacketNumber];

	AVPacket Packet;
	if (!ReadPacket(&Packet))
		throw FFMS_Exception(FFMS_ERROR_PARSER, FFMS_ERROR_UNKNOWN, "ReadPacket unexpectedly failed to read a packet");

	// ReadPacket may have changed the packet number
	CurrentFrame = &Frames[PacketNumber];
	CurrentSample = CurrentFrame->SampleStart;
	++PacketNumber;

	uint8_t *Buf = &DecodingBuffer[0];
	uint8_t *Data = Packet.data;
	while (Packet.size > 0) {
		int TempOutputBufSize = AVCODEC_MAX_AUDIO_FRAME_SIZE * 10 - (Buf - &DecodingBuffer[0]);
		int Ret = avcodec_decode_audio3(CodecContext, (int16_t *)Buf, &TempOutputBufSize, &Packet);

		// Should only ever happen if the user chose to ignore decoding errors
		// during indexing, so continue to just ignore decoding errors
		if (Ret < 0) break;

		if (Ret > 0) {
			Packet.size -= Ret;
			Packet.data += Ret;
			Buf += TempOutputBufSize;
		}
	}
	Packet.data = Data;
	FreePacket(&Packet);

	Decoded = (Buf - &DecodingBuffer[0]) / BytesPerSample;
	if (Decoded == 0) {
		// zero sample packets aren't included in the index so we didn't
		// actually move to the next packet
		--PacketNumber;
	}
}

static bool SampleStartComp(const TFrameInfo &a, const TFrameInfo &b) {
	return a.SampleStart < b.SampleStart;
}

// Copies Count samples starting at sample number Start into Buf.
//
// Buf:   destination; the code writes up to Count * BytesPerSample bytes to it
// Start: first sample wanted, before the delay is applied
// Count: number of samples wanted
//
// Samples are taken from the block cache where possible; otherwise the
// stream is seeked if needed and decoded forward, and each newly decoded
// block is added to the cache.
//
// Throws FFMS_Exception if the requested range lies outside
// [0, AP.NumSamples], if a backwards seek is needed while SeekOffset is -1,
// or if decoding after a seek ends up past the requested sample.
void FFMS_AudioSource::GetAudio(void *Buf, int64_t Start, int64_t Count) {
	if (Start < 0 || Start + Count > AP.NumSamples || Count < 0)
		throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_INVALID_ARGUMENT,
			"Out of bounds audio samples requested");

	uint8_t *Dst = static_cast<uint8_t*>(Buf);

	// Apply audio delay (if any) and fill any samples before the start time with zero
	Start -= Delay;
	if (Start < 0) {
		// Only the part of the request that lies before sample zero is zeroed
		size_t Bytes = static_cast<size_t>(BytesPerSample * FFMIN(-Start, Count));
		memset(Dst, 0, Bytes);

		// Start is negative here, so this shrinks Count by the zero-filled part
		Count += Start;
		// Entire request was before the start of the audio
		if (Count <= 0) return;

		Start = 0;
		Dst += Bytes;
	}

	// The cache is walked once from the front; the iterator is carried across
	// loop iterations rather than restarted
	CacheIterator it = Cache.begin();

	while (Count > 0) {
		// Find first useful cache block
		while (it != Cache.end() && it->Start + it->Samples <= Start) ++it;

		// Cache has the next block we want
		if (it != Cache.end() && it->Start <= Start) {
			// Since it->Start <= Start in this branch, DstOffset always comes out as 0
			int64_t SrcOffset = FFMAX(0, Start - it->Start);
			int64_t DstOffset = FFMAX(0, it->Start - Start);
			int64_t CopySamples = FFMIN(it->Samples - SrcOffset, Count - DstOffset);
			size_t Bytes = static_cast<size_t>(CopySamples * BytesPerSample);

			memcpy(Dst + DstOffset * BytesPerSample, &it->Data[SrcOffset * BytesPerSample], Bytes);
			Start += CopySamples;
			Count -= CopySamples;
			Dst += Bytes;
			++it;
		}
		// Decode another block
		else {
			// Going backwards requires a seek, and SeekOffset == -1 rules that out
			if (Start < CurrentSample && SeekOffset == -1)
				throw FFMS_Exception(FFMS_ERROR_SEEKING, FFMS_ERROR_CODEC, "Audio stream is not seekable");

			// Consider seeking when the target is behind the current position or
			// more than five times the last decoded block's length ahead of it
			if (SeekOffset >= 0 && (Start < CurrentSample || Start > CurrentSample + Decoded * 5)) {
				// Binary search for the first frame whose SampleStart is not below Start
				TFrameInfo f;
				f.SampleStart = Start;
				int NewPacketNumber = std::distance(Frames.begin(), std::lower_bound(Frames.begin(), Frames.end(), f, SampleStartComp));
				// Back off by SeekOffset plus 15 further packets, clamped to the
				// first packet, then step back to a keyframe
				NewPacketNumber = FFMAX(0, NewPacketNumber - SeekOffset - 15);
				while (NewPacketNumber > 0 && !Frames[NewPacketNumber].KeyFrame) --NewPacketNumber;

				// Only seek forward if it'll actually result in moving forward
				if (Start < CurrentSample || static_cast<size_t>(NewPacketNumber) > PacketNumber) {
					// Reset the decode position and flush the codec before seeking
					PacketNumber = NewPacketNumber;
					Decoded = 0;
					CurrentSample = -1;
					avcodec_flush_buffers(CodecContext);
					Seek();
				}
			}

			// Decode everything between the last keyframe and the block we want
			while (CurrentSample + Decoded <= Start) DecodeNextBlock();
			// The decoded block starts after the sample we need, so the gap can't be filled
			if (CurrentSample > Start)
				throw FFMS_Exception(FFMS_ERROR_SEEKING, FFMS_ERROR_CODEC, "Seeking is severely broken");

			// Cache the whole block, not only the part copied out below
			CacheBlock(it, CurrentSample, Decoded, &DecodingBuffer[0]);

			size_t FirstSample = static_cast<size_t>(Start - CurrentSample);
			size_t Samples = static_cast<size_t>(Decoded - FirstSample);
			size_t Bytes = FFMIN(Samples, static_cast<size_t>(Count)) * BytesPerSample;

			memcpy(Dst, &DecodingBuffer[FirstSample * BytesPerSample], Bytes);

			// Samples can exceed Count; Count then drops to zero or below and the
			// loop ends. The copy above was already limited to Count samples.
			Start += Samples;
			Count -= Samples;
			Dst += Bytes;
		}
	}
}

// Picks a packet that can be seeked to by PTS in order to reach PacketNumber.
//
// Packets don't always have unique PTSes, so the wanted packet may not be
// uniquely identifiable. When that is the case this returns an earlier
// packet, from which decoding forward lets us work out which packet we're on
// before we get to the one we actually wanted.
//
// MatroskaAudioSource doesn't need this, as it seeks by byte offset rather
// than PTS. LAVF theoretically can seek by byte offset, but we don't use it
// as not all demuxers support it and it's broken in some of those that claim
// to support it.
//
// Frames:       frame list of the track
// PacketNumber: packet we ultimately want to decode
// Returns PacketNumber itself when it is 0 or its PTS differs from both
// neighbours; otherwise the nearest earlier packet with a different PTS, or
// 0 if there is none.
size_t GetSeekablePacketNumber(FFMS_Track const& Frames, size_t PacketNumber) {
	// However much we might wish to, we can't seek to before packet zero
	if (PacketNumber == 0)
		return 0;

	const int64_t WantedPTS = Frames[PacketNumber].PTS;

	const bool SharedWithPrevious = WantedPTS == Frames[PacketNumber - 1].PTS;
	const bool IsLastPacket = PacketNumber + 1 == Frames.size();
	const bool SharedWithNext = !IsLastPacket && WantedPTS == Frames[PacketNumber + 1].PTS;

	// Desired packet's PTS is unique, so it can be seeked to directly
	if (!SharedWithPrevious && !SharedWithNext)
		return PacketNumber;

	// When decoding, we only reliably know what packet we're at when the
	// newly parsed packet has a different PTS from the previous one. So walk
	// backwards to the first packet with a different PTS and seek there;
	// decoding forward from it tells us when we hit the first packet of the
	// PTS group we wanted, rather than whichever one the splitter chose.
	//
	// This doesn't work if the desired packet shares its PTS with the first
	// packet, but that should never come up: the decoded results of those
	// packets are cached permanently, so there's no need to ever seek to them
	size_t Candidate = PacketNumber;
	do {
		--Candidate;
	} while (Candidate > 0 && Frames[Candidate].PTS == WantedPTS);

	return Candidate;
}