// Copyright 2003-2006 Gabest
// http://www.gabest.org
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
// http://www.gnu.org/copyleft/gpl.html

#include "stdafx.h"
#include <math.h>
#include "DirectVobSubFilter.h"
#include "dsutil/DSUtil.h"
#include "dsutil/MediaTypes.h"

#include <initguid.h>
#include "include/moreuuids.h"

extern int c2y_yb[256];
extern int c2y_yg[256];
extern int c2y_yr[256];
extern void ColorConvInit();

void BltLineRGB32(DWORD* d, BYTE* sub, int w, const GUID& subtype)
{
	if(subtype == MEDIASUBTYPE_YV12 || subtype == MEDIASUBTYPE_I420 || subtype == MEDIASUBTYPE_IYUV)
	{
		BYTE* db = (BYTE*)d;
		BYTE* dbtend = db + w;

		for(; db < dbtend; sub+=4, db++)
		{
			if(sub[3] < 0xff)
			{
				int y = (c2y_yb[sub[0]] + c2y_yg[sub[1]] + c2y_yr[sub[2]] + 0x108000) >> 16; 
				*db = y; // w/o colors 
			}
		}
	}
	else if(subtype == MEDIASUBTYPE_YUY2)
	{
		WORD* ds = (WORD*)d;
		WORD* dstend = ds + w;

		for(; ds < dstend; sub+=4, ds++)
		{
			if(sub[3] < 0xff)
			{
				int y = (c2y_yb[sub[0]] + c2y_yg[sub[1]] + c2y_yr[sub[2]] + 0x108000) >> 16; 
				*ds = 0x8000|y; // w/o colors 
			}
		}
	}
	else if(subtype == MEDIASUBTYPE_RGB555)
	{
		WORD* ds = (WORD*)d;
		WORD* dstend = ds + w;

		for(; ds < dstend; sub+=4, ds++)
		{
			if(sub[3] < 0xff)
			{
				*ds = ((*((DWORD*)sub)>>9)&0x7c00)|((*((DWORD*)sub)>>6)&0x03e0)|((*((DWORD*)sub)>>3)&0x001f);
			}
		}
	}
	else if(subtype == MEDIASUBTYPE_RGB565)
	{
		WORD* ds = (WORD*)d;
		WORD* dstend = ds + w;

		for(; ds < dstend; sub+=4, ds++)
		{
			if(sub[3] < 0xff)
			{
				*ds = ((*((DWORD*)sub)>>8)&0xf800)|((*((DWORD*)sub)>>5)&0x07e0)|((*((DWORD*)sub)>>3)&0x001f);
			}
		}
	}
	else if(subtype == MEDIASUBTYPE_RGB24)
	{
		BYTE* dt = (BYTE*)d;
		BYTE* dstend = dt + w*3;

		for(; dt < dstend; sub+=4, dt+=3)
		{
			if(sub[3] < 0xff)
			{
				dt[0] = sub[0];
				dt[1] = sub[1];
				dt[2] = sub[2];
			}
		}
	}
	else if(subtype == MEDIASUBTYPE_RGB32 || subtype == MEDIASUBTYPE_ARGB32)
	{
		DWORD* dstend = d + w;

		for(; d < dstend; sub+=4, d++)
		{
			if(sub[3] < 0xff) *d = *((DWORD*)sub)&0xffffff;
		}
	}
}

/* ResX2 */
void Scale2x(const GUID& subtype, BYTE* d, int dpitch, BYTE* s, int spitch, int w, int h)
{
	if(subtype == MEDIASUBTYPE_YV12 || subtype == MEDIASUBTYPE_I420 || subtype == MEDIASUBTYPE_IYUV)
	{
		BYTE* s1;
		BYTE* s2;
		BYTE* d1;

		for(s1 = s, s2 = s + h*spitch, d1 = d; s1 < s2; d1 += dpitch) // TODO: replace this mess with mmx code
		{
			BYTE* stmp = s1 + spitch;
			BYTE* dtmp = d1 + dpitch;

			for(BYTE* s3 = s1 + (w-1); s1 < s3; s1 += 1, d1 += 2)
			{
				d1[0] = s1[0]; 
				d1[1] = (s1[0]+s1[1])>>1;
			}

			d1[0] = d1[1] = s1[0]; 

			s1 += 1;
			d1 += 2;

			s1 = stmp;
			d1 = dtmp;
		}

		AvgLines8(d, h*2, dpitch);
	}
	else if(subtype == MEDIASUBTYPE_YUY2)
	{
		unsigned __int64 __0xffffffff00000000 = 0xffffffff00000000;
		unsigned __int64 __0x00000000ffffffff = 0x00000000ffffffff;
		unsigned __int64 __0x00ff00ff00ff00ff = 0x00ff00ff00ff00ff;

		BYTE* s1;
		BYTE* s2;
		BYTE* d1;

		for(s1 = s, s2 = s + h*spitch, d1 = d; s1 < s2; d1 += dpitch)
		{
			BYTE* stmp = s1 + spitch;
			BYTE* dtmp = d1 + dpitch;

			// row0, 4 pixels: y1|u1|y2|v1|y3|u2|y4|v2
			// ->
			// row0, 8 pixels: y1|u1|(y1+y2)/2|v1|y2|(u1+u2)/2|(y2+y3)/2|(v1+v2)/2

			__asm
			{
				mov		esi, s1
				mov		edi, d1

				mov		ecx, w
				shr		ecx, 1
				dec		ecx

				movq	mm4, __0x00ff00ff00ff00ff
				movq	mm5, __0x00000000ffffffff
				movq	mm6, __0xffffffff00000000
row_loop1:
				movq	mm0, [esi]
				movq	mm2, mm0

				pand	mm0, mm4	// mm0 = 00y400y300y200y1
				psrlw	mm2, 8		// mm2 = 00u200v200u100v1


				movq	mm1, mm0

				pand	mm0, mm5	// mm0 = 0000000000y200y1

				psllq	mm1, 16
				pand	mm1, mm6	// mm1 = 00y300y200000000

				por		mm1, mm0	// mm1 = 00y300y200y200y1

				punpcklwd mm0, mm0	// mm0 = 00y200y200y100y1

				paddw	mm0, mm1
				psrlw	mm0, 1		// mm0 = (mm0 + mm1) / 2


				movq	mm1, mm2
				punpckldq	mm1, mm1 // mm1 = 00u100v100u100v1

				paddw	mm1, mm2
				psrlw	mm1, 1		// mm1 = (mm1 + mm2) / 2


				psllw	mm1, 8
				por		mm0, mm1	// mm0 = (v1+v2)/2|(y2+y3)/2|(u1+u2)/2|y2|v1|(y1+y2)/2|u1|y1

				movq	[edi], mm0

				lea		esi, [esi+4]
				lea		edi, [edi+8]

				dec		ecx
				jnz		row_loop1

				mov		s1, esi
				mov		d1, edi
			};

			*d1++ = s1[0];
			*d1++ = s1[1];
			*d1++ =(s1[0]+s1[2])>>1;
			*d1++ = s1[3];

			*d1++ = s1[2];
			*d1++ = s1[1];
			*d1++ = s1[2];
			*d1++ = s1[3];

			s1 += 4;

			s1 = stmp;
			d1 = dtmp;
		}

		AvgLines8(d, h*2, dpitch);
	}
	else if(subtype == MEDIASUBTYPE_RGB555)
	{
		BYTE* s1;
		BYTE* s2;
		BYTE* d1;

		for(s1 = s, s2 = s + h*spitch, d1 = d; s1 < s2; d1 += dpitch) // TODO: replace this mess with mmx code
		{
			BYTE* stmp = s1 + spitch;
			BYTE* dtmp = d1 + dpitch;

			for(BYTE* s3 = s1 + (w-1)*2; s1 < s3; s1 += 2, d1 += 4)
			{
				*((WORD*)d1) = *((WORD*)s1);
				*((WORD*)d1+1) = 
					((((*((WORD*)s1)&0x7c00) + (*((WORD*)s1+1)&0x7c00)) >> 1)&0x7c00)|
					((((*((WORD*)s1)&0x03e0) + (*((WORD*)s1+1)&0x03e0)) >> 1)&0x03e0)|
					((((*((WORD*)s1)&0x001f) + (*((WORD*)s1+1)&0x001f)) >> 1)&0x001f);
			}

			*((WORD*)d1) = *((WORD*)s1);
			*((WORD*)d1+1) = *((WORD*)s1);

			s1 += 2;
			d1 += 4;

			s1 = stmp;
			d1 = dtmp;
		}

		AvgLines555(d, h*2, dpitch);
	}
	else if(subtype == MEDIASUBTYPE_RGB565)
	{
		BYTE* s1;
		BYTE* s2;
		BYTE* d1;

		for(s1 = s, s2 = s + h*spitch, d1 = d; s1 < s2; d1 += dpitch) // TODO: replace this mess with mmx code
		{
			BYTE* stmp = s1 + spitch;
			BYTE* dtmp = d1 + dpitch;

			for(BYTE* s3 = s1 + (w-1)*2; s1 < s3; s1 += 2, d1 += 4)
			{
				*((WORD*)d1) = *((WORD*)s1);
				*((WORD*)d1+1) = 
					((((*((WORD*)s1)&0xf800) + (*((WORD*)s1+1)&0xf800)) >> 1)&0xf800)|
					((((*((WORD*)s1)&0x07e0) + (*((WORD*)s1+1)&0x07e0)) >> 1)&0x07e0)|
					((((*((WORD*)s1)&0x001f) + (*((WORD*)s1+1)&0x001f)) >> 1)&0x001f);
			}

			*((WORD*)d1) = *((WORD*)s1);
			*((WORD*)d1+1) = *((WORD*)s1);

			s1 += 2;
			d1 += 4;

			s1 = stmp;
			d1 = dtmp;
		}

		AvgLines565(d, h*2, dpitch);
	}
	else if(subtype == MEDIASUBTYPE_RGB24)
	{
		BYTE* s1;
		BYTE* s2;
		BYTE* d1;

		for(s1 = s, s2 = s + h*spitch, d1 = d; s1 < s2; d1 += dpitch) // TODO: replace this mess with mmx code
		{
			BYTE* stmp = s1 + spitch;
			BYTE* dtmp = d1 + dpitch;

			for(BYTE* s3 = s1 + (w-1)*3; s1 < s3; s1 += 3, d1 += 6)
			{
				d1[0] = s1[0]; 
				d1[1] = s1[1]; 
				d1[2] = s1[2];
				d1[3] = (s1[0]+s1[3])>>1;
				d1[4] = (s1[1]+s1[4])>>1;
				d1[5] = (s1[2]+s1[5])>>1;
			}

			d1[0] = d1[3] = s1[0]; 
			d1[1] = d1[4] = s1[1]; 
			d1[2] = d1[5] = s1[2];

			s1 += 3;
			d1 += 6;

			s1 = stmp;
			d1 = dtmp;
		}

		AvgLines8(d, h*2, dpitch);
	}
	else if(subtype == MEDIASUBTYPE_RGB32 || subtype == MEDIASUBTYPE_ARGB32)
	{
		BYTE* s1;
		BYTE* s2;
		BYTE* d1;

		for(s1 = s, s2 = s + h*spitch, d1 = d; s1 < s2; d1 += dpitch)
		{
			BYTE* stmp = s1 + spitch;
			BYTE* dtmp = d1 + dpitch;

			__asm
			{
				mov		esi, s1
				mov		edi, d1

				mov		ecx, w
				dec		ecx

				pxor	mm0, mm0
row_loop3:
				movq	mm1, [esi]
				movq	mm2, mm1

				punpcklbw mm1, mm0	// mm1 = 00xx00r100g100b1
				punpckhbw mm2, mm0	// mm2 = 00xx00r200g200b2

				paddw	mm2, mm1
				psrlw	mm2, 1		// mm2 = (mm1 + mm2) / 2

				packuswb	mm1, mm2

				movq	[edi], mm1

				lea		esi, [esi+4]
				lea		edi, [edi+8]

				dec		ecx
				jnz		row_loop3

				mov		s1, esi
				mov		d1, edi
			};

			*((DWORD*)d1) = *((DWORD*)s1);
			*((DWORD*)d1+1) = *((DWORD*)s1);

			s1 += 4;
			d1 += 8;

			s1 = stmp;
			d1 = dtmp;
		}

		AvgLines8(d, h*2, dpitch);
	}

	__asm emms;
}

HRESULT CDirectVobSubFilter::Copy(BYTE* pSub, BYTE* pIn, CSize sub, CSize in, int bpp, const GUID& subtype, DWORD black)
{
	int wIn = in.cx, hIn = in.cy, pitchIn = wIn*bpp>>3;
	int wSub = sub.cx, hSub = sub.cy, pitchSub = wSub*bpp>>3;
	bool fScale2x = wIn*2 <= wSub;

	if(fScale2x) wIn <<= 1, hIn <<= 1;

	int left = ((wSub - wIn)>>1)&~1;
	int mid = wIn;
	int right = left + ((wSub - wIn)&1);

	int dpLeft = left*bpp>>3;
	int dpMid = mid*bpp>>3;
	int dpRight = right*bpp>>3;

	ASSERT(wSub >= wIn);

	{
		int i = 0, j = 0;

		j += (hSub - hIn) >> 1;

		for(; i < j; i++, pSub += pitchSub)
		{
			memsetd(pSub, black, dpLeft+dpMid+dpRight);
		}

		j += hIn;

		if(hIn > hSub)
			pIn += pitchIn * ((hIn - hSub) >> (fScale2x?2:1));

		if(fScale2x)
		{
			Scale2x(subtype, 
				pSub + dpLeft, pitchSub, pIn, pitchIn, 
				in.cx, (min(j, hSub) - i) >> 1);
            
			for(int k = min(j, hSub); i < k; i++, pIn += pitchIn, pSub += pitchSub)
			{
				memsetd(pSub, black, dpLeft);
				memsetd(pSub + dpLeft+dpMid, black, dpRight);
			}
		}
		else
		{
			for(int k = min(j, hSub); i < k; i++, pIn += pitchIn, pSub += pitchSub)
			{
				memsetd(pSub, black, dpLeft);
				memcpy(pSub + dpLeft, pIn, dpMid);
				memsetd(pSub + dpLeft+dpMid, black, dpRight);
			}
		}

		j = hSub;

		for(; i < j; i++, pSub += pitchSub)
		{
			memsetd(pSub, black, dpLeft+dpMid+dpRight);
		}
	}

	return NOERROR;
}

void CDirectVobSubFilter::PrintMessages(BYTE* pOut)
{
	if(!m_hdc || !m_hbm)
		return;

	ColorConvInit();

	const GUID& subtype = m_pOutput->CurrentMediaType().subtype;

	BITMAPINFOHEADER bihOut;
	ExtractBIH(&m_pOutput->CurrentMediaType(), &bihOut);

	CString msg, tmp;

	if(m_fOSD)
	{
		tmp.Format(_T("in: %dx%d %s\nout: %dx%d %s\n"), 
			m_w, m_h, 
			Subtype2String(m_pInput->CurrentMediaType().subtype),
			bihOut.biWidth, bihOut.biHeight, 
			Subtype2String(m_pOutput->CurrentMediaType().subtype));
		msg += tmp;

		tmp.Format(_T("real fps: %.3f, current fps: %.3f\nmedia time: %d, subtitle time: %d [ms]\nframe number: %d (calculated)\nrate: %.4f\n"), 
			m_fps, m_fMediaFPSEnabled?m_MediaFPS:fabs(m_fps),
			(int)m_tPrev.Millisecs(), (int)(CalcCurrentTime()/10000),
			(int)(m_tPrev.m_time * m_fps / 10000000),
			m_pInput->CurrentRate());
		msg += tmp;

		CAutoLock cAutoLock(&m_csQueueLock);

		if(m_pSubPicQueue)
		{
			int nSubPics = -1;
			REFERENCE_TIME rtNow = -1, rtStart = -1, rtStop = -1;
			m_pSubPicQueue->GetStats(nSubPics, rtNow, rtStart, rtStop);
			tmp.Format(_T("queue stats: %I64d - %I64d [ms]\n"), rtStart/10000, rtStop/10000);
			msg += tmp;

			for(int i = 0; i < nSubPics; i++)
			{
				m_pSubPicQueue->GetStats(i, rtStart, rtStop);
				tmp.Format(_T("%d: %I64d - %I64d [ms]\n"), i, rtStart/10000, rtStop/10000);
				msg += tmp;
			}

		}
	}

	if(msg.IsEmpty()) return;

	HANDLE hOldBitmap = SelectObject(m_hdc, m_hbm);
	HANDLE hOldFont = SelectObject(m_hdc, m_hfont);

	SetTextColor(m_hdc, 0xffffff);
	SetBkMode(m_hdc, TRANSPARENT);
	SetMapMode(m_hdc, MM_TEXT);

	BITMAP bm;
	GetObject(m_hbm, sizeof(BITMAP), &bm);

	CRect r(0, 0, bm.bmWidth, bm.bmHeight);
	DrawText(m_hdc, msg, _tcslen(msg), &r, DT_CALCRECT|DT_EXTERNALLEADING|DT_NOPREFIX|DT_WORDBREAK);

	r += CPoint(10, 10);
	r &= CRect(0, 0, bm.bmWidth, bm.bmHeight);

	DrawText(m_hdc, msg, _tcslen(msg), &r, DT_LEFT|DT_TOP|DT_NOPREFIX|DT_WORDBREAK);

	BYTE* pIn = (BYTE*)bm.bmBits;
	int pitchIn = bm.bmWidthBytes;
	int pitchOut = bihOut.biWidth * bihOut.biBitCount >> 3;

	if(subtype == MEDIASUBTYPE_YV12 || subtype == MEDIASUBTYPE_I420 || subtype == MEDIASUBTYPE_IYUV)
		pitchOut = bihOut.biWidth;

	pitchIn = (pitchIn+3)&~3;
	pitchOut = (pitchOut+3)&~3;

	if(bihOut.biHeight > 0 && bihOut.biCompression <= 3) // flip if the dst bitmap is flipped rgb (m_hbm is a top-down bitmap, not like the subpictures)
	{
		pOut += pitchOut * (abs(bihOut.biHeight)-1);
		pitchOut = -pitchOut;
	}

	pIn += pitchIn * r.top;
	pOut += pitchOut * r.top;

	for(int w = min(r.right, m_w), h = r.Height(); h--; pIn += pitchIn, pOut += pitchOut)
	{
		BltLineRGB32((DWORD*)pOut, pIn, w, subtype);
		memsetd(pIn, 0xff000000, r.right*4);
	}

	SelectObject(m_hdc, hOldBitmap);
	SelectObject(m_hdc, hOldFont);
}