/* * Copyright (C) 2003-2006 Gabest * http://www.gabest.org * * This Program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. * * This Program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with GNU Make; see the file COPYING. If not, write to * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. * http://www.gnu.org/copyleft/gpl.html * */ #include "stdafx.h" #include "MemSubPic.h" // color conv unsigned char Clip_base[256*3]; unsigned char* Clip = Clip_base + 256; const int c2y_cyb = int(0.114*219/255*65536+0.5); const int c2y_cyg = int(0.587*219/255*65536+0.5); const int c2y_cyr = int(0.299*219/255*65536+0.5); const int c2y_cu = int(1.0/2.018*1024+0.5); const int c2y_cv = int(1.0/1.596*1024+0.5); int c2y_yb[256]; int c2y_yg[256]; int c2y_yr[256]; const int y2c_cbu = int(2.018*65536+0.5); const int y2c_cgu = int(0.391*65536+0.5); const int y2c_cgv = int(0.813*65536+0.5); const int y2c_crv = int(1.596*65536+0.5); int y2c_bu[256]; int y2c_gu[256]; int y2c_gv[256]; int y2c_rv[256]; const int cy_cy = int(255.0/219.0*65536+0.5); const int cy_cy2 = int(255.0/219.0*32768+0.5); bool fColorConvInitOK = false; void ColorConvInit() { if(fColorConvInitOK) return; int i; for(i = 0; i < 256; i++) { Clip_base[i] = 0; Clip_base[i+256] = i; Clip_base[i+512] = 255; } for(i = 0; i < 256; i++) { c2y_yb[i] = c2y_cyb*i; c2y_yg[i] = c2y_cyg*i; c2y_yr[i] = c2y_cyr*i; y2c_bu[i] = y2c_cbu*(i-128); y2c_gu[i] = y2c_cgu*(i-128); y2c_gv[i] = y2c_cgv*(i-128); y2c_rv[i] = y2c_crv*(i-128); } fColorConvInitOK = true; } #define rgb2yuv(r1,g1,b1,r2,g2,b2) \ int y1 = (c2y_yb[b1] + c2y_yg[g1] + c2y_yr[r1] + 0x108000) >> 16; \ int y2 = (c2y_yb[b2] + c2y_yg[g2] + c2y_yr[r2] + 0x108000) >> 16; \ \ int scaled_y = (y1+y2-32) * cy_cy2; \ \ unsigned char u = Clip[(((((b1+b2)<<15) - scaled_y) >> 10) * c2y_cu + 0x800000 + 0x8000) >> 16]; \ unsigned char v = Clip[(((((r1+r2)<<15) - scaled_y) >> 10) * c2y_cv + 0x800000 + 0x8000) >> 16]; \ // // CMemSubPic // CMemSubPic::CMemSubPic(SubPicDesc& spd) : m_spd(spd) { m_maxsize.SetSize(spd.w, spd.h); m_rcDirty.SetRect(0, 0, spd.w, spd.h); } CMemSubPic::~CMemSubPic() { delete [] m_spd.bits, m_spd.bits = NULL; } // ISubPic STDMETHODIMP_(void*) CMemSubPic::GetObject() { return (void*)&m_spd; } STDMETHODIMP CMemSubPic::GetDesc(SubPicDesc& spd) { spd.type = m_spd.type; spd.w = m_size.cx; spd.h = m_size.cy; spd.bpp = m_spd.bpp; spd.pitch = m_spd.pitch; spd.bits = m_spd.bits; spd.bitsU = m_spd.bitsU; spd.bitsV = m_spd.bitsV; spd.vidrect = m_vidrect; return S_OK; } STDMETHODIMP CMemSubPic::CopyTo(ISubPic* pSubPic) { HRESULT hr; if(FAILED(hr = __super::CopyTo(pSubPic))) return hr; SubPicDesc src, dst; if(FAILED(GetDesc(src)) || FAILED(pSubPic->GetDesc(dst))) return E_FAIL; int w = m_rcDirty.Width(), h = m_rcDirty.Height(); BYTE* s = (BYTE*)src.bits + src.pitch*m_rcDirty.top + m_rcDirty.left*4; BYTE* d = (BYTE*)dst.bits + dst.pitch*m_rcDirty.top + m_rcDirty.left*4; for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch) memcpy(d, s, w*4); return S_OK; } STDMETHODIMP CMemSubPic::ClearDirtyRect(DWORD color) { if(m_rcDirty.IsRectEmpty()) return S_FALSE; BYTE* p = (BYTE*)m_spd.bits + m_spd.pitch*m_rcDirty.top + m_rcDirty.left*(m_spd.bpp>>3); for(int j = 0, h = m_rcDirty.Height(); j < h; j++, p += m_spd.pitch) { // memsetd(p, 0, m_rcDirty.Width()); int w = m_rcDirty.Width(); __asm { mov eax, color mov ecx, w mov edi, p cld rep stosd } } m_rcDirty.SetRectEmpty(); return S_OK; } STDMETHODIMP CMemSubPic::Lock(SubPicDesc& spd) { return GetDesc(spd); } STDMETHODIMP CMemSubPic::Unlock(RECT* pDirtyRect) { m_rcDirty = pDirtyRect ? *pDirtyRect : CRect(0,0,m_spd.w,m_spd.h); if(m_rcDirty.IsRectEmpty()) return S_OK; if(m_spd.type == MSP_YUY2 || m_spd.type == MSP_YV12 || m_spd.type == MSP_IYUV || m_spd.type == MSP_AYUV) { ColorConvInit(); if(m_spd.type == MSP_YUY2 || m_spd.type == MSP_YV12 || m_spd.type == MSP_IYUV) { m_rcDirty.left &= ~1; m_rcDirty.right = (m_rcDirty.right+1)&~1; if(m_spd.type == MSP_YV12 || m_spd.type == MSP_IYUV) { m_rcDirty.top &= ~1; m_rcDirty.bottom = (m_rcDirty.bottom+1)&~1; } } } int w = m_rcDirty.Width(), h = m_rcDirty.Height(); BYTE* top = (BYTE*)m_spd.bits + m_spd.pitch*m_rcDirty.top + m_rcDirty.left*4; BYTE* bottom = top + m_spd.pitch*h; if(m_spd.type == MSP_RGB16) { for(; top < bottom ; top += m_spd.pitch) { DWORD* s = (DWORD*)top; DWORD* e = s + w; for(; s < e; s++) { *s = ((*s>>3)&0x1f000000)|((*s>>8)&0xf800)|((*s>>5)&0x07e0)|((*s>>3)&0x001f); // *s = (*s&0xff000000)|((*s>>8)&0xf800)|((*s>>5)&0x07e0)|((*s>>3)&0x001f); } } } else if(m_spd.type == MSP_RGB15) { for(; top < bottom; top += m_spd.pitch) { DWORD* s = (DWORD*)top; DWORD* e = s + w; for(; s < e; s++) { *s = ((*s>>3)&0x1f000000)|((*s>>9)&0x7c00)|((*s>>6)&0x03e0)|((*s>>3)&0x001f); // *s = (*s&0xff000000)|((*s>>9)&0x7c00)|((*s>>6)&0x03e0)|((*s>>3)&0x001f); } } } else if(m_spd.type == MSP_YUY2 || m_spd.type == MSP_YV12 || m_spd.type == MSP_IYUV) { for(; top < bottom ; top += m_spd.pitch) { BYTE* s = top; BYTE* e = s + w*4; for(; s < e; s+=8) // ARGB ARGB -> AxYU AxYV { if((s[3]+s[7]) < 0x1fe) { s[1] = (c2y_yb[s[0]] + c2y_yg[s[1]] + c2y_yr[s[2]] + 0x108000) >> 16; s[5] = (c2y_yb[s[4]] + c2y_yg[s[5]] + c2y_yr[s[6]] + 0x108000) >> 16; int scaled_y = (s[1]+s[5]-32) * cy_cy2; s[0] = Clip[(((((s[0]+s[4])<<15) - scaled_y) >> 10) * c2y_cu + 0x800000 + 0x8000) >> 16]; s[4] = Clip[(((((s[2]+s[6])<<15) - scaled_y) >> 10) * c2y_cv + 0x800000 + 0x8000) >> 16]; } else { s[1] = s[5] = 0x10; s[0] = s[4] = 0x80; } } } } else if(m_spd.type == MSP_AYUV) { for(; top < bottom ; top += m_spd.pitch) { BYTE* s = top; BYTE* e = s + w*4; for(; s < e; s+=4) // ARGB -> AYUV { if(s[3] < 0xff) { int y = (c2y_yb[s[0]] + c2y_yg[s[1]] + c2y_yr[s[2]] + 0x108000) >> 16; int scaled_y = (y-32) * cy_cy; s[1] = Clip[((((s[0]<<16) - scaled_y) >> 10) * c2y_cu + 0x800000 + 0x8000) >> 16]; s[0] = Clip[((((s[2]<<16) - scaled_y) >> 10) * c2y_cv + 0x800000 + 0x8000) >> 16]; s[2] = y; } else { s[0] = s[1] = 0x80; s[2] = 0x10; } } } } return S_OK; } STDMETHODIMP CMemSubPic::AlphaBlt(RECT* pSrc, RECT* pDst, SubPicDesc* pTarget) { ASSERT(pTarget); if(!pSrc || !pDst || !pTarget) return E_POINTER; const SubPicDesc& src = m_spd; SubPicDesc dst = *pTarget; // copy, because we might modify it if(src.type != dst.type) return E_INVALIDARG; CRect rs(*pSrc), rd(*pDst); if(dst.h < 0) { dst.h = -dst.h; rd.bottom = dst.h - rd.bottom; rd.top = dst.h - rd.top; } if(rs.Width() != rd.Width() || rs.Height() != abs(rd.Height())) return E_INVALIDARG; int w = rs.Width(), h = rs.Height(); BYTE* s = (BYTE*)src.bits + src.pitch*rs.top + rs.left*4; BYTE* d = (BYTE*)dst.bits + dst.pitch*rd.top + ((rd.left*dst.bpp)>>3); if(rd.top > rd.bottom) { if(dst.type == MSP_RGB32 || dst.type == MSP_RGB24 || dst.type == MSP_RGB16 || dst.type == MSP_RGB15 || dst.type == MSP_YUY2 || dst.type == MSP_AYUV) { d = (BYTE*)dst.bits + dst.pitch*(rd.top-1) + (rd.left*dst.bpp>>3); } else if(dst.type == MSP_YV12 || dst.type == MSP_IYUV) { d = (BYTE*)dst.bits + dst.pitch*(rd.top-1) + (rd.left*8>>3); } else { return E_NOTIMPL; } dst.pitch = -dst.pitch; } for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch) { if(dst.type == MSP_RGB32 || dst.type == MSP_AYUV) { BYTE* s2 = s; BYTE* s2end = s2 + w*4; DWORD* d2 = (DWORD*)d; for(; s2 < s2end; s2 += 4, d2++) { if(s2[3] < 0xff) { *d2 = (((((*d2&0x00ff00ff)*s2[3])>>8) + (*((DWORD*)s2)&0x00ff00ff))&0x00ff00ff) | (((((*d2&0x0000ff00)*s2[3])>>8) + (*((DWORD*)s2)&0x0000ff00))&0x0000ff00); } } } else if(dst.type == MSP_RGB24) { BYTE* s2 = s; BYTE* s2end = s2 + w*4; BYTE* d2 = d; for(; s2 < s2end; s2 += 4, d2 += 3) { if(s2[3] < 0xff) { d2[0] = ((d2[0]*s2[3])>>8) + s2[0]; d2[1] = ((d2[1]*s2[3])>>8) + s2[1]; d2[2] = ((d2[2]*s2[3])>>8) + s2[2]; } } } else if(dst.type == MSP_RGB16) { BYTE* s2 = s; BYTE* s2end = s2 + w*4; WORD* d2 = (WORD*)d; for(; s2 < s2end; s2 += 4, d2++) { if(s2[3] < 0x1f) { *d2 = (WORD)((((((*d2&0xf81f)*s2[3])>>5) + (*(DWORD*)s2&0xf81f))&0xf81f) | (((((*d2&0x07e0)*s2[3])>>5) + (*(DWORD*)s2&0x07e0))&0x07e0)); /* *d2 = (WORD)((((((*d2&0xf800)*s2[3])>>8) + (*(DWORD*)s2&0xf800))&0xf800) | (((((*d2&0x07e0)*s2[3])>>8) + (*(DWORD*)s2&0x07e0))&0x07e0) | (((((*d2&0x001f)*s2[3])>>8) + (*(DWORD*)s2&0x001f))&0x001f)); */ } } } else if(dst.type == MSP_RGB15) { BYTE* s2 = s; BYTE* s2end = s2 + w*4; WORD* d2 = (WORD*)d; for(; s2 < s2end; s2 += 4, d2++) { if(s2[3] < 0x1f) { *d2 = (WORD)((((((*d2&0x7c1f)*s2[3])>>5) + (*(DWORD*)s2&0x7c1f))&0x7c1f) | (((((*d2&0x03e0)*s2[3])>>5) + (*(DWORD*)s2&0x03e0))&0x03e0)); /* *d2 = (WORD)((((((*d2&0x7c00)*s2[3])>>8) + (*(DWORD*)s2&0x7c00))&0x7c00) | (((((*d2&0x03e0)*s2[3])>>8) + (*(DWORD*)s2&0x03e0))&0x03e0) | (((((*d2&0x001f)*s2[3])>>8) + (*(DWORD*)s2&0x001f))&0x001f)); */ } } } else if(dst.type == MSP_YUY2) { // BYTE y1, y2, u, v; unsigned int ia, c; BYTE* s2 = s; BYTE* s2end = s2 + w*4; DWORD* d2 = (DWORD*)d; for(; s2 < s2end; s2 += 8, d2++) { ia = (s2[3]+s2[7])>>1; if(ia < 0xff) { /* y1 = (BYTE)(((((*d2&0xff)-0x10)*s2[3])>>8) + s2[1]); // + y1; y2 = (BYTE)((((((*d2>>16)&0xff)-0x10)*s2[7])>>8) + s2[5]); // + y2; u = (BYTE)((((((*d2>>8)&0xff)-0x80)*ia)>>8) + s2[0]); // + u; v = (BYTE)((((((*d2>>24)&0xff)-0x80)*ia)>>8) + s2[4]); // + v; *d2 = (v<<24)|(y2<<16)|(u<<8)|y1; */ static const __int64 _8181 = 0x0080001000800010i64; ia = (ia<<24)|(s2[7]<<16)|(ia<<8)|s2[3]; c = (s2[4]<<24)|(s2[5]<<16)|(s2[0]<<8)|s2[1]; // (v<<24)|(y2<<16)|(u<<8)|y1; __asm { mov esi, s2 mov edi, d2 pxor mm0, mm0 movq mm1, _8181 movd mm2, c punpcklbw mm2, mm0 movd mm3, [edi] punpcklbw mm3, mm0 movd mm4, ia punpcklbw mm4, mm0 psrlw mm4, 1 psubsw mm3, mm1 pmullw mm3, mm4 psraw mm3, 7 paddsw mm3, mm2 packuswb mm3, mm3 movd [edi], mm3 }; } } } else if(dst.type == MSP_YV12 || dst.type == MSP_IYUV) { BYTE* s2 = s; BYTE* s2end = s2 + w*4; BYTE* d2 = d; for(; s2 < s2end; s2 += 4, d2++) { if(s2[3] < 0xff) { d2[0] = (((d2[0]-0x10)*s2[3])>>8) + s2[1]; } } } else { return E_NOTIMPL; } } dst.pitch = abs(dst.pitch); if(dst.type == MSP_YV12 || dst.type == MSP_IYUV) { int w2 = w/2, h2 = h/2; if(!dst.pitchUV) { dst.pitchUV = dst.pitch/2; } int sizep4 = dst.pitchUV*dst.h/2; BYTE* ss[2]; ss[0] = (BYTE*)src.bits + src.pitch*rs.top + rs.left*4; ss[1] = ss[0] + 4; if(!dst.bitsU || !dst.bitsV) { dst.bitsU = (BYTE*)dst.bits + dst.pitch*dst.h; dst.bitsV = dst.bitsU + dst.pitchUV*dst.h/2; if(dst.type == MSP_YV12) { BYTE* p = dst.bitsU; dst.bitsU = dst.bitsV; dst.bitsV = p; } } BYTE* dd[2]; dd[0] = dst.bitsU + dst.pitchUV*rd.top/2 + rd.left/2; dd[1] = dst.bitsV + dst.pitchUV*rd.top/2 + rd.left/2; if(rd.top > rd.bottom) { dd[0] = dst.bitsU + dst.pitchUV*(rd.top/2-1) + rd.left/2; dd[1] = dst.bitsV + dst.pitchUV*(rd.top/2-1) + rd.left/2; dst.pitchUV = -dst.pitchUV; } for(int i = 0; i < 2; i++) { s = ss[i]; d = dd[i]; BYTE* is = ss[1-i]; for(int j = 0; j < h2; j++, s += src.pitch*2, d += dst.pitchUV, is += src.pitch*2) { BYTE* s2 = s; BYTE* s2end = s2 + w*4; BYTE* d2 = d; BYTE* is2 = is; for(; s2 < s2end; s2 += 8, d2++, is2 += 8) { unsigned int ia = (s2[3]+s2[3+src.pitch]+is2[3]+is2[3+src.pitch])>>2; if(ia < 0xff) { *d2 = (((*d2-0x80)*ia)>>8) + ((s2[0]+s2[src.pitch])>>1); } } } } } __asm emms; return S_OK; } // // CMemSubPicAllocator // CMemSubPicAllocator::CMemSubPicAllocator(int type, SIZE maxsize) : ISubPicAllocatorImpl(maxsize, false, false) , m_type(type) , m_maxsize(maxsize) { } // ISubPicAllocatorImpl bool CMemSubPicAllocator::Alloc(bool fStatic, ISubPic** ppSubPic) { if(!ppSubPic) return(false); SubPicDesc spd; spd.w = m_maxsize.cx; spd.h = m_maxsize.cy; spd.bpp = 32; spd.pitch = (spd.w*spd.bpp)>>3; spd.type = m_type; if(!(spd.bits = new BYTE[spd.pitch*spd.h])) return(false); if(!(*ppSubPic = new CMemSubPic(spd))) return(false); (*ppSubPic)->AddRef(); return(true); }