Back to the classic pixmix/pixmix2 architecture. Probably made some parts clearer. More, and more accurate, documentation of the Draw code. The alpha calculation is probably more correct (or at least, it seems, no less correct).

Originally committed to SVN as r1543.
This commit is contained in:
Niels Martin Hansen 2007-08-29 19:57:16 +00:00
parent 35f2b7caa2
commit 268b6c45af
1 changed files with 61 additions and 56 deletions

View File

@ -773,8 +773,21 @@ bool Rasterizer::Rasterize(int xsub, int ysub, bool fBlur)
// Alpha-blends `color` onto the 32-bit pixel at `dst`.
// The blend weight is `alpha` multiplied by the top byte of `color`, shifted down and masked to 8 bits.
// NOTE(review): two consecutive `int a` declarations follow (>>12 and >>6). This looks like the
// removed/added pair of a diff hunk rendered without +/- markers; only one of them can exist in
// the real file -- confirm against the repository before relying on either shift amount.
static __forceinline void pixmix(DWORD *dst, DWORD color, DWORD alpha)
{
int a = (((alpha)*(color>>24))>>12)&0xff;
int a = (((alpha)*(color>>24))>>6)&0xff;
// Make sure both a and ia are in range 1..256 for the >>8 operations below to be correct
int ia = 256-a;
a+=1;
// Bytes 0 and 2 are blended in a single multiply: the 0x00ff00ff mask leaves an empty byte
// above each of them to hold the product, and the >>8 brings the results back into place.
// Byte 1 is blended the same way on its own.
// Byte 3 of *dst is only scaled by ia/256; `color` contributes nothing to it.
*dst = ((((*dst&0x00ff00ff)*ia + (color&0x00ff00ff)*a)&0xff00ff00)>>8)
| ((((*dst&0x0000ff00)*ia + (color&0x0000ff00)*a)&0x00ff0000)>>8)
| ((((*dst>>8)&0x00ff0000)*ia)&0xff000000);
}
static __forceinline void pixmix2(DWORD *dst, DWORD color, DWORD shapealpha, DWORD clipalpha)
{
int a = (((shapealpha)*(clipalpha)*(color>>24))>>12)&0xff;
int ia = 256-a;
a+=1;
*dst = ((((*dst&0x00ff00ff)*ia + (color&0x00ff00ff)*a)&0xff00ff00)>>8)
| ((((*dst&0x0000ff00)*ia + (color&0x0000ff00)*a)&0x00ff0000)>>8)
@ -786,11 +799,30 @@ static __forceinline void pixmix(DWORD *dst, DWORD color, DWORD alpha)
// SSE2 variant of the single-alpha blend: mixes `color` onto the 32-bit pixel at `dst`,
// weighting by `alpha` multiplied by the top byte of `color`.
// NOTE(review): the two `alpha = ...` lines and the two `__m128i a = ...` lines below look like
// removed/added pairs of a diff hunk rendered without +/- markers (>>12 vs >>6, alpha vs alpha+1);
// only one of each pair can be live in the real file -- confirm against the repository.
static __forceinline void pixmix_sse2(DWORD* dst, DWORD color, DWORD alpha)
{
alpha = ((alpha * (color>>24)) >> 12) & 0xff;
alpha = (((alpha) * (color>>24)) >> 6) & 0xff;
// Clear the top byte of the colour so it contributes zero to the top byte of the result.
color &= 0xffffff;
__m128i zero = _mm_setzero_si128();
// Each 32-bit lane holds a pair of 16-bit weights: low half for the destination (0x100 - alpha),
// high half for the source colour.
__m128i a = _mm_set1_epi32((alpha << 16) | (0x100 - alpha));
__m128i a = _mm_set1_epi32(((alpha+1) << 16) | (0x100 - alpha));
// Widen the four bytes of the destination pixel and of the colour to 16-bit values.
__m128i d = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*dst), zero);
__m128i s = _mm_unpacklo_epi8(_mm_cvtsi32_si128(color), zero);
// Interleave so each 32-bit lane is (dst channel, colour channel), matching the weight pairs in `a`.
__m128i r = _mm_unpacklo_epi16(d, s);
// Per lane: dst*weight_dst + colour*weight_src, then divide by 256.
r = _mm_madd_epi16(r, a);
r = _mm_srli_epi32(r, 8);
// Narrow 32-bit -> 16-bit -> 8-bit with saturation and store the low four bytes as the new pixel.
r = _mm_packs_epi32(r, r);
r = _mm_packus_epi16(r, r);
*dst = (DWORD)_mm_cvtsi128_si32(r);
}
static __forceinline void pixmix2_sse2(DWORD* dst, DWORD color, DWORD shapealpha, DWORD clipalpha)
{
int alpha = (((shapealpha)*(clipalpha)*(color>>24))>>12)&0xff;
color &= 0xffffff;
__m128i zero = _mm_setzero_si128();
__m128i a = _mm_set1_epi32(((alpha+1) << 16) | (0x100 - alpha));
__m128i d = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*dst), zero);
__m128i s = _mm_unpacklo_epi8(_mm_cvtsi32_si128(color), zero);
__m128i r = _mm_unpacklo_epi16(d, s);
@ -813,7 +845,8 @@ static const __int64 _00ff00ff00ff00ff = 0x00ff00ff00ff00ffi64;
// clipRect is a rectangular clip region to render inside.
// pAlphaMask is an alpha clipping mask.
// xsub and ysub ???
// switchpts seems to be an array of interlaced colour switching coordinates/colours to switch to.
// switchpts seems to be an array of fill colours interlaced with coordinates.
// switchpts[i*2] contains a colour and switchpts[i*2+1] contains the coordinate from which that colour is used.
// fBody tells whether to render the body of the subs.
// fBorder tells whether to render the border of the subs.
CRect Rasterizer::Draw(SubPicDesc& spd, CRect& clipRect, byte* pAlphaMask, int xsub, int ysub, const long* switchpts, bool fBody, bool fBorder)
@ -853,13 +886,16 @@ CRect Rasterizer::Draw(SubPicDesc& spd, CRect& clipRect, byte* pAlphaMask, int x
// The alpha bitmap of the subtitles?
const byte* src = mpOverlayBuffer + 2*(mOverlayWidth * yo + xo);
// s points to what the "body" to use is
// If we're rendering body fill and border, src+1 points to the array of
// widened regions which contain both border and fill in one.
const byte* s = fBorder ? (src+1) : src;
// The complex "vector clip mask" I think.
const byte* am = pAlphaMask + spd.w * y + x;
// How would this differ from src?
unsigned long* dst = (unsigned long *)((char *)spd.bits + spd.pitch * y) + x;
// ??? What is switchpts ?
// Grab the first colour
unsigned long color = switchpts[0];
// CPUID from VDub
@ -871,23 +907,24 @@ CRect Rasterizer::Draw(SubPicDesc& spd, CRect& clipRect, byte* pAlphaMask, int x
// Basic case of no complex clipping mask
if(!pAlphaMask)
{
// Again, what is switchpts?
// If the first colour switching coordinate is at "infinite" we're
// never switching and can use some simpler code.
// ??? Is this optimisation really worth the extra readability issues it adds?
if(switchpts[1] == 0xffffffff)
{
// Are we rendering the fill or a border/shadow? I think...
// fBody is true if we're rendering a fill or a shadow.
if(fBody)
{
// Run over every pixel, overlaying the subtitles with the fill colour
if(fSSE2)
for(int wt=0; wt<w; ++wt)
// Why s[wt*2] and not s[wt] ?
// The <<6 is due to pixmix expecting the alpha parameter to be
// the multiplication of two 6-bit unsigned numbers but we
// only have one here. (No alpha mask.)
pixmix_sse2(&dst[wt], color, s[wt*2]<<6);
pixmix_sse2(&dst[wt], color, s[wt*2]);
else
for(int wt=0; wt<w; ++wt)
pixmix(&dst[wt], color, s[wt*2]<<6);
pixmix(&dst[wt], color, s[wt*2]);
}
// Not body, ie. something else (border, shadow, I guess)
else
@ -902,10 +939,10 @@ CRect Rasterizer::Draw(SubPicDesc& spd, CRect& clipRect, byte* pAlphaMask, int x
// created by CreateWidenedRegion, and thus contains
// both the fill and the border, so subtracting the fill
// from that is always safe.
pixmix_sse2(&dst[wt], color, (src[wt*2+1] - src[wt*2])<<6);
pixmix_sse2(&dst[wt], color, src[wt*2+1] - src[wt*2]);
else
for(int wt=0; wt<w; ++wt)
pixmix(&dst[wt], color, (src[wt*2+1] - src[wt*2])<<6);
pixmix(&dst[wt], color, src[wt*2+1] - src[wt*2]);
}
}
// not (switchpts[1] == 0xffffffff)
@ -923,13 +960,13 @@ CRect Rasterizer::Draw(SubPicDesc& spd, CRect& clipRect, byte* pAlphaMask, int x
// So if we have passed the switchpoint (?) switch to another colour
// (So switchpts stores both colours *and* coordinates?)
if(wt+xo >= sw[1]) {while(wt+xo >= sw[1]) sw += 2; color = sw[-2];}
pixmix_sse2(&dst[wt], color, s[wt*2]<<6);
pixmix_sse2(&dst[wt], color, s[wt*2]);
}
else
for(int wt=0; wt<w; ++wt)
{
if(wt+xo >= sw[1]) {while(wt+xo >= sw[1]) sw += 2; color = sw[-2];}
pixmix(&dst[wt], color, s[wt*2]<<6);
pixmix(&dst[wt], color, s[wt*2]);
}
}
// Not body
@ -939,13 +976,13 @@ CRect Rasterizer::Draw(SubPicDesc& spd, CRect& clipRect, byte* pAlphaMask, int x
for(int wt=0; wt<w; ++wt)
{
if(wt+xo >= sw[1]) {while(wt+xo >= sw[1]) sw += 2; color = sw[-2];}
pixmix_sse2(&dst[wt], color, (src[wt*2+1] - src[wt*2])<<6);
pixmix_sse2(&dst[wt], color, src[wt*2+1] - src[wt*2]);
}
else
for(int wt=0; wt<w; ++wt)
{
if(wt+xo >= sw[1]) {while(wt+xo >= sw[1]) sw += 2; color = sw[-2];}
pixmix(&dst[wt], color, (src[wt*2+1] - src[wt*2])<<6);
pixmix(&dst[wt], color, src[wt*2+1] - src[wt*2]);
}
}
}
@ -957,12 +994,6 @@ CRect Rasterizer::Draw(SubPicDesc& spd, CRect& clipRect, byte* pAlphaMask, int x
{
if(fBody)
{
/*const byte* s = fBorder?(src+1):src;
for(int wt=0; wt<w; ++wt)
{
pixmix2(s[wt*2]);
}*/
if(fSSE2)
for(int wt=0; wt<w; ++wt)
// Both s and am contain 6-bit bitmaps of two different
@ -970,23 +1001,19 @@ CRect Rasterizer::Draw(SubPicDesc& spd, CRect& clipRect, byte* pAlphaMask, int x
// clipping mask.
// Multiplying them together yields a 12-bit number.
// I think some imprecision is introduced here??
pixmix_sse2(&dst[wt], color, s[wt*2] * am[wt]);
pixmix2_sse2(&dst[wt], color, s[wt*2], am[wt]);
else
for(int wt=0; wt<w; ++wt)
pixmix(&dst[wt], color, s[wt*2] * am[wt]);
pixmix2(&dst[wt], color, s[wt*2], am[wt]);
}
else
{
/*for(int wt=0; wt<w; ++wt)
{
pixmix2(src[wt*2+1]-src[wt*2]);
}*/
if(fSSE2)
for(int wt=0; wt<w; ++wt)
pixmix_sse2(&dst[wt], color, (src[wt*2+1] - src[wt*2]) * am[wt]);
pixmix2_sse2(&dst[wt], color, src[wt*2+1] - src[wt*2], am[wt]);
else
for(int wt=0; wt<w; ++wt)
pixmix(&dst[wt], color, (src[wt*2+1] - src[wt*2]) * am[wt]);
pixmix2(&dst[wt], color, src[wt*2+1] - src[wt*2], am[wt]);
}
}
else
@ -995,18 +1022,6 @@ CRect Rasterizer::Draw(SubPicDesc& spd, CRect& clipRect, byte* pAlphaMask, int x
if(fBody)
{
/*const byte* s = fBorder?(src+1):src;
for(int wt=0; wt<w; ++wt)
{
if(wt+xo >= sw[1])
{
while(wt+xo >= sw[1]) sw += 2;
color = sw[-2];
}
pixmix2(s[wt*2]);
}*/
if(fSSE2)
for(int wt=0; wt<w; ++wt)
{
@ -1014,7 +1029,7 @@ CRect Rasterizer::Draw(SubPicDesc& spd, CRect& clipRect, byte* pAlphaMask, int x
while(wt+xo >= sw[1])
sw += 2; color = sw[-2];
}
pixmix_sse2(&dst[wt], color, s[wt*2] * am[wt]);
pixmix2_sse2(&dst[wt], color, s[wt*2], am[wt]);
}
else
for(int wt=0; wt<w; ++wt)
@ -1023,21 +1038,11 @@ CRect Rasterizer::Draw(SubPicDesc& spd, CRect& clipRect, byte* pAlphaMask, int x
while(wt+xo >= sw[1])
sw += 2; color = sw[-2];
}
pixmix(&dst[wt], color, s[wt*2] * am[wt]);
pixmix2(&dst[wt], color, s[wt*2], am[wt]);
}
}
else
{
/*for(int wt=0; wt<w; ++wt)
{
if(wt+xo >= sw[1])
{
while(wt+xo >= sw[1]) sw += 2;
color = sw[-2];
}
pixmix2(src[wt*2+1]-src[wt*2]);
}*/
if(fSSE2)
for(int wt=0; wt<w; ++wt)
{
@ -1045,7 +1050,7 @@ CRect Rasterizer::Draw(SubPicDesc& spd, CRect& clipRect, byte* pAlphaMask, int x
while(wt+xo >= sw[1])
sw += 2; color = sw[-2];
}
pixmix_sse2(&dst[wt], color, (src[wt*2+1] - src[wt*2]) * am[wt]);
pixmix2_sse2(&dst[wt], color, src[wt*2+1] - src[wt*2], am[wt]);
}
else
for(int wt=0; wt<w; ++wt)
@ -1054,7 +1059,7 @@ CRect Rasterizer::Draw(SubPicDesc& spd, CRect& clipRect, byte* pAlphaMask, int x
while(wt+xo >= sw[1])
sw += 2; color = sw[-2];
}
pixmix(&dst[wt], color, (src[wt*2+1] - src[wt*2]) * am[wt]);
pixmix2(&dst[wt], color, src[wt*2+1] - src[wt*2], am[wt]);
}
}
}