wined3d: Implement half float vertex buffer conversion.

This commit is contained in:
Stefan Dösinger 2007-12-19 17:18:39 +01:00 committed by Alexandre Julliard
parent 6598589ebd
commit b5f925cae0
5 changed files with 266 additions and 68 deletions

View File

@ -277,6 +277,8 @@ static void CreateVBO(IWineD3DVertexBufferImpl *object) {
WARN("glBufferDataARB failed with error %s (%#x)\n", debug_glerror(error), error);
goto error;
}
object->vbo_size = object->resource.size;
object->vbo_usage = glUsage;
LEAVE_GL();

View File

@ -201,10 +201,6 @@ void primitiveDeclarationConvertToStridedData(
if((UINT_PTR)data < -This->stateBlock->loadBaseVertexIndex * stride) {
FIXME("System memory vertex data load offset is negative!\n");
}
} else if(vertexDeclaration->half_float_conv_needed) {
WARN("Half float vertex data used, but GL_NV_half_float is not supported. Not using vbos\n");
streamVBO = 0;
data = ((IWineD3DVertexBufferImpl *) This->stateBlock->streamSource[element->Stream])->resource.allocatedMemory;
}
if(fixup) {
@ -574,24 +570,6 @@ static void drawStridedSlow(IWineD3DDevice *iface, WineDirect3DVertexStridedData
checkGLcall("glEnd and previous calls");
}
/* See GL_NV_half_float for reference.
 *
 * Expands one 16 bit half float (1 sign bit, 5 exponent bits biased by 15,
 * 10 mantissa bits) into a 32 bit float.
 *
 * in: Pointer to the half float to convert. Must not be NULL.
 *
 * Returns the converted value. Signed zeros, denormals and infinities keep
 * their sign. Any NaN input yields the generic NAN, its payload is not kept.
 */
static inline float float_16_to_32(const unsigned short *in) {
    const unsigned short s = ((*in) & 0x8000);
    const unsigned short e = ((*in) & 0x7C00) >> 10;
    const unsigned short m = (*in) & 0x3FF;
    const float sgn = (s ? -1.0f : 1.0f);

    if(e == 0) {
        /* Zero or denormal: m * 2^-24. With m == 0 this is +0.0 or -0.0.
         * Every factor is exactly representable, so no rounding happens.
         */
        return sgn * (float) m * (1.0f / 16777216.0f);
    } else if(e < 31) {
        /* Normal number: (1 + m / 1024) * 2^(e - 15), rewritten as
         * 2^e * (1024 + m) * 2^-25 so that only exact power-of-two scaling
         * is needed instead of a pow() call per component.
         */
        return sgn * (float) (1u << e) * (float) (1024 + m) * (1.0f / 33554432.0f);
    } else {
        /* Use the math.h constants rather than dividing by a constant zero,
         * which some compilers reject or warn about.
         */
        if(m == 0) return sgn * INFINITY; /* +INF / -INF */
        else return NAN; /* NAN */
    }
}
static inline void send_attribute(IWineD3DDeviceImpl *This, const DWORD type, const UINT index, const void *ptr) {
switch(type) {
case WINED3DDECLTYPE_FLOAT1:
@ -1103,19 +1081,21 @@ void drawPrimitive(IWineD3DDevice *iface,
if (This->useDrawStridedSlow || emulation) {
/* Immediate mode drawing */
drawStridedSlow(iface, strided, calculatedNumberOfindices,
glPrimType, idxData, idxSize, minIndex, StartIdx, StartVertexIndex);
if(use_vs(This)) {
FIXME("Using immediate mode with vertex shaders for half float emulation\n");
drawStridedSlowVs(iface, strided, calculatedNumberOfindices, glPrimType,
idxData, idxSize, minIndex, StartIdx, StartVertexIndex);
} else {
drawStridedSlow(iface, strided, calculatedNumberOfindices,
glPrimType, idxData, idxSize, minIndex, StartIdx, StartVertexIndex);
}
} else if(This->instancedDraw) {
/* Instancing emulation with mixing immediate mode and arrays */
drawStridedInstanced(iface, &This->strided_streams, calculatedNumberOfindices, glPrimType,
idxData, idxSize, minIndex, StartIdx, StartVertexIndex);
} else if(!((IWineD3DVertexDeclarationImpl *) This->stateBlock->vertexDecl)->half_float_conv_needed) {
} else {
drawStridedFast(iface, calculatedNumberOfindices, glPrimType,
idxData, idxSize, minIndex, StartIdx, StartVertexIndex);
} else {
FIXME("Using immediate mode with vertex shaders for half float emulation\n");
drawStridedSlowVs(iface, strided, calculatedNumberOfindices, glPrimType,
idxData, idxSize, minIndex, StartIdx, StartVertexIndex);
}
}

View File

@ -2880,15 +2880,11 @@ static inline void loadNumberedArrays(IWineD3DStateBlockImpl *stateblock, WineDi
GLint curVBO = GL_SUPPORT(ARB_VERTEX_BUFFER_OBJECT) ? -1 : 0;
int i;
UINT *offset = stateblock->streamOffset;
IWineD3DVertexBufferImpl *vb;
/* Default to no instancing */
stateblock->wineD3DDevice->instancedDraw = FALSE;
if(((IWineD3DVertexDeclarationImpl *)stateblock->vertexDecl)->half_float_conv_needed) {
/* This will be handled using drawStridedSlow */
return;
}
for (i = 0; i < MAX_ATTRIBS; i++) {
if (!strided->u.input[i].lpData && !strided->u.input[i].VBO)
@ -2909,12 +2905,37 @@ static inline void loadNumberedArrays(IWineD3DStateBlockImpl *stateblock, WineDi
checkGLcall("glBindBufferARB");
curVBO = strided->u.input[i].VBO;
}
GL_EXTCALL(glVertexAttribPointerARB(i,
WINED3D_ATR_SIZE(strided->u.input[i].dwType),
WINED3D_ATR_GLTYPE(strided->u.input[i].dwType),
WINED3D_ATR_NORMALIZED(strided->u.input[i].dwType),
strided->u.input[i].dwStride,
strided->u.input[i].lpData + stateblock->loadBaseVertexIndex * strided->u.input[i].dwStride + offset[strided->u.input[i].streamNo]) );
vb = (IWineD3DVertexBufferImpl *) stateblock->streamSource[strided->u.input[i].streamNo];
/* Use the VBO to find out if a vertex buffer exists, not the vb pointer. vb can point to a
* user pointer data blob. In that case curVBO will be 0. If there is a vertex buffer but no
* vbo we won't be loading converted attributes anyway
*/
if(curVBO && vb->conv_shift) {
TRACE("Loading attribute from shifted buffer\n");
TRACE("Attrib %d has original stride %d, new stride %d\n", i, strided->u.input[i].dwStride, vb->conv_stride);
TRACE("Original offset %p, additional offset 0x%08x\n",strided->u.input[i].lpData, vb->conv_shift[(DWORD_PTR) strided->u.input[i].lpData]);
TRACE("Opengl type %x\n", WINED3D_ATR_GLTYPE(strided->u.input[i].dwType));
GL_EXTCALL(glVertexAttribPointerARB(i,
WINED3D_ATR_SIZE(strided->u.input[i].dwType),
WINED3D_ATR_GLTYPE(strided->u.input[i].dwType),
WINED3D_ATR_NORMALIZED(strided->u.input[i].dwType),
vb->conv_stride,
strided->u.input[i].lpData + vb->conv_shift[(DWORD_PTR) strided->u.input[i].lpData] +
stateblock->loadBaseVertexIndex * strided->u.input[i].dwStride +
offset[strided->u.input[i].streamNo]));
} else {
GL_EXTCALL(glVertexAttribPointerARB(i,
WINED3D_ATR_SIZE(strided->u.input[i].dwType),
WINED3D_ATR_GLTYPE(strided->u.input[i].dwType),
WINED3D_ATR_NORMALIZED(strided->u.input[i].dwType),
strided->u.input[i].dwStride,
strided->u.input[i].lpData +
stateblock->loadBaseVertexIndex * strided->u.input[i].dwStride +
offset[strided->u.input[i].streamNo]) );
}
GL_EXTCALL(glEnableVertexAttribArrayARB(i));
} else {
/* Stride = 0 means always the same values. glVertexAttribPointerARB doesn't do that. Instead disable the pointer and
@ -2922,7 +2943,7 @@ static inline void loadNumberedArrays(IWineD3DStateBlockImpl *stateblock, WineDi
*/
BYTE *ptr = strided->u.input[i].lpData + offset[strided->u.input[i].streamNo];
if(strided->u.input[i].VBO) {
IWineD3DVertexBufferImpl *vb = (IWineD3DVertexBufferImpl *) stateblock->streamSource[strided->u.input[i].streamNo];
vb = (IWineD3DVertexBufferImpl *) stateblock->streamSource[strided->u.input[i].streamNo];
ptr += (long) vb->resource.allocatedMemory;
}
GL_EXTCALL(glDisableVertexAttribArrayARB(i));
@ -3360,10 +3381,16 @@ static inline void handleStreams(IWineD3DStateBlockImpl *stateblock, BOOL useVer
}
if(useVertexShaderFunction) {
TRACE("Loading numbered arrays\n");
loadNumberedArrays(stateblock, dataLocations);
device->useDrawStridedSlow = FALSE;
context->numberedArraysLoaded = TRUE;
if(((IWineD3DVertexDeclarationImpl *) stateblock->vertexDecl)->half_float_conv_needed && !fixup) {
TRACE("Using drawStridedSlow with vertex shaders for FLOAT16 conversion\n");
device->useDrawStridedSlow = TRUE;
context->numberedArraysLoaded = FALSE;
} else {
TRACE("Loading numbered arrays\n");
loadNumberedArrays(stateblock, dataLocations);
device->useDrawStridedSlow = FALSE;
context->numberedArraysLoaded = TRUE;
}
} else if (fixup ||
(dataLocations->u.s.pSize.lpData == NULL &&
dataLocations->u.s.diffuse.lpData == NULL &&

View File

@ -142,12 +142,53 @@ static inline void fixup_transformed_pos(float *p) {
p[3] = w;
}
/* Builds the per-byte shift table used when the FLOAT16 attributes of a
 * vertex buffer are widened during conversion.
 *
 * Entry j of the returned table is the number of extra bytes that precede
 * source byte offset j inside one converted vertex: every FLOAT16_2
 * attribute adds 4 bytes and every FLOAT16_4 attribute adds 8 bytes to all
 * offsets located after the end of that attribute's original data. The
 * lpData member of each input is used as a byte offset into the vertex.
 *
 * This:    Vertex buffer whose conv_stride is set to stride plus the sum of
 *          all shifts. It is left untouched when NULL is returned.
 * strided: Attribute description; dwType and lpData of each input are read.
 * stride:  Size in bytes of one unconverted vertex. 0 means no conversion.
 *
 * Returns a zero-initialised table of stride DWORDs allocated from the
 * process heap (release it with HeapFree), or NULL if stride is 0 or the
 * allocation fails.
 */
DWORD *find_conversion_shift(IWineD3DVertexBufferImpl *This, WineDirect3DVertexStridedData *strided, DWORD stride) {
    DWORD *ret, i, shift, j, type;
    DWORD orig_type_size;

    if(!stride) {
        TRACE("No shift\n");
        return NULL;
    }

    ret = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*ret) * stride);
    if(!ret) {
        /* Without this check the loop below would write through a NULL pointer */
        ERR("Out of memory\n");
        return NULL;
    }
    This->conv_stride = stride;

    for(i = 0; i < MAX_ATTRIBS; i++) {
        type = strided->u.input[i].dwType;
        if(type == WINED3DDECLTYPE_FLOAT16_2) {
            shift = 4;
        } else if(type == WINED3DDECLTYPE_FLOAT16_4) {
            shift = 8;
        } else {
            shift = 0;
        }
        This->conv_stride += shift;

        if(shift) {
            /* Only the bytes behind this attribute move; its own bytes keep their offset */
            orig_type_size = WINED3D_ATR_TYPESIZE(type) * WINED3D_ATR_SIZE(type);
            for(j = (DWORD_PTR) strided->u.input[i].lpData + orig_type_size; j < stride; j++) {
                ret[j] += shift;
            }
        }
    }

    if(TRACE_ON(d3d)) {
        TRACE("Dumping conversion shift:\n");
        for(i = 0; i < stride; i++) {
            TRACE("[%d]", ret[i]);
        }
        TRACE("\n");
    }
    return ret;
}
inline BOOL WINAPI IWineD3DVertexBufferImpl_FindDecl(IWineD3DVertexBufferImpl *This)
{
WineDirect3DVertexStridedData strided;
IWineD3DDeviceImpl *device = This->resource.wineD3DDevice;
BOOL ret = FALSE;
DWORD type_old, type_new;
DWORD type_old, type_new, stride;
int i;
/* In d3d7 the vertex buffer declaration NEVER changes because it is stored in the d3d7 vertex buffer.
* Once we have our declaration there is no need to look it up again.
@ -180,8 +221,6 @@ inline BOOL WINAPI IWineD3DVertexBufferImpl_FindDecl(IWineD3DVertexBufferImpl *T
*/
if (use_vs(device)) {
/* Assume no conversion. TODO: Deal with FLOAT16 conversion*/
memset(&strided, 0, sizeof(strided));
if(!This->last_was_vshader && This->last_was_converted) {
/* Reload if we're switching from converted fixed function to vertex shaders.
* This isn't strictly needed, e.g. a FLOAT16 attribute could stay at the same
@ -192,7 +231,69 @@ inline BOOL WINAPI IWineD3DVertexBufferImpl_FindDecl(IWineD3DVertexBufferImpl *T
ret = TRUE;
}
This->last_was_vshader = TRUE;
/* The vertex declaration is so nice to carry a flag for this. It takes the gl info into account,
* but check that separately to avoid the memcmp over the structure
*/
if(GL_SUPPORT(NV_HALF_FLOAT)) {
memset(&strided, 0, sizeof(strided));
ret = FALSE;
} else if(((IWineD3DVertexDeclarationImpl *)device->stateBlock->vertexDecl)->half_float_conv_needed) {
memcpy(&strided, &device->strided_streams, sizeof(strided));
stride = 0;
for(i = 0; i < MAX_ATTRIBS; i++) {
if(strided.u.input[i].VBO != This->vbo) {
/* Ignore attributes from a different vbo */
memset(&strided.u.input[i], 0, sizeof(strided.u.input[i]));
continue;
};
type_old = This->strided.u.input[i].dwType;
type_new = strided.u.input[i].dwType;
if(type_old != type_new) {
if(type_old == WINED3DDECLTYPE_FLOAT16_2 || type_new == WINED3DDECLTYPE_FLOAT16_2 ||
type_old == WINED3DDECLTYPE_FLOAT16_4 || type_new == WINED3DDECLTYPE_FLOAT16_4) {
TRACE("Reloading because attribute %i was %s before and is %s now\n", i,
debug_d3ddecltype(type_old), debug_d3ddecltype(type_new));
stride = strided.u.input[i].dwStride;
ret = TRUE;
}
} else if(type_new == WINED3DDECLTYPE_FLOAT16_2 || type_new == WINED3DDECLTYPE_FLOAT16_4) {
if(This->strided.u.input[i].lpData != strided.u.input[i].lpData) {
TRACE("Reconverting buffer because attribute %d has type %s and moved from offset %p to %p\n",
i, debug_d3ddecltype(type_new), This->strided.u.s.position.lpData, strided.u.s.position.lpData);
memcpy(&This->strided, &strided, sizeof(strided));
ret = TRUE;
stride = strided.u.input[i].dwStride;
}
}
if(!(type_new == WINED3DDECLTYPE_FLOAT16_2 || type_new == WINED3DDECLTYPE_FLOAT16_4)) {
/* Nuke unconverted attributes */
memset(&strided.u.input[i], 0, sizeof(strided.u.input[i]));
}
}
if(ret) {
memcpy(&This->strided, &strided, sizeof(strided));
HeapFree(GetProcessHeap(), 0, This->conv_shift);
This->conv_shift = find_conversion_shift(This, &This->strided, stride);
}
} else {
/* No conversion*/
memset(&strided, 0, sizeof(strided));
ret = (memcmp(&strided, &This->strided, sizeof(strided)) != 0);
if(ret) {
HeapFree(GetProcessHeap(), 0, This->conv_shift);
This->conv_shift = NULL;
}
}
} else {
/* This will need modifications of the loading code as well - not handled right now */
if(((IWineD3DVertexDeclarationImpl *)device->stateBlock->vertexDecl)->half_float_conv_needed) {
FIXME("Implement half float fixup with fixed function vertex processing\n");
}
/* we need a copy because we modify some params */
memcpy(&strided, &device->strided_streams, sizeof(strided));
@ -337,9 +438,9 @@ static void WINAPI IWineD3DVertexBufferImpl_PreLoad(IWineD3DVertexBuffer *if
IWineD3DVertexBufferImpl *This = (IWineD3DVertexBufferImpl *) iface;
IWineD3DDeviceImpl *device = This->resource.wineD3DDevice;
BYTE *data;
UINT start = 0, end = 0, stride = 0;
UINT start = 0, end = 0, stride = 0, vertices;
BOOL declChanged = FALSE;
int i;
int i, j;
TRACE("(%p)->()\n", This);
if(This->Flags & VBFLAG_LOAD) {
@ -391,6 +492,7 @@ static void WINAPI IWineD3DVertexBufferImpl_PreLoad(IWineD3DVertexBuffer *if
checkGLcall("glDeleteBuffersARB");
LEAVE_GL();
This->vbo = 0;
HeapFree(GetProcessHeap(), 0, This->conv_shift);
/* The stream source state handler might have read the memory of the vertex buffer already
* and got the memory in the vbo which is not valid any longer. Dirtify the stream source
@ -451,16 +553,80 @@ static void WINAPI IWineD3DVertexBufferImpl_PreLoad(IWineD3DVertexBuffer *if
/* OK, we have the original data from the app, the description of the buffer and the dirty area.
* so convert the stuff
*/
data = HeapAlloc(GetProcessHeap(), 0, end-start);
if(!data) {
ERR("Out of memory\n");
return;
}
memcpy(data, This->resource.allocatedMemory + start, end - start);
if(This->last_was_vshader) {
/* TODO: Implement conversion for FLOAT16_2 and FLOAT16_4 */
TRACE("vertex-shadered conversion\n");
/* TODO: Improve that */
for(i = 0; i < MAX_ATTRIBS; i++) {
if(This->strided.u.input[i].dwStride) {
stride = This->strided.u.input[i].dwStride;
}
}
TRACE("Found input stride %d, output stride %d\n", stride, This->conv_stride);
/* For now reconvert the entire buffer */
start = 0;
end = This->resource.size;
vertices = This->resource.size / stride;
TRACE("%d vertices in buffer\n", vertices);
if(This->vbo_size != vertices * This->conv_stride) {
TRACE("Old size %d, creating new size %d\n", This->vbo_size, vertices * This->conv_stride);
ENTER_GL();
GL_EXTCALL(glBindBufferARB(GL_ARRAY_BUFFER_ARB, This->vbo));
checkGLcall("glBindBufferARB");
GL_EXTCALL(glBufferDataARB(GL_ARRAY_BUFFER_ARB, vertices * This->conv_stride, NULL, This->vbo_usage));
This->vbo_size = vertices * This->conv_stride;
checkGLcall("glBufferDataARB");
LEAVE_GL();
}
data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, vertices * This->conv_stride);
if(!data) {
ERR("Out of memory\n");
return;
}
/* Now for each vertex in the buffer */
for(i = 0; i < vertices; i++) {
/* Copy the vertex over, taking the shifts into account */
for(j = 0; j < stride; j++) {
data[This->conv_stride * i + j + This->conv_shift[j]] = This->resource.allocatedMemory[i * stride + j];
}
/* And convert FLOAT16s */
for(j = 0; j < MAX_ATTRIBS; j++) {
DWORD_PTR offset = (DWORD_PTR) This->strided.u.input[j].lpData;
float *dest = (float *) (&data[This->conv_stride * i + offset + This->conv_shift[offset]]);
WORD *in = (WORD *) (&This->resource.allocatedMemory[i * stride + offset]);
switch(This->strided.u.input[j].dwType) {
case WINED3DDECLTYPE_FLOAT16_4:
dest[3] = float_16_to_32(in + 3);
dest[2] = float_16_to_32(in + 2);
/* drop through */
case WINED3DDECLTYPE_FLOAT16_2:
dest[1] = float_16_to_32(in + 1);
dest[0] = float_16_to_32(in + 0);
break;
default:
break;
}
}
}
if(!device->isInDraw) {
ActivateContext(device, device->lastActiveRenderTarget, CTXUSAGE_RESOURCELOAD);
}
ENTER_GL();
GL_EXTCALL(glBindBufferARB(GL_ARRAY_BUFFER_ARB, This->vbo));
checkGLcall("glBindBufferARB");
GL_EXTCALL(glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, This->vbo_size, data));
checkGLcall("glBufferSubDataARB");
LEAVE_GL();
} else {
data = HeapAlloc(GetProcessHeap(), 0, end-start);
if(!data) {
ERR("Out of memory\n");
return;
}
memcpy(data, This->resource.allocatedMemory + start, end - start);
if (This->strided.u.s.position.dwStride) stride = This->strided.u.s.position.dwStride;
else if(This->strided.u.s.specular.dwStride) stride = This->strided.u.s.specular.dwStride;
else if(This->strided.u.s.diffuse.dwStride) stride = This->strided.u.s.diffuse.dwStride;
@ -498,18 +664,17 @@ static void WINAPI IWineD3DVertexBufferImpl_PreLoad(IWineD3DVertexBuffer *if
CONVERT_D3DCOLOR_ATTRIB(texCoords[7]);
#undef CONVERT_D3DCOLOR_ATTRIB
}
if(!device->isInDraw) {
ActivateContext(device, device->lastActiveRenderTarget, CTXUSAGE_RESOURCELOAD);
}
ENTER_GL();
GL_EXTCALL(glBindBufferARB(GL_ARRAY_BUFFER_ARB, This->vbo));
checkGLcall("glBindBufferARB");
GL_EXTCALL(glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, start, end - start, data));
checkGLcall("glBufferSubDataARB");
LEAVE_GL();
}
if(!device->isInDraw) {
ActivateContext(device, device->lastActiveRenderTarget, CTXUSAGE_RESOURCELOAD);
}
ENTER_GL();
GL_EXTCALL(glBindBufferARB(GL_ARRAY_BUFFER_ARB, This->vbo));
checkGLcall("glBindBufferARB");
GL_EXTCALL(glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, start, end - start, data));
checkGLcall("glBufferSubDataARB");
LEAVE_GL();
HeapFree(GetProcessHeap(), 0, data);
}

View File

@ -121,6 +121,24 @@ void init_type_lookup(WineD3D_GL_Info *gl_info);
#define WINED3D_ATR_NORMALIZED(type) GLINFO_LOCATION.glTypeLookup[type].normalized
#define WINED3D_ATR_TYPESIZE(type) GLINFO_LOCATION.glTypeLookup[type].typesize
/* See GL_NV_half_float for reference.
 *
 * Expands one 16 bit half float (1 sign bit, 5 exponent bits biased by 15,
 * 10 mantissa bits) into a 32 bit float.
 *
 * in: Pointer to the half float to convert. Must not be NULL.
 *
 * Returns the converted value. Signed zeros, denormals and infinities keep
 * their sign. Any NaN input yields the generic NAN, its payload is not kept.
 */
static inline float float_16_to_32(const unsigned short *in) {
    const unsigned short s = ((*in) & 0x8000);
    const unsigned short e = ((*in) & 0x7C00) >> 10;
    const unsigned short m = (*in) & 0x3FF;
    const float sgn = (s ? -1.0f : 1.0f);

    if(e == 0) {
        /* Zero or denormal: m * 2^-24. With m == 0 this is +0.0 or -0.0.
         * Every factor is exactly representable, so no rounding happens.
         */
        return sgn * (float) m * (1.0f / 16777216.0f);
    } else if(e < 31) {
        /* Normal number: (1 + m / 1024) * 2^(e - 15), rewritten as
         * 2^e * (1024 + m) * 2^-25 so that only exact power-of-two scaling
         * is needed instead of a pow() call per component.
         */
        return sgn * (float) (1u << e) * (float) (1024 + m) * (1.0f / 33554432.0f);
    } else {
        /* Use the math.h constants rather than dividing by a constant zero,
         * which some compilers reject or warn about.
         */
        if(m == 0) return sgn * INFINITY; /* +INF / -INF */
        else return NAN; /* NAN */
    }
}
/**
* Settings
*/
@ -826,6 +844,8 @@ typedef struct IWineD3DVertexBufferImpl
GLuint vbo;
BYTE Flags;
LONG bindCount;
LONG vbo_size;
GLenum vbo_usage;
UINT dirtystart, dirtyend;
LONG lockcount;
@ -835,6 +855,10 @@ typedef struct IWineD3DVertexBufferImpl
WineDirect3DVertexStridedData strided;
BOOL last_was_vshader;
BOOL last_was_converted;
/* Extra load offsets, for FLOAT16 conversion */
DWORD *conv_shift;
DWORD conv_stride;
} IWineD3DVertexBufferImpl;
extern const IWineD3DVertexBufferVtbl IWineD3DVertexBuffer_Vtbl;