diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c index 5cf9cef9304..602e2c341ee 100644 --- a/dlls/wined3d/device.c +++ b/dlls/wined3d/device.c @@ -277,6 +277,8 @@ static void CreateVBO(IWineD3DVertexBufferImpl *object) { WARN("glBufferDataARB failed with error %s (%#x)\n", debug_glerror(error), error); goto error; } + object->vbo_size = object->resource.size; + object->vbo_usage = glUsage; LEAVE_GL(); diff --git a/dlls/wined3d/drawprim.c b/dlls/wined3d/drawprim.c index fdba368cca6..c3c642cab5c 100644 --- a/dlls/wined3d/drawprim.c +++ b/dlls/wined3d/drawprim.c @@ -201,10 +201,6 @@ void primitiveDeclarationConvertToStridedData( if((UINT_PTR)data < -This->stateBlock->loadBaseVertexIndex * stride) { FIXME("System memory vertex data load offset is negative!\n"); } - } else if(vertexDeclaration->half_float_conv_needed) { - WARN("Half float vertex data used, but GL_NV_half_float is not supported. Not using vbos\n"); - streamVBO = 0; - data = ((IWineD3DVertexBufferImpl *) This->stateBlock->streamSource[element->Stream])->resource.allocatedMemory; } if(fixup) { @@ -574,24 +570,6 @@ static void drawStridedSlow(IWineD3DDevice *iface, WineDirect3DVertexStridedData checkGLcall("glEnd and previous calls"); } -/* See GL_NV_half_float for reference */ -static inline float float_16_to_32(const unsigned short *in) { - const unsigned short s = ((*in) & 0x8000); - const unsigned short e = ((*in) & 0x7C00) >> 10; - const unsigned short m = (*in) & 0x3FF; - const float sgn = (s ? 
-1.0 : 1.0); - - if(e == 0) { - if(m == 0) return sgn * 0.0; /* +0.0 or -0.0 */ - else return sgn * pow(2, -14.0) * ( (float) m / 1024.0); - } else if(e < 31) { - return sgn * pow(2, (float) e-15.0) * (1.0 + ((float) m / 1024.0)); - } else { - if(m == 0) return sgn / 0.0; /* +INF / -INF */ - else return 0.0 / 0.0; /* NAN */ - } -} - static inline void send_attribute(IWineD3DDeviceImpl *This, const DWORD type, const UINT index, const void *ptr) { switch(type) { case WINED3DDECLTYPE_FLOAT1: @@ -1103,19 +1081,21 @@ void drawPrimitive(IWineD3DDevice *iface, if (This->useDrawStridedSlow || emulation) { /* Immediate mode drawing */ - drawStridedSlow(iface, strided, calculatedNumberOfindices, - glPrimType, idxData, idxSize, minIndex, StartIdx, StartVertexIndex); + if(use_vs(This)) { + FIXME("Using immediate mode with vertex shaders for half float emulation\n"); + drawStridedSlowVs(iface, strided, calculatedNumberOfindices, glPrimType, + idxData, idxSize, minIndex, StartIdx, StartVertexIndex); + } else { + drawStridedSlow(iface, strided, calculatedNumberOfindices, + glPrimType, idxData, idxSize, minIndex, StartIdx, StartVertexIndex); + } } else if(This->instancedDraw) { /* Instancing emulation with mixing immediate mode and arrays */ drawStridedInstanced(iface, &This->strided_streams, calculatedNumberOfindices, glPrimType, idxData, idxSize, minIndex, StartIdx, StartVertexIndex); - } else if(!((IWineD3DVertexDeclarationImpl *) This->stateBlock->vertexDecl)->half_float_conv_needed) { + } else { drawStridedFast(iface, calculatedNumberOfindices, glPrimType, idxData, idxSize, minIndex, StartIdx, StartVertexIndex); - } else { - FIXME("Using immediate mode with vertex shaders for half float emulation\n"); - drawStridedSlowVs(iface, strided, calculatedNumberOfindices, glPrimType, - idxData, idxSize, minIndex, StartIdx, StartVertexIndex); } } diff --git a/dlls/wined3d/state.c b/dlls/wined3d/state.c index 39868cfdac2..71b0d3d64d7 100644 --- a/dlls/wined3d/state.c +++ 
b/dlls/wined3d/state.c @@ -2880,15 +2880,11 @@ static inline void loadNumberedArrays(IWineD3DStateBlockImpl *stateblock, WineDi GLint curVBO = GL_SUPPORT(ARB_VERTEX_BUFFER_OBJECT) ? -1 : 0; int i; UINT *offset = stateblock->streamOffset; + IWineD3DVertexBufferImpl *vb; /* Default to no instancing */ stateblock->wineD3DDevice->instancedDraw = FALSE; - if(((IWineD3DVertexDeclarationImpl *)stateblock->vertexDecl)->half_float_conv_needed) { - /* This will be handled using drawStridedSlow */ - return; - } - for (i = 0; i < MAX_ATTRIBS; i++) { if (!strided->u.input[i].lpData && !strided->u.input[i].VBO) @@ -2909,12 +2905,37 @@ static inline void loadNumberedArrays(IWineD3DStateBlockImpl *stateblock, WineDi checkGLcall("glBindBufferARB"); curVBO = strided->u.input[i].VBO; } - GL_EXTCALL(glVertexAttribPointerARB(i, - WINED3D_ATR_SIZE(strided->u.input[i].dwType), - WINED3D_ATR_GLTYPE(strided->u.input[i].dwType), - WINED3D_ATR_NORMALIZED(strided->u.input[i].dwType), - strided->u.input[i].dwStride, - strided->u.input[i].lpData + stateblock->loadBaseVertexIndex * strided->u.input[i].dwStride + offset[strided->u.input[i].streamNo]) ); + vb = (IWineD3DVertexBufferImpl *) stateblock->streamSource[strided->u.input[i].streamNo]; + /* Use the VBO to find out if a vertex buffer exists, not the vb pointer. vb can point to a + * user pointer data blob. In that case curVBO will be 0. 
If there is a vertex buffer but no + * vbo we won't load converted attributes anyway + */ + if(curVBO && vb->conv_shift) { + TRACE("Loading attribute from shifted buffer\n"); + TRACE("Attrib %d has original stride %d, new stride %d\n", i, strided->u.input[i].dwStride, vb->conv_stride); + TRACE("Original offset %p, additional offset 0x%08x\n",strided->u.input[i].lpData, vb->conv_shift[(DWORD_PTR) strided->u.input[i].lpData]); + TRACE("Opengl type %x\n", WINED3D_ATR_GLTYPE(strided->u.input[i].dwType)); + GL_EXTCALL(glVertexAttribPointerARB(i, + WINED3D_ATR_SIZE(strided->u.input[i].dwType), + WINED3D_ATR_GLTYPE(strided->u.input[i].dwType), + WINED3D_ATR_NORMALIZED(strided->u.input[i].dwType), + vb->conv_stride, + + strided->u.input[i].lpData + vb->conv_shift[(DWORD_PTR) strided->u.input[i].lpData] + + stateblock->loadBaseVertexIndex * strided->u.input[i].dwStride + + offset[strided->u.input[i].streamNo])); + + } else { + GL_EXTCALL(glVertexAttribPointerARB(i, + WINED3D_ATR_SIZE(strided->u.input[i].dwType), + WINED3D_ATR_GLTYPE(strided->u.input[i].dwType), + WINED3D_ATR_NORMALIZED(strided->u.input[i].dwType), + strided->u.input[i].dwStride, + + strided->u.input[i].lpData + + stateblock->loadBaseVertexIndex * strided->u.input[i].dwStride + + offset[strided->u.input[i].streamNo]) ); + } GL_EXTCALL(glEnableVertexAttribArrayARB(i)); } else { /* Stride = 0 means always the same values. glVertexAttribPointerARB doesn't do that. 
Instead disable the pointer and @@ -2922,7 +2943,7 @@ static inline void loadNumberedArrays(IWineD3DStateBlockImpl *stateblock, WineDi */ BYTE *ptr = strided->u.input[i].lpData + offset[strided->u.input[i].streamNo]; if(strided->u.input[i].VBO) { - IWineD3DVertexBufferImpl *vb = (IWineD3DVertexBufferImpl *) stateblock->streamSource[strided->u.input[i].streamNo]; + vb = (IWineD3DVertexBufferImpl *) stateblock->streamSource[strided->u.input[i].streamNo]; ptr += (long) vb->resource.allocatedMemory; } GL_EXTCALL(glDisableVertexAttribArrayARB(i)); @@ -3360,10 +3381,16 @@ static inline void handleStreams(IWineD3DStateBlockImpl *stateblock, BOOL useVer } if(useVertexShaderFunction) { - TRACE("Loading numbered arrays\n"); - loadNumberedArrays(stateblock, dataLocations); - device->useDrawStridedSlow = FALSE; - context->numberedArraysLoaded = TRUE; + if(((IWineD3DVertexDeclarationImpl *) stateblock->vertexDecl)->half_float_conv_needed && !fixup) { + TRACE("Using drawStridedSlow with vertex shaders for FLOAT16 conversion\n"); + device->useDrawStridedSlow = TRUE; + context->numberedArraysLoaded = FALSE; + } else { + TRACE("Loading numbered arrays\n"); + loadNumberedArrays(stateblock, dataLocations); + device->useDrawStridedSlow = FALSE; + context->numberedArraysLoaded = TRUE; + } } else if (fixup || (dataLocations->u.s.pSize.lpData == NULL && dataLocations->u.s.diffuse.lpData == NULL && diff --git a/dlls/wined3d/vertexbuffer.c b/dlls/wined3d/vertexbuffer.c index 580d290cb41..32d475741d3 100644 --- a/dlls/wined3d/vertexbuffer.c +++ b/dlls/wined3d/vertexbuffer.c @@ -142,12 +142,61 @@ static inline void fixup_transformed_pos(float *p) { p[3] = w; } +/* Builds the byte shift table used when FLOAT16 attributes of a vertex buffer are widened to FLOAT32. + * ret[n] is the number of extra bytes that precede source byte offset n of a vertex in the converted layout. + * Also stores the converted vertex stride in This->conv_stride. 
Returns NULL if stride is 0 or if the + * allocation fails; the caller releases the returned table with HeapFree. */ +DWORD *find_conversion_shift(IWineD3DVertexBufferImpl *This, WineDirect3DVertexStridedData *strided, DWORD stride) { + DWORD *ret, i, shift, j, type; + DWORD orig_type_size; + + if(!stride) { + TRACE("No shift\n"); + return NULL; + } + + This->conv_stride = stride; + ret = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(DWORD) * 
stride); + if(!ret) { + ERR("Out of memory\n"); + return NULL; + } + for(i = 0; i < MAX_ATTRIBS; i++) { + type = strided->u.input[i].dwType; + if(type == WINED3DDECLTYPE_FLOAT16_2) { + shift = 4; + } else if(type == WINED3DDECLTYPE_FLOAT16_4) { + shift = 8; + } else { + shift = 0; + } + This->conv_stride += shift; + + if(shift) { + orig_type_size = WINED3D_ATR_TYPESIZE(type) * WINED3D_ATR_SIZE(type); + for(j = (DWORD_PTR) strided->u.input[i].lpData + orig_type_size; j < stride; j++) { + ret[j] += shift; + } + } + } + + if(TRACE_ON(d3d)) { + TRACE("Dumping conversion shift:\n"); + for(i = 0; i < stride; i++) { + TRACE("[%d]", ret[i]); + } + TRACE("\n"); + } + return ret; +} + inline BOOL WINAPI IWineD3DVertexBufferImpl_FindDecl(IWineD3DVertexBufferImpl *This) { WineDirect3DVertexStridedData strided; IWineD3DDeviceImpl *device = This->resource.wineD3DDevice; BOOL ret = FALSE; - DWORD type_old, type_new; + DWORD type_old, type_new, stride; + int i; /* In d3d7 the vertex buffer declaration NEVER changes because it is stored in the d3d7 vertex buffer. * Once we have our declaration there is no need to look it up again. @@ -180,8 +229,6 @@ inline BOOL WINAPI IWineD3DVertexBufferImpl_FindDecl(IWineD3DVertexBufferImpl *T */ if (use_vs(device)) { - /* Assume no conversion. TODO: Deal with FLOAT16 conversion*/ - memset(&strided, 0, sizeof(strided)); if(!This->last_was_vshader && This->last_was_converted) { /* Reload if we're switching from converted fixed function to vertex shaders. * This isn't strictly needed, e.g. a FLOAT16 attribute could stay at the same @@ -192,7 +239,69 @@ inline BOOL WINAPI IWineD3DVertexBufferImpl_FindDecl(IWineD3DVertexBufferImpl *T ret = TRUE; } This->last_was_vshader = TRUE; + + /* The vertex declaration is so nice to carry a flag for this. 
It takes the gl info into account, 
+ * but check that separately to avoid the memcmp over the structure + */ + if(GL_SUPPORT(NV_HALF_FLOAT)) { + memset(&strided, 0, sizeof(strided)); + ret = FALSE; + } else if(((IWineD3DVertexDeclarationImpl *)device->stateBlock->vertexDecl)->half_float_conv_needed) { + memcpy(&strided, &device->strided_streams, sizeof(strided)); + + stride = 0; + for(i = 0; i < MAX_ATTRIBS; i++) { + if(strided.u.input[i].VBO != This->vbo) { + /* Ignore attributes from a different vbo */ + memset(&strided.u.input[i], 0, sizeof(strided.u.input[i])); + continue; + } + + type_old = This->strided.u.input[i].dwType; + type_new = strided.u.input[i].dwType; + if(type_old != type_new) { + if(type_old == WINED3DDECLTYPE_FLOAT16_2 || type_new == WINED3DDECLTYPE_FLOAT16_2 || + type_old == WINED3DDECLTYPE_FLOAT16_4 || type_new == WINED3DDECLTYPE_FLOAT16_4) { + TRACE("Reloading because attribute %i was %s before and is %s now\n", i, + debug_d3ddecltype(type_old), debug_d3ddecltype(type_new)); + stride = strided.u.input[i].dwStride; + ret = TRUE; + } + } else if(type_new == WINED3DDECLTYPE_FLOAT16_2 || type_new == WINED3DDECLTYPE_FLOAT16_4) { + if(This->strided.u.input[i].lpData != strided.u.input[i].lpData) { + TRACE("Reconverting buffer because attribute %d has type %s and moved from offset %p to %p\n", + i, debug_d3ddecltype(type_new), This->strided.u.input[i].lpData, strided.u.input[i].lpData); + memcpy(&This->strided, &strided, sizeof(strided)); + ret = TRUE; + stride = strided.u.input[i].dwStride; + } + } + + if(!(type_new == WINED3DDECLTYPE_FLOAT16_2 || type_new == WINED3DDECLTYPE_FLOAT16_4)) { + /* Nuke unconverted attributes */ + memset(&strided.u.input[i], 0, sizeof(strided.u.input[i])); + } + } + if(ret) { + memcpy(&This->strided, &strided, sizeof(strided)); + HeapFree(GetProcessHeap(), 0, This->conv_shift); + This->conv_shift = find_conversion_shift(This, &This->strided, stride); + } + } else { + /* No conversion*/ + memset(&strided, 0, 
sizeof(strided)); + ret = (memcmp(&strided, &This->strided, sizeof(strided)) != 0); + if(ret) { + HeapFree(GetProcessHeap(), 0, This->conv_shift); + This->conv_shift = NULL; + } + } } else { + /* This will need modifications of the loading code as well - not handled right now */ + if(((IWineD3DVertexDeclarationImpl *)device->stateBlock->vertexDecl)->half_float_conv_needed) { + FIXME("Implement half float fixup with fixed function vertex processing\n"); + } + /* we need a copy because we modify some params */ memcpy(&strided, &device->strided_streams, sizeof(strided)); @@ -337,9 +446,9 @@ static void WINAPI IWineD3DVertexBufferImpl_PreLoad(IWineD3DVertexBuffer *if IWineD3DVertexBufferImpl *This = (IWineD3DVertexBufferImpl *) iface; IWineD3DDeviceImpl *device = This->resource.wineD3DDevice; BYTE *data; - UINT start = 0, end = 0, stride = 0; + UINT start = 0, end = 0, stride = 0, vertices; BOOL declChanged = FALSE; - int i; + int i, j; TRACE("(%p)->()\n", This); if(This->Flags & VBFLAG_LOAD) { @@ -391,6 +500,8 @@ static void WINAPI IWineD3DVertexBufferImpl_PreLoad(IWineD3DVertexBuffer *if checkGLcall("glDeleteBuffersARB"); LEAVE_GL(); This->vbo = 0; + HeapFree(GetProcessHeap(), 0, This->conv_shift); + This->conv_shift = NULL; /* FindDecl frees conv_shift again and loadNumberedArrays tests it, so do not leave it dangling */ /* The stream source state handler might have read the memory of the vertex buffer already * and got the memory in the vbo which is not valid any longer. Dirtify the stream source @@ -451,16 +562,85 @@ static void WINAPI IWineD3DVertexBufferImpl_PreLoad(IWineD3DVertexBuffer *if /* OK, we have the original data from the app, the description of the buffer and the dirty area. 
* so convert the stuff */ - data = HeapAlloc(GetProcessHeap(), 0, end-start); - if(!data) { - ERR("Out of memory\n"); - return; - } - memcpy(data, This->resource.allocatedMemory + start, end - start); - if(This->last_was_vshader) { - /* TODO: Implement conversion for FLOAT16_2 and FLOAT16_4 */ + TRACE("vertex-shadered conversion\n"); + /* TODO: Improve that */ + for(i = 0; i < MAX_ATTRIBS; i++) { + if(This->strided.u.input[i].dwStride) { + stride = This->strided.u.input[i].dwStride; + } + } + TRACE("Found input stride %d, output stride %d\n", stride, This->conv_stride); + /* The code below divides by stride and indexes conv_shift, so bail out if either is missing */ + if(!stride || !This->conv_shift) { + ERR("Cannot convert vertex buffer without a stride and a shift table\n"); + return; + } + /* For now reconvert the entire buffer */ + start = 0; + end = This->resource.size; + + vertices = This->resource.size / stride; + TRACE("%d vertices in buffer\n", vertices); + if(This->vbo_size != vertices * This->conv_stride) { + TRACE("Old size %d, creating new size %d\n", This->vbo_size, vertices * This->conv_stride); + ENTER_GL(); + GL_EXTCALL(glBindBufferARB(GL_ARRAY_BUFFER_ARB, This->vbo)); + checkGLcall("glBindBufferARB"); + GL_EXTCALL(glBufferDataARB(GL_ARRAY_BUFFER_ARB, vertices * This->conv_stride, NULL, This->vbo_usage)); + This->vbo_size = vertices * This->conv_stride; + checkGLcall("glBufferDataARB"); + LEAVE_GL(); + } + data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, vertices * This->conv_stride); + if(!data) { + ERR("Out of memory\n"); + return; + } + + /* Now for each vertex in the buffer */ + for(i = 0; i < vertices; i++) { + /* Copy the vertex over, taking the shifts into account */ + for(j = 0; j < stride; j++) { + data[This->conv_stride * i + j + This->conv_shift[j]] = This->resource.allocatedMemory[i * stride + j]; + } + /* And convert FLOAT16s */ + for(j = 0; j < MAX_ATTRIBS; j++) { + DWORD_PTR offset = (DWORD_PTR) This->strided.u.input[j].lpData; + float 
*dest = (float *) (&data[This->conv_stride * i + offset + This->conv_shift[offset]]); + WORD *in = (WORD *) (&This->resource.allocatedMemory[i * stride + offset]); + + switch(This->strided.u.input[j].dwType) { + case 
WINED3DDECLTYPE_FLOAT16_4: + dest[3] = float_16_to_32(in + 3); + dest[2] = float_16_to_32(in + 2); + /* drop through */ + case WINED3DDECLTYPE_FLOAT16_2: + dest[1] = float_16_to_32(in + 1); + dest[0] = float_16_to_32(in + 0); + break; + default: + break; + } + } + } + if(!device->isInDraw) { + ActivateContext(device, device->lastActiveRenderTarget, CTXUSAGE_RESOURCELOAD); + } + ENTER_GL(); + GL_EXTCALL(glBindBufferARB(GL_ARRAY_BUFFER_ARB, This->vbo)); + checkGLcall("glBindBufferARB"); + GL_EXTCALL(glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, This->vbo_size, data)); + checkGLcall("glBufferSubDataARB"); + LEAVE_GL(); } else { + data = HeapAlloc(GetProcessHeap(), 0, end-start); + if(!data) { + ERR("Out of memory\n"); + return; + } + memcpy(data, This->resource.allocatedMemory + start, end - start); + if (This->strided.u.s.position.dwStride) stride = This->strided.u.s.position.dwStride; else if(This->strided.u.s.specular.dwStride) stride = This->strided.u.s.specular.dwStride; else if(This->strided.u.s.diffuse.dwStride) stride = This->strided.u.s.diffuse.dwStride; @@ -498,18 +678,17 @@ static void WINAPI IWineD3DVertexBufferImpl_PreLoad(IWineD3DVertexBuffer *if CONVERT_D3DCOLOR_ATTRIB(texCoords[7]); #undef CONVERT_D3DCOLOR_ATTRIB } + if(!device->isInDraw) { + ActivateContext(device, device->lastActiveRenderTarget, CTXUSAGE_RESOURCELOAD); + } + ENTER_GL(); + GL_EXTCALL(glBindBufferARB(GL_ARRAY_BUFFER_ARB, This->vbo)); + checkGLcall("glBindBufferARB"); + GL_EXTCALL(glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, start, end - start, data)); + checkGLcall("glBufferSubDataARB"); + LEAVE_GL(); } - if(!device->isInDraw) { - ActivateContext(device, device->lastActiveRenderTarget, CTXUSAGE_RESOURCELOAD); - } - ENTER_GL(); - GL_EXTCALL(glBindBufferARB(GL_ARRAY_BUFFER_ARB, This->vbo)); - checkGLcall("glBindBufferARB"); - GL_EXTCALL(glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, start, end - start, data)); - checkGLcall("glBufferSubDataARB"); - LEAVE_GL(); - HeapFree(GetProcessHeap(), 0, data); } 
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 8df8db6cba7..dbfe3322ebd 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -121,6 +121,24 @@ void init_type_lookup(WineD3D_GL_Info *gl_info); #define WINED3D_ATR_NORMALIZED(type) GLINFO_LOCATION.glTypeLookup[type].normalized #define WINED3D_ATR_TYPESIZE(type) GLINFO_LOCATION.glTypeLookup[type].typesize +/* Converts the 16 bit half float at *in (1 sign bit, 5 exponent bits, 10 mantissa bits) to a 32 bit float. See GL_NV_half_float for reference */ +static inline float float_16_to_32(const unsigned short *in) { + const unsigned short s = ((*in) & 0x8000); + const unsigned short e = ((*in) & 0x7C00) >> 10; + const unsigned short m = (*in) & 0x3FF; + const float sgn = (s ? -1.0 : 1.0); + + if(e == 0) { + if(m == 0) return sgn * 0.0; /* +0.0 or -0.0 */ + else return sgn * pow(2, -14.0) * ( (float) m / 1024.0); /* exponent field 0: fixed scale 2^-14, no implicit leading 1 */ + } else if(e < 31) { + return sgn * pow(2, (float) e-15.0) * (1.0 + ((float) m / 1024.0)); /* exponent bias 15, implicit leading 1 */ + } else { + if(m == 0) return sgn / 0.0; /* exponent field 31, mantissa 0: +INF / -INF */ + else return 0.0 / 0.0; /* exponent field 31, mantissa != 0: NAN */ + } +} + /** * Settings */ @@ -826,6 +844,8 @@ typedef struct IWineD3DVertexBufferImpl GLuint vbo; BYTE Flags; LONG bindCount; + LONG vbo_size; /* size last passed to glBufferDataARB for vbo */ + GLenum vbo_usage; /* usage last passed to glBufferDataARB for vbo */ UINT dirtystart, dirtyend; LONG lockcount; @@ -835,6 +855,10 @@ typedef struct IWineD3DVertexBufferImpl WineDirect3DVertexStridedData strided; BOOL last_was_vshader; BOOL last_was_converted; + + /* Extra load offsets, for FLOAT16 conversion. conv_shift is a heap allocated table indexed by the byte offset inside a source vertex, each entry is the number of bytes added to that offset in the converted data. 
conv_stride is the vertex stride of the converted data */ + DWORD *conv_shift; + DWORD conv_stride; } IWineD3DVertexBufferImpl; extern const IWineD3DVertexBufferVtbl IWineD3DVertexBuffer_Vtbl;