wined3d: Upload directly from the source surface in wined3d_surface_blt(), if possible.

This avoids either loading sysmem surfaces into video memory (wasting GPU
memory) and then doing a blit on the GPU, or downloading GPU surfaces to
sysmem, doing a blit on the CPU, and likely uploading it again. This mostly
matters for ddraw and d3d8 applications; d3d9 already has to go through
wined3d_device_update_surface() to transfer data from sysmem surfaces to GPU
surfaces.
This commit is contained in:
Henri Verbeet 2011-10-24 20:45:42 +02:00 committed by Alexandre Julliard
parent 6238926a2e
commit 2da4f87a7e
3 changed files with 145 additions and 119 deletions

View File

@ -4628,30 +4628,10 @@ float CDECL wined3d_device_get_npatch_mode(const struct wined3d_device *device)
return 0.0f; return 0.0f;
} }
/* Mark the sampler state that corresponds to the context's currently active
 * texture unit as dirty. The unit is translated to a sampler through the
 * device's rev_tex_unit_map; nothing is invalidated when the unit is not
 * mapped to any stage. */
static void invalidate_active_texture(const struct wined3d_device *device, struct wined3d_context *context)
{
    const DWORD stage = device->rev_tex_unit_map[context->active_texture];

    if (stage == WINED3D_UNMAPPED_STAGE)
        return;

    context_invalidate_state(context, STATE_SAMPLER(stage));
}
HRESULT CDECL wined3d_device_update_surface(struct wined3d_device *device, HRESULT CDECL wined3d_device_update_surface(struct wined3d_device *device,
struct wined3d_surface *src_surface, const RECT *src_rect, struct wined3d_surface *src_surface, const RECT *src_rect,
struct wined3d_surface *dst_surface, const POINT *dst_point) struct wined3d_surface *dst_surface, const POINT *dst_point)
{ {
const struct wined3d_format *src_format;
const struct wined3d_format *dst_format;
const struct wined3d_gl_info *gl_info;
struct wined3d_context *context;
struct wined3d_bo_address data;
struct wined3d_format format;
UINT update_w, update_h;
CONVERT_TYPES convert;
UINT dst_w, dst_h;
UINT src_w, src_h;
POINT p;
RECT r;
TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_point %s.\n", TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_point %s.\n",
device, src_surface, wine_dbgstr_rect(src_rect), device, src_surface, wine_dbgstr_rect(src_rect),
dst_surface, wine_dbgstr_point(dst_point)); dst_surface, wine_dbgstr_point(dst_point));
@ -4663,101 +4643,7 @@ HRESULT CDECL wined3d_device_update_surface(struct wined3d_device *device,
return WINED3DERR_INVALIDCALL; return WINED3DERR_INVALIDCALL;
} }
src_format = src_surface->resource.format; return surface_upload_from_surface(dst_surface, dst_point, src_surface, src_rect);
dst_format = dst_surface->resource.format;
if (src_format->id != dst_format->id)
{
WARN("Source and destination surfaces should have the same format.\n");
return WINED3DERR_INVALIDCALL;
}
if (!dst_point)
{
p.x = 0;
p.y = 0;
dst_point = &p;
}
else if (dst_point->x < 0 || dst_point->y < 0)
{
WARN("Invalid destination point.\n");
return WINED3DERR_INVALIDCALL;
}
if (!src_rect)
{
r.left = 0;
r.top = 0;
r.right = src_surface->resource.width;
r.bottom = src_surface->resource.height;
src_rect = &r;
}
else if (src_rect->left < 0 || src_rect->left >= src_rect->right
|| src_rect->top < 0 || src_rect->top >= src_rect->bottom)
{
WARN("Invalid source rectangle.\n");
return WINED3DERR_INVALIDCALL;
}
src_w = src_surface->resource.width;
src_h = src_surface->resource.height;
dst_w = dst_surface->resource.width;
dst_h = dst_surface->resource.height;
update_w = src_rect->right - src_rect->left;
update_h = src_rect->bottom - src_rect->top;
if (update_w > dst_w || dst_point->x > dst_w - update_w
|| update_h > dst_h || dst_point->y > dst_h - update_h)
{
WARN("Destination out of bounds.\n");
return WINED3DERR_INVALIDCALL;
}
/* NPOT block sizes would be silly. */
if ((src_format->flags & WINED3DFMT_FLAG_COMPRESSED)
&& ((update_w & (src_format->block_width - 1) || update_h & (src_format->block_height - 1))
&& (src_w != update_w || dst_w != update_w || src_h != update_h || dst_h != update_h)))
{
WARN("Update rect not block-aligned.\n");
return WINED3DERR_INVALIDCALL;
}
/* This call loads the OpenGL surface directly, instead of copying the
* surface to the destination's sysmem copy. If surface conversion is
* needed, use BltFast instead to copy in sysmem and use regular surface
* loading. */
d3dfmt_get_conv(dst_surface, FALSE, TRUE, &format, &convert);
if (convert != NO_CONVERSION || format.convert)
return wined3d_surface_bltfast(dst_surface, dst_point->x, dst_point->y, src_surface, src_rect, 0);
context = context_acquire(device, NULL);
gl_info = context->gl_info;
/* Only load the surface for partial updates. For newly allocated texture
* the texture wouldn't be the current location, and we'd upload zeroes
* just to overwrite them again. */
if (update_w == dst_w && update_h == dst_h)
surface_prepare_texture(dst_surface, context, FALSE);
else
surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
surface_bind(dst_surface, context, FALSE);
data.buffer_object = 0;
data.addr = src_surface->resource.allocatedMemory;
if (!data.addr)
ERR("Source surface has no allocated memory, but should be a sysmem surface.\n");
surface_upload_data(dst_surface, gl_info, src_format, src_rect, src_w, dst_point, FALSE, &data);
invalidate_active_texture(device, context);
context_release(context);
surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
return WINED3D_OK;
} }
HRESULT CDECL wined3d_device_draw_rect_patch(struct wined3d_device *device, UINT handle, HRESULT CDECL wined3d_device_draw_rect_patch(struct wined3d_device *device, UINT handle,

View File

@ -1730,6 +1730,26 @@ HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const REC
{ {
TRACE("Color blit.\n"); TRACE("Color blit.\n");
/* Upload */
if ((src_surface->flags & SFLAG_INSYSMEM) && !(dst_surface->flags & SFLAG_INSYSMEM))
{
if (scale)
TRACE("Not doing upload because of scaling.\n");
else if (convert)
TRACE("Not doing upload because of format conversion.\n");
else
{
POINT dst_point = {dst_rect.left, dst_rect.top};
if (SUCCEEDED(surface_upload_from_surface(dst_surface, &dst_point, src_surface, &src_rect)))
{
if (!surface_is_offscreen(dst_surface))
surface_load_location(dst_surface, dst_surface->draw_binding, NULL);
return WINED3D_OK;
}
}
}
/* Use present for back -> front blits. The idea behind this is /* Use present for back -> front blits. The idea behind this is
* that present is potentially faster than a blit, in particular * that present is potentially faster than a blit, in particular
* when FBO blits aren't available. Some ddraw applications like * when FBO blits aren't available. Some ddraw applications like
@ -2393,7 +2413,7 @@ static void surface_download_data(struct wined3d_surface *surface, const struct
/* This call just uploads data, the caller is responsible for binding the /* This call just uploads data, the caller is responsible for binding the
* correct texture. */ * correct texture. */
/* Context activation is done by the caller. */ /* Context activation is done by the caller. */
void surface_upload_data(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, static void surface_upload_data(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
const struct wined3d_format *format, const RECT *src_rect, UINT src_w, const POINT *dst_point, const struct wined3d_format *format, const RECT *src_rect, UINT src_w, const POINT *dst_point,
BOOL srgb, const struct wined3d_bo_address *data) BOOL srgb, const struct wined3d_bo_address *data)
{ {
@ -2499,6 +2519,120 @@ void surface_upload_data(const struct wined3d_surface *surface, const struct win
} }
} }
HRESULT surface_upload_from_surface(struct wined3d_surface *dst_surface, const POINT *dst_point,
struct wined3d_surface *src_surface, const RECT *src_rect)
{
const struct wined3d_format *src_format;
const struct wined3d_format *dst_format;
const struct wined3d_gl_info *gl_info;
struct wined3d_context *context;
struct wined3d_bo_address data;
struct wined3d_format format;
UINT update_w, update_h;
CONVERT_TYPES convert;
UINT dst_w, dst_h;
UINT src_w, src_h;
POINT p;
RECT r;
TRACE("dst_surface %p, dst_point %s, src_surface %p, src_rect %s.\n",
dst_surface, wine_dbgstr_point(dst_point),
src_surface, wine_dbgstr_rect(src_rect));
src_format = src_surface->resource.format;
dst_format = dst_surface->resource.format;
if (src_format->id != dst_format->id)
{
WARN("Source and destination surfaces should have the same format.\n");
return WINED3DERR_INVALIDCALL;
}
if (!dst_point)
{
p.x = 0;
p.y = 0;
dst_point = &p;
}
else if (dst_point->x < 0 || dst_point->y < 0)
{
WARN("Invalid destination point.\n");
return WINED3DERR_INVALIDCALL;
}
if (!src_rect)
{
r.left = 0;
r.top = 0;
r.right = src_surface->resource.width;
r.bottom = src_surface->resource.height;
src_rect = &r;
}
else if (src_rect->left < 0 || src_rect->left >= src_rect->right
|| src_rect->top < 0 || src_rect->top >= src_rect->bottom)
{
WARN("Invalid source rectangle.\n");
return WINED3DERR_INVALIDCALL;
}
src_w = src_surface->resource.width;
src_h = src_surface->resource.height;
dst_w = dst_surface->resource.width;
dst_h = dst_surface->resource.height;
update_w = src_rect->right - src_rect->left;
update_h = src_rect->bottom - src_rect->top;
if (update_w > dst_w || dst_point->x > dst_w - update_w
|| update_h > dst_h || dst_point->y > dst_h - update_h)
{
WARN("Destination out of bounds.\n");
return WINED3DERR_INVALIDCALL;
}
/* NPOT block sizes would be silly. */
if ((src_format->flags & WINED3DFMT_FLAG_COMPRESSED)
&& ((update_w & (src_format->block_width - 1) || update_h & (src_format->block_height - 1))
&& (src_w != update_w || dst_w != update_w || src_h != update_h || dst_h != update_h)))
{
WARN("Update rect not block-aligned.\n");
return WINED3DERR_INVALIDCALL;
}
/* This call loads the OpenGL surface directly, instead of copying the
* surface to the destination's sysmem copy. If surface conversion is
* needed, use BltFast instead to copy in sysmem and use regular surface
* loading. */
d3dfmt_get_conv(dst_surface, FALSE, TRUE, &format, &convert);
if (convert != NO_CONVERSION || format.convert)
return wined3d_surface_bltfast(dst_surface, dst_point->x, dst_point->y, src_surface, src_rect, 0);
context = context_acquire(dst_surface->resource.device, NULL);
gl_info = context->gl_info;
/* Only load the surface for partial updates. For newly allocated texture
* the texture wouldn't be the current location, and we'd upload zeroes
* just to overwrite them again. */
if (update_w == dst_w && update_h == dst_h)
surface_prepare_texture(dst_surface, context, FALSE);
else
surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
surface_bind(dst_surface, context, FALSE);
data.buffer_object = src_surface->pbo;
data.addr = src_surface->resource.allocatedMemory;
surface_upload_data(dst_surface, gl_info, src_format, src_rect, src_w, dst_point, FALSE, &data);
invalidate_active_texture(dst_surface->resource.device, context);
context_release(context);
surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
return WINED3D_OK;
}
/* This call just allocates the texture, the caller is responsible for binding /* This call just allocates the texture, the caller is responsible for binding
* the correct texture. */ * the correct texture. */
/* Context activation is done by the caller. */ /* Context activation is done by the caller. */

View File

@ -1782,6 +1782,13 @@ static inline BOOL isStateDirty(const struct wined3d_context *context, DWORD sta
return context->isStateDirty[idx] & (1 << shift); return context->isStateDirty[idx] & (1 << shift);
} }
/* Mark the sampler state that corresponds to the context's currently active
 * texture unit as dirty. The unit is translated to a sampler through the
 * device's rev_tex_unit_map; nothing is invalidated when the unit is not
 * mapped to any stage. */
static inline void invalidate_active_texture(const struct wined3d_device *device, struct wined3d_context *context)
{
    const DWORD stage = device->rev_tex_unit_map[context->active_texture];

    if (stage == WINED3D_UNMAPPED_STAGE)
        return;

    context_invalidate_state(context, STATE_SAMPLER(stage));
}
#define WINED3D_RESOURCE_ACCESS_GPU 0x1 #define WINED3D_RESOURCE_ACCESS_GPU 0x1
#define WINED3D_RESOURCE_ACCESS_CPU 0x2 #define WINED3D_RESOURCE_ACCESS_CPU 0x2
/* SCRATCH is mostly the same as CPU, but can't be used by the GPU at all, /* SCRATCH is mostly the same as CPU, but can't be used by the GPU at all,
@ -2095,9 +2102,8 @@ void surface_set_texture_name(struct wined3d_surface *surface, GLuint name, BOOL
void surface_set_texture_target(struct wined3d_surface *surface, GLenum target) DECLSPEC_HIDDEN; void surface_set_texture_target(struct wined3d_surface *surface, GLenum target) DECLSPEC_HIDDEN;
void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect) DECLSPEC_HIDDEN; void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect) DECLSPEC_HIDDEN;
void surface_update_draw_binding(struct wined3d_surface *surface) DECLSPEC_HIDDEN; void surface_update_draw_binding(struct wined3d_surface *surface) DECLSPEC_HIDDEN;
void surface_upload_data(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, HRESULT surface_upload_from_surface(struct wined3d_surface *dst_surface, const POINT *dst_point,
const struct wined3d_format *format, const RECT *src_rect, UINT src_w, const POINT *dst_point, struct wined3d_surface *src_surface, const RECT *src_rect) DECLSPEC_HIDDEN;
BOOL srgb, const struct wined3d_bo_address *data) DECLSPEC_HIDDEN;
void get_drawable_size_swapchain(const struct wined3d_context *context, UINT *width, UINT *height) DECLSPEC_HIDDEN; void get_drawable_size_swapchain(const struct wined3d_context *context, UINT *width, UINT *height) DECLSPEC_HIDDEN;
void get_drawable_size_backbuffer(const struct wined3d_context *context, UINT *width, UINT *height) DECLSPEC_HIDDEN; void get_drawable_size_backbuffer(const struct wined3d_context *context, UINT *width, UINT *height) DECLSPEC_HIDDEN;