From 393ea20679d1955decff8f892acc20b3dd66ebb9 Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Tue, 6 Jul 2021 20:43:43 -0500 Subject: [PATCH] wined3d: Set an array of UAVs as a single CS operation. Signed-off-by: Zebediah Figura Signed-off-by: Henri Verbeet Signed-off-by: Alexandre Julliard --- dlls/d3d11/device.c | 67 ++++++++++++++++++-------------- dlls/wined3d/cs.c | 70 +++++++++++++++++++++------------- dlls/wined3d/device.c | 45 +++++++++++----------- dlls/wined3d/wined3d.spec | 2 +- dlls/wined3d/wined3d_private.h | 6 +-- include/wine/wined3d.h | 6 +-- 6 files changed, 111 insertions(+), 85 deletions(-) diff --git a/dlls/d3d11/device.c b/dlls/d3d11/device.c index ec2447b3c4e..93df97e59ae 100644 --- a/dlls/d3d11/device.c +++ b/dlls/d3d11/device.c @@ -1087,20 +1087,17 @@ static void STDMETHODCALLTYPE d3d11_device_context_OMSetRenderTargets(ID3D11Devi } static void STDMETHODCALLTYPE d3d11_device_context_OMSetRenderTargetsAndUnorderedAccessViews( - ID3D11DeviceContext1 *iface, UINT render_target_view_count, - ID3D11RenderTargetView *const *render_target_views, ID3D11DepthStencilView *depth_stencil_view, - UINT unordered_access_view_start_slot, UINT unordered_access_view_count, - ID3D11UnorderedAccessView *const *unordered_access_views, const UINT *initial_counts) + ID3D11DeviceContext1 *iface, UINT render_target_view_count, ID3D11RenderTargetView *const *render_target_views, + ID3D11DepthStencilView *depth_stencil_view, UINT uav_start_idx, UINT uav_count, + ID3D11UnorderedAccessView *const *uavs, const UINT *initial_counts) { struct d3d11_device_context *context = impl_from_ID3D11DeviceContext1(iface); unsigned int i; TRACE("iface %p, render_target_view_count %u, render_target_views %p, depth_stencil_view %p, " - "unordered_access_view_start_slot %u, unordered_access_view_count %u, unordered_access_views %p, " - "initial_counts %p.\n", + "uav_start_idx %u, uav_count %u, uavs %p, initial_counts %p.\n", iface, render_target_view_count, render_target_views, depth_stencil_view, - unordered_access_view_start_slot, unordered_access_view_count, unordered_access_views, - initial_counts); + uav_start_idx, uav_count, uavs, initial_counts); if (render_target_view_count != D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL) { @@ -1108,28 +1105,31 @@ static void STDMETHODCALLTYPE d3d11_device_context_OMSetRenderTargetsAndUnordere depth_stencil_view); } - if (unordered_access_view_count != D3D11_KEEP_UNORDERED_ACCESS_VIEWS) + if (uav_count != D3D11_KEEP_UNORDERED_ACCESS_VIEWS) { - wined3d_mutex_lock(); - for (i = 0; i < unordered_access_view_start_slot; ++i) - { - wined3d_device_context_set_unordered_access_view(context->wined3d_context, - WINED3D_PIPELINE_GRAPHICS, i, NULL, ~0u); - } - for (i = 0; i < unordered_access_view_count; ++i) - { - struct d3d11_unordered_access_view *view - = unsafe_impl_from_ID3D11UnorderedAccessView(unordered_access_views[i]); + struct wined3d_unordered_access_view *wined3d_views[D3D11_PS_CS_UAV_REGISTER_COUNT] = {0}; + unsigned int wined3d_initial_counts[D3D11_PS_CS_UAV_REGISTER_COUNT]; - wined3d_device_context_set_unordered_access_view(context->wined3d_context, - WINED3D_PIPELINE_GRAPHICS, unordered_access_view_start_slot + i, - view ? view->wined3d_view : NULL, initial_counts ? initial_counts[i] : ~0u); - } - for (; unordered_access_view_start_slot + i < D3D11_PS_CS_UAV_REGISTER_COUNT; ++i) + if (!wined3d_bound_range(uav_start_idx, uav_count, ARRAY_SIZE(wined3d_views))) { - wined3d_device_context_set_unordered_access_view(context->wined3d_context, - WINED3D_PIPELINE_GRAPHICS, unordered_access_view_start_slot + i, NULL, ~0u); + WARN("View count %u exceeds limit; ignoring call.\n", uav_count); + return; } + + memset(wined3d_initial_counts, 0xff, sizeof(wined3d_initial_counts)); + + for (i = 0; i < uav_count; ++i) + { + struct d3d11_unordered_access_view *view = + unsafe_impl_from_ID3D11UnorderedAccessView(uavs[i]); + + wined3d_views[uav_start_idx + i] = view ? view->wined3d_view : NULL; + wined3d_initial_counts[uav_start_idx + i] = initial_counts ? initial_counts[i] : ~0u; + } + + wined3d_mutex_lock(); + wined3d_device_context_set_unordered_access_views(context->wined3d_context, WINED3D_PIPELINE_GRAPHICS, + 0, ARRAY_SIZE(wined3d_views), wined3d_views, wined3d_initial_counts); wined3d_mutex_unlock(); } } @@ -1651,19 +1651,28 @@ static void STDMETHODCALLTYPE d3d11_device_context_CSSetUnorderedAccessViews(ID3 UINT start_slot, UINT view_count, ID3D11UnorderedAccessView *const *views, const UINT *initial_counts) { struct d3d11_device_context *context = impl_from_ID3D11DeviceContext1(iface); + struct wined3d_unordered_access_view *wined3d_views[D3D11_PS_CS_UAV_REGISTER_COUNT]; unsigned int i; TRACE("iface %p, start_slot %u, view_count %u, views %p, initial_counts %p.\n", iface, start_slot, view_count, views, initial_counts); - wined3d_mutex_lock(); + if (view_count > ARRAY_SIZE(wined3d_views)) + { + WARN("View count %u exceeds limit; ignoring call.\n", view_count); + return; + } + for (i = 0; i < view_count; ++i) { struct d3d11_unordered_access_view *view = unsafe_impl_from_ID3D11UnorderedAccessView(views[i]); - wined3d_device_context_set_unordered_access_view(context->wined3d_context, WINED3D_PIPELINE_COMPUTE, - start_slot + i, view ? view->wined3d_view : NULL, initial_counts ? initial_counts[i] : ~0u); + wined3d_views[i] = view ? view->wined3d_view : NULL; } + + wined3d_mutex_lock(); + wined3d_device_context_set_unordered_access_views(context->wined3d_context, WINED3D_PIPELINE_COMPUTE, + start_slot, view_count, wined3d_views, initial_counts); wined3d_mutex_unlock(); } diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c index 1deafc2d4c3..f7e5009df46 100644 --- a/dlls/wined3d/cs.c +++ b/dlls/wined3d/cs.c @@ -123,7 +123,7 @@ enum wined3d_cs_op WINED3D_CS_OP_SET_CONSTANT_BUFFERS, WINED3D_CS_OP_SET_TEXTURE, WINED3D_CS_OP_SET_SHADER_RESOURCE_VIEWS, - WINED3D_CS_OP_SET_UNORDERED_ACCESS_VIEW, + WINED3D_CS_OP_SET_UNORDERED_ACCESS_VIEWS, WINED3D_CS_OP_SET_SAMPLERS, WINED3D_CS_OP_SET_SHADER, WINED3D_CS_OP_SET_BLEND_STATE, @@ -319,13 +319,17 @@ struct wined3d_cs_set_shader_resource_views struct wined3d_shader_resource_view *views[1]; }; -struct wined3d_cs_set_unordered_access_view +struct wined3d_cs_set_unordered_access_views { enum wined3d_cs_op opcode; enum wined3d_pipeline pipeline; - unsigned int view_idx; - struct wined3d_unordered_access_view *view; - unsigned int initial_count; + unsigned int start_idx; + unsigned int count; + struct + { + struct wined3d_unordered_access_view *view; + unsigned int initial_count; + } uavs[1]; }; struct wined3d_cs_set_samplers @@ -604,7 +608,7 @@ static const char *debug_cs_op(enum wined3d_cs_op op) WINED3D_TO_STR(WINED3D_CS_OP_SET_CONSTANT_BUFFERS); WINED3D_TO_STR(WINED3D_CS_OP_SET_TEXTURE); WINED3D_TO_STR(WINED3D_CS_OP_SET_SHADER_RESOURCE_VIEWS); - WINED3D_TO_STR(WINED3D_CS_OP_SET_UNORDERED_ACCESS_VIEW); + WINED3D_TO_STR(WINED3D_CS_OP_SET_UNORDERED_ACCESS_VIEWS); WINED3D_TO_STR(WINED3D_CS_OP_SET_SAMPLERS); WINED3D_TO_STR(WINED3D_CS_OP_SET_SHADER); WINED3D_TO_STR(WINED3D_CS_OP_SET_BLEND_STATE); @@ -1702,37 +1706,49 @@ void wined3d_device_context_emit_set_shader_resource_views(struct wined3d_device wined3d_device_context_submit(context, WINED3D_CS_QUEUE_DEFAULT); } -static void wined3d_cs_exec_set_unordered_access_view(struct wined3d_cs *cs, const void *data) +static void wined3d_cs_exec_set_unordered_access_views(struct wined3d_cs *cs, const void *data) { - const struct wined3d_cs_set_unordered_access_view *op = data; - struct wined3d_unordered_access_view *prev; + const struct wined3d_cs_set_unordered_access_views *op = data; + unsigned int i; - prev = cs->state.unordered_access_view[op->pipeline][op->view_idx]; - cs->state.unordered_access_view[op->pipeline][op->view_idx] = op->view; + for (i = 0; i < op->count; ++i) + { + struct wined3d_unordered_access_view *prev = cs->state.unordered_access_view[op->pipeline][op->start_idx + i]; + struct wined3d_unordered_access_view *view = op->uavs[i].view; + unsigned int initial_count = op->uavs[i].initial_count; - if (op->view) - InterlockedIncrement(&op->view->resource->bind_count); - if (prev) - InterlockedDecrement(&prev->resource->bind_count); + cs->state.unordered_access_view[op->pipeline][op->start_idx + i] = view; - if (op->view && op->initial_count != ~0u) - wined3d_unordered_access_view_set_counter(op->view, op->initial_count); + if (view) + InterlockedIncrement(&view->resource->bind_count); + if (prev) + InterlockedDecrement(&prev->resource->bind_count); + + if (view && initial_count != ~0u) + wined3d_unordered_access_view_set_counter(view, initial_count); + } device_invalidate_state(cs->c.device, STATE_UNORDERED_ACCESS_VIEW_BINDING(op->pipeline)); } -void wined3d_device_context_emit_set_unordered_access_view(struct wined3d_device_context *context, - enum wined3d_pipeline pipeline, unsigned int view_idx, struct wined3d_unordered_access_view *view, - unsigned int initial_count) +void wined3d_device_context_emit_set_unordered_access_views(struct wined3d_device_context *context, + enum wined3d_pipeline pipeline, unsigned int start_idx, unsigned int count, + struct wined3d_unordered_access_view *const *views, const unsigned int *initial_counts) { - struct wined3d_cs_set_unordered_access_view *op; + struct wined3d_cs_set_unordered_access_views *op; + unsigned int i; - op = wined3d_device_context_require_space(context, sizeof(*op), WINED3D_CS_QUEUE_DEFAULT); - op->opcode = WINED3D_CS_OP_SET_UNORDERED_ACCESS_VIEW; + op = wined3d_device_context_require_space(context, + offsetof(struct wined3d_cs_set_unordered_access_views, uavs[count]), WINED3D_CS_QUEUE_DEFAULT); + op->opcode = WINED3D_CS_OP_SET_UNORDERED_ACCESS_VIEWS; op->pipeline = pipeline; - op->view_idx = view_idx; - op->view = view; - op->initial_count = initial_count; + op->start_idx = start_idx; + op->count = count; + for (i = 0; i < count; ++i) + { + op->uavs[i].view = views[i]; + op->uavs[i].initial_count = initial_counts ? initial_counts[i] : ~0u; + } wined3d_device_context_submit(context, WINED3D_CS_QUEUE_DEFAULT); } @@ -2917,7 +2933,7 @@ static void (* const wined3d_cs_op_handlers[])(struct wined3d_cs *cs, const void /* WINED3D_CS_OP_SET_CONSTANT_BUFFERS */ wined3d_cs_exec_set_constant_buffers, /* WINED3D_CS_OP_SET_TEXTURE */ wined3d_cs_exec_set_texture, /* WINED3D_CS_OP_SET_SHADER_RESOURCE_VIEWS */ wined3d_cs_exec_set_shader_resource_views, - /* WINED3D_CS_OP_SET_UNORDERED_ACCESS_VIEW */ wined3d_cs_exec_set_unordered_access_view, + /* WINED3D_CS_OP_SET_UNORDERED_ACCESS_VIEWS */ wined3d_cs_exec_set_unordered_access_views, /* WINED3D_CS_OP_SET_SAMPLERS */ wined3d_cs_exec_set_samplers, /* WINED3D_CS_OP_SET_SHADER */ wined3d_cs_exec_set_shader, /* WINED3D_CS_OP_SET_BLEND_STATE */ wined3d_cs_exec_set_blend_state, diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c index e9d4d0a0677..bb37f6aff51 100644 --- a/dlls/wined3d/device.c +++ b/dlls/wined3d/device.c @@ -1703,13 +1703,8 @@ void CDECL wined3d_device_context_set_state(struct wined3d_device_context *conte } for (i = 0; i < WINED3D_PIPELINE_COUNT; ++i) - { - for (j = 0; j < MAX_UNORDERED_ACCESS_VIEWS; ++j) - { - wined3d_device_context_emit_set_unordered_access_view(context, i, j, - state->unordered_access_view[i][j], ~0); - } - } + wined3d_device_context_emit_set_unordered_access_views(context, i, 0, MAX_UNORDERED_ACCESS_VIEWS, + state->unordered_access_view[i], NULL); wined3d_device_context_push_constants(context, WINED3D_PUSH_CONSTANTS_VS_F, 0, WINED3D_MAX_VS_CONSTS_F, state->vs_consts_f); @@ -2063,31 +2058,37 @@ void CDECL wined3d_device_context_set_samplers(struct wined3d_device_context *co } } -void CDECL wined3d_device_context_set_unordered_access_view(struct wined3d_device_context *context, - enum wined3d_pipeline pipeline, unsigned int idx, struct wined3d_unordered_access_view *uav, - unsigned int initial_count) +void CDECL wined3d_device_context_set_unordered_access_views(struct wined3d_device_context *context, + enum wined3d_pipeline pipeline, unsigned int start_idx, unsigned int count, + struct wined3d_unordered_access_view *const *uavs, const unsigned int *initial_counts) { struct wined3d_state *state = context->state; - struct wined3d_unordered_access_view *prev; + unsigned int i; - TRACE("context %p, pipeline %#x, idx %u, uav %p, initial_count %u.\n", context, pipeline, idx, uav, initial_count); + TRACE("context %p, pipeline %#x, start_idx %u, count %u, uavs %p, initial_counts %p.\n", + context, pipeline, start_idx, count, uavs, initial_counts); - if (idx >= MAX_UNORDERED_ACCESS_VIEWS) + if (!wined3d_bound_range(start_idx, count, MAX_UNORDERED_ACCESS_VIEWS)) { - WARN("Invalid UAV index %u.\n", idx); + WARN("Invalid UAV index %u, count %u.\n", start_idx, count); return; } - prev = state->unordered_access_view[pipeline][idx]; - if (uav == prev && initial_count == ~0u) + if (!memcmp(uavs, &state->unordered_access_view[pipeline][start_idx], count * sizeof(*uavs)) && !initial_counts) return; - if (uav) - wined3d_unordered_access_view_incref(uav); - state->unordered_access_view[pipeline][idx] = uav; - wined3d_device_context_emit_set_unordered_access_view(context, pipeline, idx, uav, initial_count); - if (prev) - wined3d_unordered_access_view_decref(prev); + wined3d_device_context_emit_set_unordered_access_views(context, pipeline, start_idx, count, uavs, initial_counts); + for (i = 0; i < count; ++i) + { + struct wined3d_unordered_access_view *prev = state->unordered_access_view[pipeline][start_idx + i]; + struct wined3d_unordered_access_view *uav = uavs[i]; + + if (uav) + wined3d_unordered_access_view_incref(uav); + state->unordered_access_view[pipeline][start_idx + i] = uav; + if (prev) + wined3d_unordered_access_view_decref(prev); + } } static void wined3d_device_context_unbind_srv_for_rtv(struct wined3d_device_context *context, diff --git a/dlls/wined3d/wined3d.spec b/dlls/wined3d/wined3d.spec index 4d4f34f8d6a..13c8b218b65 100644 --- a/dlls/wined3d/wined3d.spec +++ b/dlls/wined3d/wined3d.spec @@ -138,7 +138,7 @@ @ cdecl wined3d_device_context_set_state(ptr ptr) @ cdecl wined3d_device_context_set_stream_output(ptr long ptr long) @ cdecl wined3d_device_context_set_stream_source(ptr long ptr long long) -@ cdecl wined3d_device_context_set_unordered_access_view(ptr long long ptr long) +@ cdecl wined3d_device_context_set_unordered_access_views(ptr long long long ptr ptr) @ cdecl wined3d_device_context_set_vertex_declaration(ptr ptr) @ cdecl wined3d_device_context_set_viewports(ptr long ptr) @ cdecl wined3d_device_context_unmap(ptr ptr long) diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 91fb681a691..36fdf56a73c 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -4856,9 +4856,9 @@ void wined3d_device_context_emit_set_texture_state(struct wined3d_device_context enum wined3d_texture_stage_state state, unsigned int value) DECLSPEC_HIDDEN; void wined3d_device_context_emit_set_transform(struct wined3d_device_context *context, enum wined3d_transform_state state, const struct wined3d_matrix *matrix) DECLSPEC_HIDDEN; -void wined3d_device_context_emit_set_unordered_access_view(struct wined3d_device_context *context, - enum wined3d_pipeline pipeline, unsigned int view_idx, struct wined3d_unordered_access_view *view, - unsigned int initial_count) DECLSPEC_HIDDEN; +void wined3d_device_context_emit_set_unordered_access_views(struct wined3d_device_context *context, + enum wined3d_pipeline pipeline, unsigned int start_idx, unsigned int count, + struct wined3d_unordered_access_view *const *views, const unsigned int *initial_count) DECLSPEC_HIDDEN; void wined3d_device_context_emit_set_vertex_declaration(struct wined3d_device_context *context, struct wined3d_vertex_declaration *declaration) DECLSPEC_HIDDEN; void wined3d_device_context_emit_set_viewports(struct wined3d_device_context *context, unsigned int viewport_count, diff --git a/include/wine/wined3d.h b/include/wine/wined3d.h index 10253cab87d..21b9e01efd0 100644 --- a/include/wine/wined3d.h +++ b/include/wine/wined3d.h @@ -2527,9 +2527,9 @@ void __cdecl wined3d_device_context_set_stream_output(struct wined3d_device_cont struct wined3d_buffer *buffer, unsigned int offset); HRESULT __cdecl wined3d_device_context_set_stream_source(struct wined3d_device_context *context, unsigned int stream_idx, struct wined3d_buffer *buffer, unsigned int offset, unsigned int stride); -void __cdecl wined3d_device_context_set_unordered_access_view(struct wined3d_device_context *context, - enum wined3d_pipeline pipeline, unsigned int idx, struct wined3d_unordered_access_view *uav, - unsigned int initial_count); +void __cdecl wined3d_device_context_set_unordered_access_views(struct wined3d_device_context *context, + enum wined3d_pipeline pipeline, unsigned int start_idx, unsigned int count, + struct wined3d_unordered_access_view *const *uavs, const unsigned int *initial_counts); void __cdecl wined3d_device_context_set_vertex_declaration(struct wined3d_device_context *context, struct wined3d_vertex_declaration *declaration); void __cdecl wined3d_device_context_set_viewports(struct wined3d_device_context *context, unsigned int viewport_count,