wined3d: Optimize scanning changed shader constants in wined3d_device_apply_stateblock().

Signed-off-by: Matteo Bruni <mbruni@codeweavers.com>
Signed-off-by: Henri Verbeet <hverbeet@codeweavers.com>
Signed-off-by: Alexandre Julliard <julliard@winehq.org>
This commit is contained in:
Matteo Bruni 2020-02-27 13:31:11 +01:00 committed by Alexandre Julliard
parent 8d87bce071
commit e32b8e8137
2 changed files with 107 additions and 84 deletions

View File

@ -3835,13 +3835,14 @@ struct wined3d_texture * CDECL wined3d_device_get_texture(const struct wined3d_d
void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device,
struct wined3d_stateblock *stateblock)
{
const struct wined3d_d3d_info *d3d_info = &stateblock->device->adapter->d3d_info;
const struct wined3d_stateblock_state *state = &stateblock->stateblock_state;
const struct wined3d_saved_states *changed = &stateblock->changed;
struct wined3d_blend_state *blend_state;
struct wined3d_color colour;
unsigned int i, j, count;
struct wined3d_range range;
unsigned int i, j, start;
BOOL set_blend_state;
DWORD map;
TRACE("device %p, stateblock %p.\n", device, stateblock);
@ -3850,89 +3851,57 @@ void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device,
if (changed->pixelShader)
wined3d_device_set_pixel_shader(device, state->ps);
count = 0;
for (i = 0; i < d3d_info->limits.vs_uniform_count; ++i)
for (start = 0; ; start = range.offset + range.size)
{
if (wined3d_bitmap_is_set(changed->vs_consts_f, i))
++count;
else if (count)
{
wined3d_device_set_vs_consts_f(device, i - count, count, state->vs_consts_f + i - count);
count = 0;
}
}
if (count)
wined3d_device_set_vs_consts_f(device, i - count, count, state->vs_consts_f + i - count);
if (!wined3d_bitmap_get_range(changed->vs_consts_f, WINED3D_MAX_VS_CONSTS_F, start, &range))
break;
count = 0;
for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i)
{
if (changed->vertexShaderConstantsB & (1u << i))
++count;
else if (count)
{
wined3d_device_set_vs_consts_b(device, i - count, count, state->vs_consts_b + i - count);
count = 0;
}
wined3d_device_set_vs_consts_f(device, range.offset, range.size, &state->vs_consts_f[range.offset]);
}
if (count)
wined3d_device_set_vs_consts_b(device, i - count, count, state->vs_consts_b + i - count);
count = 0;
for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i)
map = changed->vertexShaderConstantsI;
for (start = 0; ; start = range.offset + range.size)
{
if (changed->vertexShaderConstantsI & (1u << i))
++count;
else if (count)
{
wined3d_device_set_vs_consts_i(device, i - count, count, state->vs_consts_i + i - count);
count = 0;
}
}
if (count)
wined3d_device_set_vs_consts_i(device, i - count, count, state->vs_consts_i + i - count);
if (!wined3d_bitmap_get_range(&map, WINED3D_MAX_CONSTS_I, start, &range))
break;
count = 0;
for (i = 0; i < d3d_info->limits.ps_uniform_count; ++i)
{
if (wined3d_bitmap_is_set(changed->ps_consts_f, i))
++count;
else if (count)
{
wined3d_device_set_ps_consts_f(device, i - count, count, state->ps_consts_f + i - count);
count = 0;
}
wined3d_device_set_vs_consts_i(device, range.offset, range.size, &state->vs_consts_i[range.offset]);
}
if (count)
wined3d_device_set_ps_consts_f(device, i - count, count, state->ps_consts_f + i - count);
count = 0;
for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i)
map = changed->vertexShaderConstantsB;
for (start = 0; ; start = range.offset + range.size)
{
if (changed->pixelShaderConstantsB & (1u << i))
++count;
else if (count)
{
wined3d_device_set_ps_consts_b(device, i - count, count, state->ps_consts_b + i - count);
count = 0;
}
}
if (count)
wined3d_device_set_ps_consts_b(device, i - count, count, state->ps_consts_b + i - count);
if (!wined3d_bitmap_get_range(&map, WINED3D_MAX_CONSTS_B, start, &range))
break;
count = 0;
for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i)
{
if (changed->pixelShaderConstantsI & (1u << i))
++count;
else if (count)
{
wined3d_device_set_ps_consts_i(device, i - count, count, state->ps_consts_i + i - count);
count = 0;
}
wined3d_device_set_vs_consts_b(device, range.offset, range.size, &state->vs_consts_b[range.offset]);
}
for (start = 0; ; start = range.offset + range.size)
{
if (!wined3d_bitmap_get_range(changed->ps_consts_f, WINED3D_MAX_PS_CONSTS_F, start, &range))
break;
wined3d_device_set_ps_consts_f(device, range.offset, range.size, &state->ps_consts_f[range.offset]);
}
map = changed->pixelShaderConstantsI;
for (start = 0; ; start = range.offset + range.size)
{
if (!wined3d_bitmap_get_range(&map, WINED3D_MAX_CONSTS_I, start, &range))
break;
wined3d_device_set_ps_consts_i(device, range.offset, range.size, &state->ps_consts_i[range.offset]);
}
map = changed->pixelShaderConstantsB;
for (start = 0; ; start = range.offset + range.size)
{
if (!wined3d_bitmap_get_range(&map, WINED3D_MAX_CONSTS_B, start, &range))
break;
wined3d_device_set_ps_consts_b(device, range.offset, range.size, &state->ps_consts_b[range.offset]);
}
if (count)
wined3d_device_set_ps_consts_i(device, i - count, count, state->ps_consts_i + i - count);
for (i = 0; i < ARRAY_SIZE(state->light_state->light_map); ++i)
{

View File

@ -3196,13 +3196,13 @@ struct wined3d_state
struct wined3d_shader_resource_view *shader_resource_view[WINED3D_SHADER_TYPE_COUNT][MAX_SHADER_RESOURCE_VIEWS];
struct wined3d_unordered_access_view *unordered_access_view[WINED3D_PIPELINE_COUNT][MAX_UNORDERED_ACCESS_VIEWS];
BOOL vs_consts_b[WINED3D_MAX_CONSTS_B];
struct wined3d_ivec4 vs_consts_i[WINED3D_MAX_CONSTS_I];
struct wined3d_vec4 vs_consts_f[WINED3D_MAX_VS_CONSTS_F];
struct wined3d_ivec4 vs_consts_i[WINED3D_MAX_CONSTS_I];
BOOL vs_consts_b[WINED3D_MAX_CONSTS_B];
BOOL ps_consts_b[WINED3D_MAX_CONSTS_B];
struct wined3d_ivec4 ps_consts_i[WINED3D_MAX_CONSTS_I];
struct wined3d_vec4 ps_consts_f[WINED3D_MAX_PS_CONSTS_F];
struct wined3d_ivec4 ps_consts_i[WINED3D_MAX_CONSTS_I];
BOOL ps_consts_b[WINED3D_MAX_CONSTS_B];
struct wined3d_texture *textures[WINED3D_MAX_COMBINED_SAMPLERS];
DWORD sampler_states[WINED3D_MAX_COMBINED_SAMPLERS][WINED3D_HIGHEST_SAMPLER_STATE + 1];
@ -3914,6 +3914,12 @@ struct wined3d_vertex_declaration
struct wined3d_saved_states
{
DWORD vs_consts_f[WINED3D_MAX_VS_CONSTS_F >> 5];
WORD vertexShaderConstantsI; /* WINED3D_MAX_CONSTS_I, 16 */
WORD vertexShaderConstantsB; /* WINED3D_MAX_CONSTS_B, 16 */
DWORD ps_consts_f[WINED3D_MAX_PS_CONSTS_F >> 5];
WORD pixelShaderConstantsI; /* WINED3D_MAX_CONSTS_I, 16 */
WORD pixelShaderConstantsB; /* WINED3D_MAX_CONSTS_B, 16 */
DWORD transform[(WINED3D_HIGHEST_TRANSFORM_STATE >> 5) + 1];
WORD streamSource; /* WINED3D_MAX_STREAMS, 16 */
WORD streamFreq; /* WINED3D_MAX_STREAMS, 16 */
@ -3921,12 +3927,6 @@ struct wined3d_saved_states
DWORD textureState[WINED3D_MAX_TEXTURES]; /* WINED3D_HIGHEST_TEXTURE_STATE + 1, 18 */
WORD samplerState[WINED3D_MAX_COMBINED_SAMPLERS]; /* WINED3D_HIGHEST_SAMPLER_STATE + 1, 14 */
DWORD clipplane; /* WINED3D_MAX_USER_CLIP_PLANES, 32 */
WORD pixelShaderConstantsB; /* WINED3D_MAX_CONSTS_B, 16 */
WORD pixelShaderConstantsI; /* WINED3D_MAX_CONSTS_I, 16 */
DWORD ps_consts_f[WINED3D_MAX_PS_CONSTS_F >> 5];
WORD vertexShaderConstantsB; /* WINED3D_MAX_CONSTS_B, 16 */
WORD vertexShaderConstantsI; /* WINED3D_MAX_CONSTS_I, 16 */
DWORD vs_consts_f[WINED3D_MAX_VS_CONSTS_F >> 5];
DWORD textures : 20; /* WINED3D_MAX_COMBINED_SAMPLERS, 20 */
DWORD indices : 1;
DWORD material : 1;
@ -5325,6 +5325,60 @@ static inline BOOL wined3d_bitmap_is_set(const uint32_t *map, unsigned int idx)
return map[idx >> 5] & (1u << (idx & 0x1f));
}
/* Find the first set bit of ("bitmap" ^ "xor_mask") at or after bit "start".
 *
 * "bitmap" is read as an array of 32-bit words holding "bit_count" bits.
 * "bit_count" must either be smaller than one word or be a whole number of
 * words; this is asserted. "xor_mask" is applied to every word before it is
 * examined, so passing 0 searches for set bits and passing ~0u searches for
 * clear bits.
 *
 * Returns the index of the bit found, or ~0u if there is no such bit below
 * "bit_count". */
static inline unsigned int wined3d_bitmap_ffs_xor(const uint32_t *bitmap, unsigned int bit_count,
        unsigned int start, uint32_t xor_mask)
{
    const unsigned int word_bit_count = sizeof(*bitmap) * CHAR_BIT;
    const uint32_t *ptr, *end_ptr;
    unsigned int bit_idx;
    uint32_t map, mask;

    assert(bit_count < word_bit_count || !(bit_count % word_bit_count));

    ptr = bitmap + start / word_bit_count;
    end_ptr = bitmap + (bit_count + word_bit_count - 1) / word_bit_count;

    /* "start" lies at or beyond the last word of the bitmap. */
    if (ptr >= end_ptr)
        return ~0u;

    /* Only the first word examined needs the bits below "start" hidden. */
    mask = ~0u << start % word_bit_count;
    map = (*ptr ^ xor_mask) & mask;
    while (!map)
    {
        if (++ptr == end_ptr)
            return ~0u;
        map = *ptr ^ xor_mask;
    }

    /* NOTE(review): wined3d_bit_scan() is defined elsewhere; it is used here
     * on the assumption that it yields the index of the lowest set bit. */
    bit_idx = (ptr - bitmap) * word_bit_count + wined3d_bit_scan(&map);

    /* A bitmap smaller than one word still gets scanned as a full word, so
     * the hit may be one of the unused upper bits (always the case when
     * looking for a clear bit in a fully set map). Report that as "not
     * found" rather than returning an index outside the bitmap. */
    if (bit_idx >= bit_count)
        return ~0u;

    return bit_idx;
}
/* Find the first set bit in "bitmap" at or after bit "start".
 *
 * Thin wrapper around wined3d_bitmap_ffs_xor() and returns its result
 * unchanged: a bit index, or ~0u when the scan runs past the last word of
 * the bitmap without finding a set bit. */
static inline unsigned int wined3d_bitmap_ffs(const uint32_t *bitmap, unsigned int bit_count, unsigned int start)
{
    /* XOR-ing with zero leaves every word as is, so set bits stay set. */
    const uint32_t keep_bits = 0;

    return wined3d_bitmap_ffs_xor(bitmap, bit_count, start, keep_bits);
}
/* Find the first clear bit in "bitmap" at or after bit "start".
 *
 * Thin wrapper around wined3d_bitmap_ffs_xor() and returns its result
 * unchanged: a bit index, or ~0u when the scan runs past the last word of
 * the bitmap without finding a clear bit. */
static inline unsigned int wined3d_bitmap_ffz(const uint32_t *bitmap, unsigned int bit_count, unsigned int start)
{
    /* XOR-ing with all ones inverts every word, so clear bits show up as
     * set bits to the underlying scan. */
    const uint32_t invert_bits = ~0u;

    return wined3d_bitmap_ffs_xor(bitmap, bit_count, start, invert_bits);
}
/* Locate the next run of consecutive set bits in "bitmap", searching from
 * bit "start".
 *
 * On success, "range->offset" receives the index of the first set bit at or
 * after "start" and "range->size" the length of the run beginning there; a
 * run with no clear bit after it is taken to end at "bit_count".
 *
 * Returns TRUE if a run was found and "range" was filled in, FALSE (leaving
 * "range" untouched) if no set bit was found. */
static inline BOOL wined3d_bitmap_get_range(const DWORD *bitmap, unsigned int bit_count,
        unsigned int start, struct wined3d_range *range)
{
    unsigned int first_set, first_clear;

    first_set = wined3d_bitmap_ffs(bitmap, bit_count, start);
    if (first_set == ~0u)
        return FALSE;

    /* The run ends at the first clear bit following its first set bit. */
    first_clear = wined3d_bitmap_ffz(bitmap, bit_count, first_set + 1);

    range->offset = first_set;
    range->size = (first_clear == ~0u ? bit_count : first_clear) - first_set;
    return TRUE;
}
/* The WNDCLASS-Name for the fake window which we use to retrieve the GL capabilities */
#define WINED3D_OPENGL_WINDOW_CLASS_NAME "WineD3D_OpenGL"