wined3d: Enable constant packing for NP2 texcoord fixup.

Previously every texture that was flagged for NP2 fixup used a vec2
uniform in the shader to store texture dimensions.  Turns out that the
GLSL compilers just maps vec2 to vec4, so essentially wasting 2
floats. The new code only uses vec4 uniforms but packs dimensions info
of 2 textures into a single uniform.
This commit is contained in:
Tobias Jakobi 2009-06-17 23:26:38 +02:00 committed by Alexandre Julliard
parent 7906774339
commit ef7f769be6
1 changed files with 68 additions and 36 deletions

View File

@ -103,7 +103,7 @@ struct glsl_shader_prog_link {
GLint vuniformI_locations[MAX_CONST_I];
GLint puniformI_locations[MAX_CONST_I];
GLint posFixup_location;
GLint np2Fixup_location[MAX_FRAGMENT_SAMPLERS];
GLint np2Fixup_location;
GLint bumpenvmat_location[MAX_TEXTURES];
GLint luminancescale_location[MAX_TEXTURES];
GLint luminanceoffset_location[MAX_TEXTURES];
@ -572,24 +572,31 @@ static void shader_glsl_load_np2fixup_constants(
return;
}
if (prog->ps_args.np2_fixup) {
UINT i;
UINT fixup = prog->ps_args.np2_fixup;
if (prog->ps_args.np2_fixup && -1 != prog->np2Fixup_location) {
const WineD3D_GL_Info* gl_info = &deviceImpl->adapter->gl_info;
const IWineD3DStateBlockImpl* stateBlock = (const IWineD3DStateBlockImpl*) deviceImpl->stateBlock;
UINT i;
UINT fixup = prog->ps_args.np2_fixup;
GLfloat np2fixup_constants[4 * MAX_FRAGMENT_SAMPLERS];
for (i = 0; fixup; fixup >>= 1, ++i) {
if (-1 != prog->np2Fixup_location[i]) {
const IWineD3DBaseTextureImpl* const tex = (const IWineD3DBaseTextureImpl*) stateBlock->textures[i];
if (!tex) {
FIXME("Nonexistent texture is flagged for NP2 texcoord fixup\n");
continue;
} else {
const float tex_dim[2] = {tex->baseTexture.pow2Matrix[0], tex->baseTexture.pow2Matrix[5]};
GL_EXTCALL(glUniform2fvARB(prog->np2Fixup_location[i], 1, tex_dim));
}
const unsigned char idx = prog->np2Fixup_info->idx[i];
const IWineD3DBaseTextureImpl* const tex = (const IWineD3DBaseTextureImpl*) stateBlock->textures[i];
GLfloat* tex_dim = &np2fixup_constants[(idx >> 1) * 4];
if (!tex) {
FIXME("Nonexistent texture is flagged for NP2 texcoord fixup\n");
continue;
}
if (idx % 2) {
tex_dim[2] = tex->baseTexture.pow2Matrix[0]; tex_dim[3] = tex->baseTexture.pow2Matrix[5];
} else {
tex_dim[0] = tex->baseTexture.pow2Matrix[0]; tex_dim[1] = tex->baseTexture.pow2Matrix[5];
}
}
GL_EXTCALL(glUniform4fvARB(prog->np2Fixup_location, prog->np2Fixup_info->num_consts, np2fixup_constants));
}
}
@ -776,10 +783,11 @@ static int vec4_varyings(DWORD shader_major, const WineD3D_GL_Info *gl_info)
/** Generate the variable & register declarations for the GLSL output target */
static void shader_generate_glsl_declarations(IWineD3DBaseShader *iface, const shader_reg_maps *reg_maps,
SHADER_BUFFER *buffer, const WineD3D_GL_Info *gl_info,
const struct ps_compile_args *ps_args)
struct shader_glsl_ctx_priv *ctx_priv)
{
IWineD3DBaseShaderImpl* This = (IWineD3DBaseShaderImpl*) iface;
IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *) This->baseShader.device;
const struct ps_compile_args *ps_args = ctx_priv->cur_ps_args;
unsigned int i, extra_constants_needed = 0;
const local_constant *lconst;
@ -922,15 +930,6 @@ static void shader_generate_glsl_declarations(IWineD3DBaseShader *iface, const s
} else {
shader_addline(buffer, "uniform sampler2D %csampler%u;\n", prefix, i);
}
if (pshader && ps_args->np2_fixup & (1 << i))
{
/* NP2/RECT textures in OpenGL use texcoords in the range [0,width]x[0,height]
* while D3D has them in the (normalized) [0,1]x[0,1] range.
* samplerNP2Fixup stores texture dimensions and is updated through
* shader_glsl_load_np2fixup_constants when the sampler changes. */
shader_addline(buffer, "uniform vec2 %csamplerNP2Fixup%u;\n", prefix, i);
}
break;
case WINED3DSTT_CUBE:
shader_addline(buffer, "uniform samplerCube %csampler%u;\n", prefix, i);
@ -945,7 +944,38 @@ static void shader_generate_glsl_declarations(IWineD3DBaseShader *iface, const s
}
}
}
/* Declare uniforms for NP2 texcoord fixup:
* This is NOT done inside the loop that declares the texture samplers since the NP2 fixup code
* is currently only used for the GeforceFX series and when forcing the ARB_npot extension off.
* Modern cards just skip the code anyway, so put it inside a seperate loop. */
if (pshader && ps_args->np2_fixup) {
struct ps_np2fixup_info* const fixup = ctx_priv->cur_np2fixup_info;
UINT cur = 0;
/* NP2/RECT textures in OpenGL use texcoords in the range [0,width]x[0,height]
* while D3D has them in the (normalized) [0,1]x[0,1] range.
* samplerNP2Fixup stores texture dimensions and is updated through
* shader_glsl_load_np2fixup_constants when the sampler changes. */
for (i = 0; i < This->baseShader.limits.sampler; ++i) {
if (reg_maps->sampler_type[i]) {
if (!(ps_args->np2_fixup & (1 << i))) continue;
if (WINED3DSTT_2D != reg_maps->sampler_type[i]) {
FIXME("Non-2D texture is flagged for NP2 texcoord fixup.\n");
continue;
}
fixup->idx[i] = cur++;
}
}
fixup->num_consts = (cur + 1) >> 1;
shader_addline(buffer, "uniform vec4 %csamplerNP2Fixup[%u];\n", prefix, fixup->num_consts);
}
/* Declare address variables */
for (i = 0; i < This->baseShader.limits.address; i++) {
if (reg_maps->address[i])
@ -1679,7 +1709,7 @@ static void PRINTF_ATTR(8, 9) shader_glsl_gen_sample_code(const struct wined3d_s
if (shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type))
{
struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
fixup = priv->cur_ps_args->color_fixup[sampler];
sampler_base = "Psampler";
@ -1707,7 +1737,11 @@ static void PRINTF_ATTR(8, 9) shader_glsl_gen_sample_code(const struct wined3d_s
shader_addline(ins->ctx->buffer, ", %s)%s);\n", bias, dst_swizzle);
} else {
if (np2_fixup) {
shader_addline(ins->ctx->buffer, " * PsamplerNP2Fixup%u)%s);\n", sampler, dst_swizzle);
const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
const unsigned char idx = priv->cur_np2fixup_info->idx[sampler];
shader_addline(ins->ctx->buffer, " * PsamplerNP2Fixup[%u].%s)%s);\n", idx >> 1,
(idx % 2) ? "zw" : "xy", dst_swizzle);
} else if(dx && dy) {
shader_addline(ins->ctx->buffer, ", %s, %s)%s);\n", dx, dy, dst_swizzle);
} else {
@ -3658,7 +3692,7 @@ static GLuint shader_glsl_generate_pshader(IWineD3DPixelShaderImpl *This,
}
/* Base Declarations */
shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, reg_maps, buffer, &GLINFO_LOCATION, args);
shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, reg_maps, buffer, &GLINFO_LOCATION, &priv_ctx);
/* Pack 3.0 inputs */
if (reg_maps->shader_version.major >= 3 && args->vp_mode != vertexshader)
@ -3747,7 +3781,7 @@ static GLuint shader_glsl_generate_vshader(IWineD3DVertexShaderImpl *This,
priv_ctx.cur_vs_args = args;
/* Base Declarations */
shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, reg_maps, buffer, &GLINFO_LOCATION, NULL);
shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, reg_maps, buffer, &GLINFO_LOCATION, &priv_ctx);
/* Base Shader Body */
shader_generate_main((IWineD3DBaseShader*)This, buffer, reg_maps, function, &priv_ctx);
@ -4053,7 +4087,6 @@ static void set_glsl_shader_program(IWineD3DDevice *iface, BOOL use_ps, BOOL use
if(pshader) {
char name[32];
WORD map;
for(i = 0; i < MAX_TEXTURES; i++) {
sprintf(name, "bumpenvmat%u", i);
@ -4064,13 +4097,12 @@ static void set_glsl_shader_program(IWineD3DDevice *iface, BOOL use_ps, BOOL use
entry->luminanceoffset_location[i] = GL_EXTCALL(glGetUniformLocationARB(programId, name));
}
map = ps_compile_args.np2_fixup;
for (i = 0; map; map >>= 1, ++i)
{
if (!(map & 1)) continue;
sprintf(name, "PsamplerNP2Fixup%u", i);
entry->np2Fixup_location[i] = GL_EXTCALL(glGetUniformLocationARB(programId, name));
if (ps_compile_args.np2_fixup) {
if (entry->np2Fixup_info) {
entry->np2Fixup_location = GL_EXTCALL(glGetUniformLocationARB(programId, "PsamplerNP2Fixup"));
} else {
FIXME("NP2 texcoord fixup needed for this pixelshader, but no fixup uniform found.");
}
}
}