diff --git a/dlls/wined3d/baseshader.c b/dlls/wined3d/baseshader.c
index 6bc7aacd051..fed1144477e 100644
--- a/dlls/wined3d/baseshader.c
+++ b/dlls/wined3d/baseshader.c
@@ -361,6 +361,7 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, struct shader_reg_m
     unsigned int cur_loop_depth = 0, max_loop_depth = 0;
     const DWORD* pToken = byte_code;
     char pshader;
+    unsigned int intconst = 0, boolconst = 0;
 
     /* There are some minor differences between pixel and vertex shaders */
 
@@ -490,13 +491,21 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, struct shader_reg_m
         else if (ins.handler_idx == WINED3DSIH_LOOP
                 || ins.handler_idx == WINED3DSIH_REP)
         {
+            DWORD reg;
+
+            if(ins.handler_idx == WINED3DSIH_LOOP) {
+                reg = pToken[1];
+            } else {
+                reg = pToken[0];
+            }
+
             cur_loop_depth++;
             if(cur_loop_depth > max_loop_depth)
                 max_loop_depth = cur_loop_depth;
             pToken += param_size;
 
             /* Rep and Loop always use an integer constant for the control parameters */
-            This->baseShader.uses_int_consts = TRUE;
+            intconst |= (1 << (reg & WINED3DSP_REGNUM_MASK));
         }
         else if (ins.handler_idx == WINED3DSIH_ENDLOOP
                 || ins.handler_idx == WINED3DSIH_ENDREP)
@@ -628,10 +637,10 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, struct shader_reg_m
                     }
                 }
                 else if(WINED3DSPR_CONSTINT == regtype) {
-                    This->baseShader.uses_int_consts = TRUE;
+                    intconst |= (1 << reg);
                 }
                 else if(WINED3DSPR_CONSTBOOL == regtype) {
-                    This->baseShader.uses_bool_consts = TRUE;
+                    boolconst |= (1 << reg);
                 }
 
                 /* WINED3DSPR_TEXCRDOUT is the same as WINED3DSPR_OUTPUT. _OUTPUT can be > MAX_REG_TEXCRD and is used
@@ -650,6 +659,8 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, struct shader_reg_m
     reg_maps->loop_depth = max_loop_depth;
 
     This->baseShader.functionLength = ((char *)pToken - (char *)byte_code);
+    This->baseShader.num_bool_consts = count_bits(boolconst);
+    This->baseShader.num_int_consts = count_bits(intconst);
 
     return WINED3D_OK;
 }
diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c
index 35adc50f836..6aca4da0ff1 100644
--- a/dlls/wined3d/glsl_shader.c
+++ b/dlls/wined3d/glsl_shader.c
@@ -558,13 +558,13 @@ static void shader_glsl_load_constants(
                 prog->vuniformF_locations, &priv->vconst_heap, priv->stack, constant_version);
 
         /* Load DirectX 9 integer constants/uniforms for vertex shader */
-        if(vshader->baseShader.uses_int_consts) {
+        if(vshader->baseShader.num_int_consts) {
             shader_glsl_load_constantsI(vshader, gl_info, prog->vuniformI_locations,
                     stateBlock->vertexShaderConstantI, stateBlock->changed.vertexShaderConstantsI);
         }
 
         /* Load DirectX 9 boolean constants/uniforms for vertex shader */
-        if(vshader->baseShader.uses_bool_consts) {
+        if(vshader->baseShader.num_bool_consts) {
             shader_glsl_load_constantsB(vshader, gl_info, programId,
                     stateBlock->vertexShaderConstantB, stateBlock->changed.vertexShaderConstantsB);
         }
@@ -583,13 +583,13 @@ static void shader_glsl_load_constants(
                 prog->puniformF_locations, &priv->pconst_heap, priv->stack, constant_version);
 
         /* Load DirectX 9 integer constants/uniforms for pixel shader */
-        if(pshader->baseShader.uses_int_consts) {
+        if(pshader->baseShader.num_int_consts) {
             shader_glsl_load_constantsI(pshader, gl_info, prog->puniformI_locations,
                     stateBlock->pixelShaderConstantI, stateBlock->changed.pixelShaderConstantsI);
         }
 
         /* Load DirectX 9 boolean constants/uniforms for pixel shader */
-        if(pshader->baseShader.uses_bool_consts) {
+        if(pshader->baseShader.num_bool_consts) {
             shader_glsl_load_constantsB(pshader, gl_info, programId,
                     stateBlock->pixelShaderConstantB, stateBlock->changed.pixelShaderConstantsB);
         }
@@ -724,21 +724,53 @@ static void shader_generate_glsl_declarations(IWineD3DBaseShader *iface, const s
     /* Declare the constants (aka uniforms) */
     if (This->baseShader.limits.constant_float > 0) {
         unsigned max_constantsF;
+        /* Unless the shader uses indirect addressing, always declare the maximum array size and ignore that we need some
+         * uniforms privately. E.g. if GL supports 256 uniforms, and we need 2 for the pos fixup and immediate values, still
+         * declare VC[256]. If the shader needs more uniforms than we have it won't work in any case. If it uses less, the
+         * compiler will figure out which uniforms are really used and strip them out. This allows a shader to use c255 on
+         * a dx9 card, as long as it doesn't also use all the other constants.
+         *
+         * If the shader uses indirect addressing the compiler must assume that all declared uniforms are used. In this case,
+         * declare only the amount that we're assured to have.
+         *
+         * Thus we run into problems in these two cases:
+         * 1) The shader really uses more uniforms than supported
+         * 2) The shader uses indirect addressing, less constants than supported, but uses a constant index > #supported consts
+         */
         if(pshader) {
-            max_constantsF = GL_LIMITS(pshader_constantsF) - (MAX_CONST_B / 4) - MAX_CONST_I - 2;
-            max_constantsF = min(This->baseShader.limits.constant_float, max_constantsF);
+            /* No indirect addressing here */
+            max_constantsF = GL_LIMITS(pshader_constantsF);
         } else {
-            /* Subtract the other potential uniforms from the max available (bools, ints, and 1 row of projection matrix) */
-            max_constantsF = GL_LIMITS(vshader_constantsF) - (MAX_CONST_B / 4) - MAX_CONST_I - 1;
-            max_constantsF = min(This->baseShader.limits.constant_float, max_constantsF);
+            if(This->baseShader.reg_maps.usesrelconstF) {
+                /* Subtract the other potential uniforms from the max available (bools, ints, and 1 row of projection matrix).
+                 * Subtract another uniform for immediate values, which have to be loaded via uniform by the driver as well.
+                 * The shader code only uses 0.5, 2.0, 1.0, 128 and -128 in vertex shader code, so one vec4 should be enough
+                 * (Unfortunately the Nvidia driver doesn't store 128 and -128 in one float
+                 */
+                max_constantsF = GL_LIMITS(vshader_constantsF) - 3;
+                max_constantsF -= This->baseShader.num_int_consts;
+                /* Strictly speaking a bool only uses one scalar, but the nvidia(Linux) compiler doesn't pack them properly,
+                 * so each scalar requires a full vec4. We could work around this by packing the booleans ourselves, but
+                 * for now take this into account when calculating the number of available constants
+                 */
+                max_constantsF -= This->baseShader.num_bool_consts;
+                /* Set by driver quirks in directx.c */
+                max_constantsF -= GLINFO_LOCATION.reserved_glsl_constants;
+            } else {
+                max_constantsF = GL_LIMITS(vshader_constantsF);
+            }
         }
+        max_constantsF = min(This->baseShader.limits.constant_float, max_constantsF);
         shader_addline(buffer, "uniform vec4 %cC[%u];\n", prefix, max_constantsF);
     }
 
-    if (This->baseShader.limits.constant_int > 0)
+    /* Always declare the full set of constants, the compiler can remove the unused ones because d3d doesn't(yet)
+     * support indirect int and bool constant addressing. This avoids problems if the app uses e.g. i0 and i9.
+     */
+    if (This->baseShader.limits.constant_int > 0 && This->baseShader.num_int_consts)
         shader_addline(buffer, "uniform ivec4 %cI[%u];\n", prefix, This->baseShader.limits.constant_int);
 
-    if (This->baseShader.limits.constant_bool > 0)
+    if (This->baseShader.limits.constant_bool > 0 && This->baseShader.num_bool_consts)
         shader_addline(buffer, "uniform bool %cB[%u];\n", prefix, This->baseShader.limits.constant_bool);
 
     if(!pshader) {
@@ -4117,8 +4149,7 @@ static void shader_glsl_get_caps(WINED3DDEVTYPE devtype, const WineD3D_GL_Info *
     else
         pCaps->VertexShaderVersion = WINED3DVS_VERSION(3,0);
     TRACE_(d3d_caps)("Hardware vertex shader version %d.%d enabled (GLSL)\n", (pCaps->VertexShaderVersion >> 8) & 0xff, pCaps->VertexShaderVersion & 0xff);
-    /* Subtract the other potential uniforms from the max available (bools, ints, and 1 row of projection matrix) */
-    pCaps->MaxVertexShaderConst = GL_LIMITS(vshader_constantsF) - (MAX_CONST_B / 4) - MAX_CONST_I - 1;
+    pCaps->MaxVertexShaderConst = GL_LIMITS(vshader_constantsF);
 
     /* Older DX9-class videocards (GeforceFX / Radeon >9500/X*00) only support pixel shader 2.0/2.0a/2.0b.
      * In OpenGL the extensions related to GLSL abstract lowlevel GL info away which is needed
@@ -4136,12 +4167,7 @@ static void shader_glsl_get_caps(WINED3DDEVTYPE devtype, const WineD3D_GL_Info *
     else
         pCaps->PixelShaderVersion = WINED3DPS_VERSION(3,0);
 
-    /* Subtract the other potential uniforms from the max available (bools & ints), and 2 states for fog.
-     * In theory the texbem instruction may need one more shader constant too. But lets assume
-     * that a sm <= 1.3 shader does not need all the uniforms provided by a glsl-capable card,
-     * and lets not take away a uniform needlessly from all other shaders.
-     */
-    pCaps->MaxPixelShaderConst = GL_LIMITS(pshader_constantsF) - (MAX_CONST_B / 4) - MAX_CONST_I - 2;
+    pCaps->MaxPixelShaderConst = GL_LIMITS(pshader_constantsF);
 
     /* FIXME: The following line is card dependent. -8.0 to 8.0 is the
      * Direct3D minimum requirement.
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index 65640b343a6..3a0eaab2b9f 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -2244,6 +2244,7 @@ BOOL getDepthStencilBits(const struct GlPixelFormatDesc *format_desc, short *dep
 /* Math utils */
 void multiply_matrix(WINED3DMATRIX *dest, const WINED3DMATRIX *src1, const WINED3DMATRIX *src2);
 UINT wined3d_log2i(UINT32 x);
+unsigned int count_bits(unsigned int mask);
 
 typedef struct local_constant {
     struct list entry;
@@ -2318,7 +2319,7 @@ typedef struct IWineD3DBaseShaderClass
     UINT                            functionLength;
     UINT                            cur_loop_depth, cur_loop_regno;
     BOOL                            load_local_constsF;
-    BOOL                            uses_bool_consts, uses_int_consts;
+    BOOL                            num_bool_consts, num_int_consts;
 
     /* Type of shader backend */
     int shader_mode;