/*
 * Copyright 2016 Paul Gofman
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */

#include "config.h"
#include "wine/port.h"

#include "d3dx9_private.h"

#include <float.h>
#include <assert.h>

WINE_DEFAULT_DEBUG_CHANNEL(d3dx);

/* Preshader operations. parse_pres_ins() stores the index of the matching
 * pres_op_info[] entry in the instruction's 'op' field, so the enumerators
 * here are listed in the same order as the entries of that table. */
enum pres_ops
{
    PRESHADER_OP_NOP,
    PRESHADER_OP_MOV,
    PRESHADER_OP_NEG,
    PRESHADER_OP_RCP,
    PRESHADER_OP_FRC,
    PRESHADER_OP_EXP,
    PRESHADER_OP_LOG,
    PRESHADER_OP_RSQ,
    PRESHADER_OP_SIN,
    PRESHADER_OP_COS,
    PRESHADER_OP_ASIN,
    PRESHADER_OP_ACOS,
    PRESHADER_OP_ATAN,
    PRESHADER_OP_MIN,
    PRESHADER_OP_MAX,
    PRESHADER_OP_LT,
    PRESHADER_OP_GE,
    PRESHADER_OP_ADD,
    PRESHADER_OP_MUL,
    PRESHADER_OP_ATAN2,
    PRESHADER_OP_DIV,
    PRESHADER_OP_CMP,
    PRESHADER_OP_DOT,
    PRESHADER_OP_DOTSWIZ6,
    PRESHADER_OP_DOTSWIZ8,
};

/* Evaluates a single preshader operation on the values in 'args' and returns
 * the result. 'n' is an operation specific count: pres_dot() uses it as the
 * vector length, the other implementations in this file ignore it. */
typedef double (*pres_op_func)(double *args, int n);

/* Maps every NaN input to one fixed NaN bit pattern (0xfff8000000000000, i. e.
 * sign bit set); any other value is returned unchanged. */
static double to_signed_nan(double v)
{
    static const union
    {
        ULONG64 ulong64_value;
        double double_value;
    }
    negative_nan = {0xfff8000000000000};

    if (!isnan(v))
        return v;
    return negative_nan.double_value;
}

/* mov: passes the first argument through unchanged. */
static double pres_mov(double *args, int n)
{
    return args[0];
}

/* add: sum of the first two arguments. */
static double pres_add(double *args, int n)
{
    const double lhs = args[0], rhs = args[1];

    return lhs + rhs;
}

/* mul: product of the first two arguments. */
static double pres_mul(double *args, int n)
{
    const double lhs = args[0], rhs = args[1];

    return lhs * rhs;
}
/* dot: 'args' holds two vectors of n components each, stored back to back;
 * returns the sum of the pairwise products. */
static double pres_dot(double *args, int n)
{
    double acc = 0.0;
    int k = 0;

    while (k < n)
    {
        acc += args[k] * args[k + n];
        ++k;
    }
    return acc;
}

/* d3ds_dotswiz with 6 inputs: dot product of two 3 component vectors. */
static double pres_dotswiz6(double *args, int n)
{
    const int vector_size = 3;

    return pres_dot(args, vector_size);
}

/* d3ds_dotswiz with 8 inputs: dot product of two 4 component vectors. */
static double pres_dotswiz8(double *args, int n)
{
    const int vector_size = 4;

    return pres_dot(args, vector_size);
}

/* neg: negated first argument. */
static double pres_neg(double *args, int n)
{
    return -args[0];
}

/* rcp: reciprocal of the first argument. */
static double pres_rcp(double *args, int n)
{
    return 1.0 / args[0];
}

/* lt: 1.0 when args[0] < args[1], 0.0 otherwise. */
static double pres_lt(double *args, int n)
{
    if (args[0] < args[1])
        return 1.0;
    return 0.0;
}

/* ge: 1.0 when args[0] >= args[1], 0.0 otherwise. */
static double pres_ge(double *args, int n)
{
    if (args[0] >= args[1])
        return 1.0;
    return 0.0;
}
static double pres_frc(double *args, int n) {return args[0] - floor(args[0]);}
static double pres_min(double *args, int n) {return fmin(args[0], args[1]);}
static double pres_max(double *args, int n) {return fmax(args[0], args[1]);}
/* cmp: selects args[1] when args[0] >= 0.0, args[2] otherwise. */
static double pres_cmp(double *args, int n)
{
    if (args[0] >= 0.0)
        return args[1];
    return args[2];
}
static double pres_sin(double *args, int n) {return sin(args[0]);}
static double pres_cos(double *args, int n) {return cos(args[0]);}
static double pres_rsq(double *args, int n)
{
    double v;

    v = fabs(args[0]);
    if (v == 0.0)
        return INFINITY;
    else
        return 1.0 / sqrt(v);
}
static double pres_exp(double *args, int n) {return pow(2.0, args[0]);}
static double pres_log(double *args, int n)
{
    double v;

    v = fabs(args[0]);
    if (v == 0.0)
        return 0.0;
    else
#ifdef HAVE_LOG2
        return log2(v);
#else
        return log(v) / log(2);
#endif
}
/* asin: arc sine of the first argument, with NaN results passed through
 * to_signed_nan(). */
static double pres_asin(double *args, int n)
{
    const double result = asin(args[0]);

    return to_signed_nan(result);
}

/* acos: arc cosine of the first argument, with NaN results passed through
 * to_signed_nan(). */
static double pres_acos(double *args, int n)
{
    const double result = acos(args[0]);

    return to_signed_nan(result);
}
static double pres_atan(double *args, int n) {return atan(args[0]);}
static double pres_atan2(double *args, int n) {return atan2(args[0], args[1]);}

/* Tests show that the 'div' operation always produces 0. The compiler never
 * seems to emit it (it generates rcp + mul instead), so it is probably not
 * implemented in native d3dx. */
static double pres_div(double *args, int n)
{
    return 0.0;
}

/* Layout of the first word of a preshader instruction, as decoded by
 * parse_pres_ins(): opcode field, scalar operation flag and component count. */
#define PRES_OPCODE_MASK 0x7ff00000
#define PRES_OPCODE_SHIFT 20
#define PRES_SCALAR_FLAG 0x80000000
#define PRES_NCOMP_MASK  0x0000ffff

/* Four character codes, stored with the first character in the lowest byte:
 * "PRES", "CLIT", "FXLC" and "PRSI". */
#define FOURCC_PRES 0x53455250
#define FOURCC_CLIT 0x54494c43
#define FOURCC_FXLC 0x434c5846
#define FOURCC_PRSI 0x49535250
/* NOTE(review): presumably the preshader version signature ("FX" in the high
 * word); its use is not visible in this part of the file. */
#define PRES_SIGN 0x46580000

/* Description of one preshader operation. */
struct op_info
{
    /* Opcode value as found in the instruction word (after mask and shift). */
    unsigned int opcode;
    /* Mnemonic used in debug output. */
    char mnem[16];
    /* Number of input operands; together with opcode it identifies the entry. */
    unsigned int input_count;
    /* When set, parse_pres_ins() treats the output as a single component
     * when checking that the output stays within one register. */
    BOOL func_all_comps;
    /* Function computing the operation, NULL for nop. */
    pres_op_func func;
};

/* Table of supported operations, in enum pres_ops order. parse_pres_ins()
 * looks an instruction up by (opcode, input_count); the two d3ds_dotswiz
 * entries share an opcode and differ only in input count. */
static const struct op_info pres_op_info[] =
{
    {0x000, "nop", 0, 0, NULL    }, /* PRESHADER_OP_NOP */
    {0x100, "mov", 1, 0, pres_mov}, /* PRESHADER_OP_MOV */
    {0x101, "neg", 1, 0, pres_neg}, /* PRESHADER_OP_NEG */
    {0x103, "rcp", 1, 0, pres_rcp}, /* PRESHADER_OP_RCP */
    {0x104, "frc", 1, 0, pres_frc}, /* PRESHADER_OP_FRC */
    {0x105, "exp", 1, 0, pres_exp}, /* PRESHADER_OP_EXP */
    {0x106, "log", 1, 0, pres_log}, /* PRESHADER_OP_LOG */
    {0x107, "rsq", 1, 0, pres_rsq}, /* PRESHADER_OP_RSQ */
    {0x108, "sin", 1, 0, pres_sin}, /* PRESHADER_OP_SIN */
    {0x109, "cos", 1, 0, pres_cos}, /* PRESHADER_OP_COS */
    {0x10a, "asin", 1, 0, pres_asin}, /* PRESHADER_OP_ASIN */
    {0x10b, "acos", 1, 0, pres_acos}, /* PRESHADER_OP_ACOS */
    {0x10c, "atan", 1, 0, pres_atan}, /* PRESHADER_OP_ATAN */
    {0x200, "min", 2, 0, pres_min}, /* PRESHADER_OP_MIN */
    {0x201, "max", 2, 0, pres_max}, /* PRESHADER_OP_MAX */
    {0x202, "lt",  2, 0, pres_lt }, /* PRESHADER_OP_LT  */
    {0x203, "ge",  2, 0, pres_ge }, /* PRESHADER_OP_GE  */
    {0x204, "add", 2, 0, pres_add}, /* PRESHADER_OP_ADD */
    {0x205, "mul", 2, 0, pres_mul}, /* PRESHADER_OP_MUL */
    {0x206, "atan2", 2, 0, pres_atan2}, /* PRESHADER_OP_ATAN2 */
    {0x208, "div", 2, 0, pres_div}, /* PRESHADER_OP_DIV */
    {0x300, "cmp", 3, 0, pres_cmp}, /* PRESHADER_OP_CMP */
    {0x500, "dot", 2, 1, pres_dot}, /* PRESHADER_OP_DOT */
    {0x70e, "d3ds_dotswiz", 6, 0, pres_dotswiz6}, /* PRESHADER_OP_DOTSWIZ6 */
    {0x70e, "d3ds_dotswiz", 8, 0, pres_dotswiz8}, /* PRESHADER_OP_DOTSWIZ8 */
};

/* Storage type of the components of a register table. PRES_VT_COUNT is
 * returned by table_type_from_param_type() for unsupported parameter types. */
enum pres_value_type
{
    PRES_VT_FLOAT,
    PRES_VT_DOUBLE,
    PRES_VT_INT,
    PRES_VT_BOOL,
    PRES_VT_COUNT
};

/* Per register table storage description (size and type of one component),
 * indexed by register table as noted on each entry. */
static const struct
{
    unsigned int component_size;
    enum pres_value_type type;
}
table_info[] =
{
    {sizeof(double), PRES_VT_DOUBLE}, /* PRES_REGTAB_IMMED */
    {sizeof(float),  PRES_VT_FLOAT }, /* PRES_REGTAB_CONST */
    {sizeof(float),  PRES_VT_FLOAT }, /* PRES_REGTAB_OCONST */
    {sizeof(BOOL),   PRES_VT_BOOL  }, /* PRES_REGTAB_OBCONST */
    {sizeof(int),    PRES_VT_INT,  }, /* PRES_REGTAB_OICONST */
    /* TODO: use double precision for 64 bit */
    {sizeof(float),  PRES_VT_FLOAT }  /* PRES_REGTAB_TEMP */
};

/* Register table prefixes used in debug dumps, indexed by register table;
 * the trailing "(null)" entry has no table_info[] counterpart. */
static const char *table_symbol[] =
{
    "imm", "c", "oc", "ob", "oi", "r", "(null)",
};

/* Register set to register table mapping in which float constants map to
 * PRES_REGTAB_CONST. PRES_REGTAB_COUNT marks a register set without a table.
 * NOTE(review): the name suggests this is the mapping for preshader inputs;
 * its users are not visible in this part of the file. */
static const enum pres_reg_tables pres_regset2table[] =
{
    PRES_REGTAB_OBCONST,  /* D3DXRS_BOOL */
    PRES_REGTAB_OICONST,  /* D3DXRS_INT4 */
    PRES_REGTAB_CONST,    /* D3DXRS_FLOAT4 */
    PRES_REGTAB_COUNT,     /* D3DXRS_SAMPLER */
};

/* Register set to register table mapping in which float constants map to
 * PRES_REGTAB_OCONST. PRES_REGTAB_COUNT marks a register set without a table.
 * NOTE(review): the name suggests this is the mapping for shader inputs;
 * only its size is used in this part of the file. */
static const enum pres_reg_tables shad_regset2table[] =
{
    PRES_REGTAB_OBCONST,  /* D3DXRS_BOOL */
    PRES_REGTAB_OICONST,  /* D3DXRS_INT4 */
    PRES_REGTAB_OCONST,   /* D3DXRS_FLOAT4 */
    PRES_REGTAB_COUNT,     /* D3DXRS_SAMPLER */
};

/* Location of a register component within one of the register tables. */
struct d3dx_pres_reg
{
    enum pres_reg_tables table;
    /* offset is a component index, not a register index, e.g.
       the offset for component c3.y is 13 (3 * 4 + 1) */
    unsigned int offset;
};

/* Instruction operand: a register plus an optional index register used for
 * relative addressing. parse_pres_arg() sets index_reg.table to
 * PRES_REGTAB_COUNT when the operand is not relatively addressed. */
struct d3dx_pres_operand
{
    struct d3dx_pres_reg reg;
    struct d3dx_pres_reg index_reg;
};

/* Capacity of d3dx_pres_ins.inputs; matches the largest input_count in
 * pres_op_info[]. */
#define MAX_INPUTS_COUNT 8

/* One parsed preshader instruction. */
struct d3dx_pres_ins
{
    /* Index of the operation in pres_op_info[]. */
    enum pres_ops op;
    /* first input argument is scalar,
       scalar component is propagated */
    BOOL scalar_op;
    /* Number of components processed, 1 to 4 (checked by parse_pres_ins()). */
    unsigned int component_count;
    struct d3dx_pres_operand inputs[MAX_INPUTS_COUNT];
    struct d3dx_pres_operand output;
};

/* Layout information computed by get_const_upload_info() for copying a
 * parameter into constant registers. */
struct const_upload_info
{
    /* Set when parameter and constant are matrices of opposite majority. */
    BOOL transpose;
    /* Parameter dimensions, ordered according to the constant's class. */
    unsigned int major, minor;
    /* Number of table components between consecutive major entries. */
    unsigned int major_stride;
    /* Number of complete major entries covered by the constant's registers. */
    unsigned int major_count;
    /* Total component count: major_count * minor + minor_remainder. */
    unsigned int count;
    /* Components of a trailing, incomplete major entry. */
    unsigned int minor_remainder;
};

/* Translates an effect parameter type into the matching register table
 * value type. Unsupported types are reported and yield PRES_VT_COUNT. */
static enum pres_value_type table_type_from_param_type(D3DXPARAMETER_TYPE type)
{
    if (type == D3DXPT_FLOAT)
        return PRES_VT_FLOAT;
    if (type == D3DXPT_INT)
        return PRES_VT_INT;
    if (type == D3DXPT_BOOL)
        return PRES_VT_BOOL;

    FIXME("Unsupported type %u.\n", type);
    return PRES_VT_COUNT;
}

/* Converts a component offset into a register index: the bool constant table
 * has one component per register, all other tables have four. */
static unsigned int get_reg_offset(unsigned int table, unsigned int offset)
{
    if (table == PRES_REGTAB_OBCONST)
        return offset;
    return offset >> 2;
}

/* Converts a register index into a component offset: the bool constant table
 * has one component per register, all other tables have four. */
static unsigned int get_offset_reg(unsigned int table, unsigned int reg_idx)
{
    if (table == PRES_REGTAB_OBCONST)
        return reg_idx;
    return reg_idx << 2;
}

/* Number of components in one register of the given table. */
static unsigned int get_reg_components(unsigned int table)
{
    const unsigned int one_register = 1;

    return get_offset_reg(table, one_register);
}

/* Number of bits in one unsigned int (assuming 8 bit bytes). */
#define PRES_BITMASK_BLOCK_SIZE (sizeof(unsigned int) * 8)

/* Allocates zero initialized storage for one register table, sized from
 * rs->table_sizes[table]. Nothing is allocated for an empty table.
 * Returns E_OUTOFMEMORY when the allocation fails, D3D_OK otherwise. */
static HRESULT regstore_alloc_table(struct d3dx_regstore *rs, unsigned int table)
{
    const unsigned int byte_count = get_offset_reg(table, rs->table_sizes[table])
            * table_info[table].component_size;

    if (!byte_count)
        return D3D_OK;

    rs->tables[table] = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, byte_count);
    return rs->tables[table] ? D3D_OK : E_OUTOFMEMORY;
}

/* Releases the storage of every register table in the register store. */
static void regstore_free_tables(struct d3dx_regstore *rs)
{
    unsigned int table = 0;

    while (table < PRES_REGTAB_COUNT)
    {
        HeapFree(GetProcessHeap(), 0, rs->tables[table]);
        ++table;
    }
}

static void regstore_set_values(struct d3dx_regstore *rs, unsigned int table, const void *data,
        unsigned int start_offset, unsigned int count)
{
    BYTE *dst = rs->tables[table];
    const BYTE *src = data;
    unsigned int size;

    dst += start_offset * table_info[table].component_size;
    size = count * table_info[table].component_size;
    assert((src < dst && size <= dst - src) || (src > dst && size <= src - dst));
    memcpy(dst, src, size);
}

static double regstore_get_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset)
{
    BYTE *p;

    p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset;
    switch (table_info[table].type)
    {
        case PRES_VT_FLOAT:
            return *(float *)p;
        case PRES_VT_DOUBLE:
            return *(double *)p;
        default:
            FIXME("Unexpected preshader input from table %u.\n", table);
            return NAN;
    }
}

static void regstore_set_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset, double v)
{
    BYTE *p;

    p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset;
    switch (table_info[table].type)
    {
        case PRES_VT_FLOAT : *(float *)p = v; break;
        case PRES_VT_DOUBLE: *(double *)p = v; break;
        case PRES_VT_INT   : *(int *)p = lrint(v); break;
        case PRES_VT_BOOL  : *(BOOL *)p = !!v; break;
        default:
            FIXME("Bad type %u.\n", table_info[table].type);
            break;
    }
}

/* Traces a byte code buffer as hexadecimal 32 bit words, at most eight per
 * line. 'size' is given in bytes; a trailing partial word is not printed. */
static void dump_bytecode(void *data, unsigned int size)
{
    unsigned int *words = (unsigned int *)data;
    unsigned int word_count, row_start, row_length, k;

    word_count = size / sizeof(*words);
    for (row_start = 0; row_start < word_count; row_start += row_length)
    {
        row_length = min(word_count - row_start, 8);
        for (k = 0; k < row_length; ++k)
            TRACE("0x%08x,", words[row_start + k]);
        TRACE("\n");
    }
}

/* Walks the consecutive comment sections at the start of 'ptr' (each is
 * introduced by a word with 0xfffe in the low half and the section length in
 * words in the high half) and looks for the one tagged with 'fourcc'.
 *
 * On success returns a pointer to the word following the fourcc and stores
 * the section length (which includes the fourcc word) in *size. Returns NULL
 * when no matching section is found or the data is truncated. */
static unsigned int *find_bytecode_comment(unsigned int *ptr, unsigned int count,
        unsigned int fourcc, unsigned int *size)
{
    for (;;)
    {
        unsigned int payload_words, section_words;

        /* Provide at least one value in comment section on non-NULL return. */
        if (count <= 2 || (*ptr & 0xffff) != 0xfffe)
            return NULL;

        payload_words = *ptr >> 16;
        section_words = payload_words + 1;
        if (!payload_words || section_words > count)
            return NULL;

        if (ptr[1] == fourcc)
        {
            *size = payload_words;
            return ptr + 2;
        }

        ptr += section_words;
        count -= section_words;
    }
}

/* Parses a register reference consisting of two words: a register table code
 * followed by a component offset. Returns the pointer past the parsed words,
 * or NULL (leaving *reg untouched) for an unsupported table code. */
static unsigned int *parse_pres_reg(unsigned int *ptr, struct d3dx_pres_reg *reg)
{
    static const enum pres_reg_tables reg_table[8] =
    {
        PRES_REGTAB_COUNT, PRES_REGTAB_IMMED, PRES_REGTAB_CONST, PRES_REGTAB_COUNT,
        PRES_REGTAB_OCONST, PRES_REGTAB_OBCONST, PRES_REGTAB_OICONST, PRES_REGTAB_TEMP
    };
    const unsigned int table_code = ptr[0];

    if (table_code >= ARRAY_SIZE(reg_table) || reg_table[table_code] == PRES_REGTAB_COUNT)
    {
        FIXME("Unsupported register table %#x.\n", table_code);
        return NULL;
    }

    reg->table = reg_table[table_code];
    reg->offset = ptr[1];
    return ptr + 2;
}

/* Parses one instruction operand.
 *
 * An operand starts with a relative addressing flag word. A zero flag is
 * followed by the operand register (3 words in total); a flag of 1 is followed
 * by the index register and then the operand register (5 words in total).
 * 'count' is the number of words available at 'ptr'.
 *
 * Returns the pointer past the operand, or NULL when the buffer is too short,
 * the flag is unknown or a register table is not supported. The contents of
 * *opr are unspecified on failure. */
static unsigned int *parse_pres_arg(unsigned int *ptr, unsigned int count, struct d3dx_pres_operand *opr)
{
    if (count < 3 || (*ptr && count < 5))
    {
        WARN("Byte code buffer ends unexpectedly, count %u.\n", count);
        return NULL;
    }

    if (*ptr)
    {
        if (*ptr != 1)
        {
            FIXME("Unknown relative addressing flag, word %#x.\n", *ptr);
            return NULL;
        }
        ptr = parse_pres_reg(ptr + 1, &opr->index_reg);
        if (!ptr)
            return NULL;
    }
    else
    {
        /* PRES_REGTAB_COUNT marks the operand as not relatively addressed. */
        opr->index_reg.table = PRES_REGTAB_COUNT;
        ++ptr;
    }

    ptr = parse_pres_reg(ptr, &opr->reg);
    /* parse_pres_reg() does not fill the register on failure,
     * so opr->reg must not be touched in that case. */
    if (!ptr)
        return NULL;

    if (opr->reg.table == PRES_REGTAB_OBCONST)
        opr->reg.offset /= 4;
    return ptr;
}

/* Parses one preshader instruction.
 *
 * The instruction consists of an opcode word (opcode, scalar flag and
 * component count), an input count word, the input operands and the output
 * operand. 'count' is the number of words available at 'ptr'.
 *
 * Returns the pointer past the instruction, or NULL when the buffer is too
 * short or the instruction is not supported. */
static unsigned int *parse_pres_ins(unsigned int *ptr, unsigned int count, struct d3dx_pres_ins *ins)
{
    unsigned int ins_code, ins_raw;
    unsigned int input_count;
    unsigned int i;

    if (count < 2)
    {
        WARN("Byte code buffer ends unexpectedly.\n");
        return NULL;
    }

    ins_raw = *ptr++;
    ins_code = (ins_raw & PRES_OPCODE_MASK) >> PRES_OPCODE_SHIFT;
    ins->component_count = ins_raw & PRES_NCOMP_MASK;
    ins->scalar_op = !!(ins_raw & PRES_SCALAR_FLAG);

    if (ins->component_count < 1 || ins->component_count > 4)
    {
        FIXME("Unsupported number of components %u.\n", ins->component_count);
        return NULL;
    }
    input_count = *ptr++;
    count -= 2;
    /* An operation is identified by both its opcode and its input count. */
    for (i = 0; i < ARRAY_SIZE(pres_op_info); ++i)
        if (ins_code == pres_op_info[i].opcode && input_count == pres_op_info[i].input_count)
            break;
    if (i == ARRAY_SIZE(pres_op_info))
    {
        FIXME("Unknown opcode %#x, input_count %u, raw %#x.\n", ins_code, input_count, ins_raw);
        return NULL;
    }
    ins->op = i;
    if (input_count > ARRAY_SIZE(ins->inputs))
    {
        FIXME("Actual input args count %u exceeds inputs array size, instruction %s.\n", input_count,
                pres_op_info[i].mnem);
        return NULL;
    }
    for (i = 0; i < input_count; ++i)
    {
        unsigned int *p;

        p = parse_pres_arg(ptr, count, &ins->inputs[i]);
        if (!p)
            return NULL;
        count -= p - ptr;
        ptr = p;
    }
    ptr = parse_pres_arg(ptr, count, &ins->output);
    /* The output operand may be left unfilled on failure, do not inspect it. */
    if (!ptr)
        return NULL;
    if (ins->output.index_reg.table != PRES_REGTAB_COUNT)
    {
        FIXME("Relative addressing in output register not supported.\n");
        return NULL;
    }
    /* The first and the last written component have to be in the same register. */
    if (get_reg_offset(ins->output.reg.table, ins->output.reg.offset
            + (pres_op_info[ins->op].func_all_comps ? 0 : ins->component_count - 1))
            != get_reg_offset(ins->output.reg.table, ins->output.reg.offset))
    {
        FIXME("Instructions outputting multiple registers are not supported.\n");
        return NULL;
    }
    return ptr;
}

/* Retrieves the description of a constant table constant and, when
 * 'constantinfo_reserved' is not NULL, its constantinfo_reserved value
 * (zeroed on failure). Returns D3DERR_INVALIDCALL when the constant cannot be
 * resolved; *desc is not written in that case. */
static HRESULT get_ctab_constant_desc(ID3DXConstantTable *ctab, D3DXHANDLE hc, D3DXCONSTANT_DESC *desc,
        WORD *constantinfo_reserved)
{
    const struct ctab_constant *constant = d3dx_shader_get_ctab_constant(ctab, hc);

    if (constant)
    {
        *desc = constant->desc;
        if (constantinfo_reserved)
            *constantinfo_reserved = constant->constantinfo_reserved;
        return D3D_OK;
    }

    FIXME("Could not get constant desc.\n");
    if (constantinfo_reserved)
        *constantinfo_reserved = 0;
    return D3DERR_INVALIDCALL;
}

static void get_const_upload_info(struct d3dx_const_param_eval_output *const_set,
        struct const_upload_info *info)
{
    struct d3dx_parameter *param = const_set->param;
    unsigned int table = const_set->table;

    info->transpose = (const_set->constant_class == D3DXPC_MATRIX_COLUMNS && param->class == D3DXPC_MATRIX_ROWS)
            || (param->class == D3DXPC_MATRIX_COLUMNS && const_set->constant_class == D3DXPC_MATRIX_ROWS);
    if (const_set->constant_class == D3DXPC_MATRIX_COLUMNS)
    {
        info->major = param->columns;
        info->minor = param->rows;
    }
    else
    {
        info->major = param->rows;
        info->minor = param->columns;
    }

    if (get_reg_components(table) == 1)
    {
        unsigned int const_length = get_offset_reg(table, const_set->register_count);

        info->major_stride = info->minor;
        info->major_count = const_length / info->major_stride;
        info->minor_remainder = const_length % info->major_stride;
    }
    else
    {
        info->major_stride = get_reg_components(table);
        info->major_count = const_set->register_count;
        info->minor_remainder = 0;
    }
    info->count = info->major_count * info->minor + info->minor_remainder;
}

/* Capacity of the const_set array after its first allocation. */
#define INITIAL_CONST_SET_SIZE 16

/* Appends a copy of *set to const_tab->const_set, allocating the array on
 * first use and doubling its capacity whenever it is full. On allocation
 * failure the existing array is left intact and E_OUTOFMEMORY is returned. */
static HRESULT append_const_set(struct d3dx_const_tab *const_tab, struct d3dx_const_param_eval_output *set)
{
    if (const_tab->const_set_count >= const_tab->const_set_size)
    {
        struct d3dx_const_param_eval_output *grown;
        unsigned int capacity;

        if (const_tab->const_set_size)
        {
            capacity = const_tab->const_set_size * 2;
            grown = HeapReAlloc(GetProcessHeap(), 0, const_tab->const_set,
                    sizeof(*const_tab->const_set) * capacity);
        }
        else
        {
            capacity = INITIAL_CONST_SET_SIZE;
            grown = HeapAlloc(GetProcessHeap(), 0, sizeof(*const_tab->const_set) * capacity);
        }
        if (!grown)
        {
            ERR("Out of memory.\n");
            return E_OUTOFMEMORY;
        }
        const_tab->const_set = grown;
        const_tab->const_set_size = capacity;
    }

    const_tab->const_set[const_tab->const_set_count] = *set;
    ++const_tab->const_set_count;
    return D3D_OK;
}

static void append_pres_const_sets_for_shader_input(struct d3dx_const_tab *const_tab,
        struct d3dx_preshader *pres)
{
    unsigned int i;
    struct d3dx_const_param_eval_output const_set = {NULL};

    for (i = 0; i < pres->ins_count; ++i)
    {
        const struct d3dx_pres_ins *ins = &pres->ins[i];
        const struct d3dx_pres_reg *reg = &ins->output.reg;

        if (reg->table == PRES_REGTAB_TEMP)
            continue;

        const_set.register_index = get_reg_offset(reg->table, reg->offset);
        const_set.register_count = 1;
        const_set.table = reg->table;
        const_set.constant_class = D3DXPC_FORCE_DWORD;
        const_set.element_count = 1;
        append_const_set(const_tab, &const_set);
    }
}

/* qsort() comparison callback ordering const set entries by register table
 * first and by register index second.
 *
 * The result is built from explicit comparisons rather than a subtraction, so
 * it has the correct sign for any pair of values (a difference narrowed to
 * int can wrap around for values that are far apart). */
static int compare_const_set(const void *a, const void *b)
{
    const struct d3dx_const_param_eval_output *r1 = a;
    const struct d3dx_const_param_eval_output *r2 = b;

    if (r1->table != r2->table)
        return (r1->table > r2->table) - (r1->table < r2->table);
    return (r1->register_index > r2->register_index) - (r1->register_index < r2->register_index);
}

static HRESULT merge_const_set_entries(struct d3dx_const_tab *const_tab,
        struct d3dx_parameter *param, unsigned int index)
{
    unsigned int i, start_index = index;
    DWORD *current_data;
    enum pres_reg_tables current_table;
    unsigned int current_start_offset, element_count;
    struct d3dx_const_param_eval_output *first_const;

    if (!const_tab->const_set_count)
        return D3D_OK;

    while (index < const_tab->const_set_count - 1)
    {
        first_const = &const_tab->const_set[index];
        current_data = first_const->param->data;
        current_table = first_const->table;
        current_start_offset = get_offset_reg(current_table, first_const->register_index);
        element_count = 0;
        for (i = index; i < const_tab->const_set_count; ++i)
        {
            struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[i];
            unsigned int count = get_offset_reg(const_set->table,
                    const_set->register_count * const_set->element_count);
            unsigned int start_offset = get_offset_reg(const_set->table, const_set->register_index);

            if (!(const_set->table == current_table && current_start_offset == start_offset
                    && const_set->direct_copy == first_const->direct_copy
                    && current_data == const_set->param->data
                    && (const_set->direct_copy || (first_const->param->type == const_set->param->type
                    && first_const->param->class == const_set->param->class
                    && first_const->param->columns == const_set->param->columns
                    && first_const->param->rows == const_set->param->rows
                    && first_const->register_count == const_set->register_count
                    && (i == const_tab->const_set_count - 1
                    || first_const->param->element_count == const_set->param->element_count)))))
                break;

            current_start_offset += count;
            current_data += const_set->direct_copy ? count : const_set->param->rows
                    * const_set->param->columns * const_set->element_count;
            element_count += const_set->element_count;
        }

        if (i > index + 1)
        {
            TRACE("Merging %u child parameters for %s, not merging %u, direct_copy %#x.\n", i - index,
                    debugstr_a(param->name), const_tab->const_set_count - i, first_const->direct_copy);

            first_const->element_count = element_count;
            if (first_const->direct_copy)
            {
                first_const->element_count = 1;
                if (index == start_index
                        && !(param->type == D3DXPT_VOID && param->class == D3DXPC_STRUCT))
                {
                    if (table_type_from_param_type(param->type) == PRES_VT_COUNT)
                        return D3DERR_INVALIDCALL;
                    first_const->param = param;
                }
                first_const->register_count = get_reg_offset(current_table, current_start_offset)
                        - first_const->register_index;
            }
            memmove(&const_tab->const_set[index + 1], &const_tab->const_set[i],
                    sizeof(*const_tab->const_set) * (const_tab->const_set_count - i));
            const_tab->const_set_count -= i - index - 1;
        }
        else
        {
            TRACE("Not merging %u child parameters for %s, direct_copy %#x.\n",
                    const_tab->const_set_count - i, debugstr_a(param->name), first_const->direct_copy);
        }
        index = i;
    }
    return D3D_OK;
}

/* Creates the const set entries needed to upload 'param' to the constant
 * identified by 'hc'.
 *
 * Arrays and structures are handled by recursing into the elements / members
 * of both the parameter and the constant and merging the resulting entries
 * afterwards. For a leaf parameter a single entry is appended which records
 * the destination table and registers and whether the data can be copied
 * directly.
 *
 * Returns D3DERR_INVALIDCALL when parameter and constant do not match or use
 * something unsupported, an error from append_const_set() on allocation
 * failure, D3D_OK otherwise (including constants which need no upload). */
static HRESULT init_set_constants_param(struct d3dx_const_tab *const_tab, ID3DXConstantTable *ctab,
        D3DXHANDLE hc, struct d3dx_parameter *param)
{
    D3DXCONSTANT_DESC desc;
    unsigned int const_count, param_count, i;
    BOOL get_element;
    struct d3dx_const_param_eval_output const_set;
    struct const_upload_info info;
    enum pres_value_type table_type;
    HRESULT hr;

    if (FAILED(get_ctab_constant_desc(ctab, hc, &desc, NULL)))
        return D3DERR_INVALIDCALL;

    /* Children are either array elements or structure members. */
    if (param->element_count)
    {
        param_count = param->element_count;
        const_count = desc.Elements;
        get_element = TRUE;
    }
    else
    {
        if (desc.Elements > 1)
        {
            FIXME("Unexpected number of constant elements %u.\n", desc.Elements);
            return D3DERR_INVALIDCALL;
        }
        param_count = param->member_count;
        const_count = desc.StructMembers;
        get_element = FALSE;
    }
    if (const_count != param_count)
    {
        FIXME("Number of elements or struct members differs between parameter (%u) and constant (%u).\n",
                param_count, const_count);
        return D3DERR_INVALIDCALL;
    }
    if (const_count)
    {
        HRESULT ret = D3D_OK;
        D3DXHANDLE hc_element;
        /* Entries appended for the children start here. */
        unsigned int index = const_tab->const_set_count;

        /* All children are processed even after a failure; the last failure
         * code is returned once the loop is done. */
        for (i = 0; i < const_count; ++i)
        {
            if (get_element)
                hc_element = ID3DXConstantTable_GetConstantElement(ctab, hc, i);
            else
                hc_element = ID3DXConstantTable_GetConstant(ctab, hc, i);
            if (!hc_element)
            {
                FIXME("Could not get constant.\n");
                hr = D3DERR_INVALIDCALL;
            }
            else
            {
                hr = init_set_constants_param(const_tab, ctab, hc_element, &param->members[i]);
            }
            if (FAILED(hr))
                ret = hr;
        }
        if (FAILED(ret))
            return ret;
        return merge_const_set_entries(const_tab, param, index);
    }

    /* Leaf parameter from here on. */
    TRACE("Constant %s, rows %u, columns %u, class %u, bytes %u.\n",
            debugstr_a(desc.Name), desc.Rows, desc.Columns, desc.Class, desc.Bytes);
    TRACE("Parameter %s, rows %u, columns %u, class %u, flags %#x, bytes %u.\n",
            debugstr_a(param->name), param->rows, param->columns, param->class,
            param->flags, param->bytes);

    const_set.element_count = 1;
    const_set.param = param;
    const_set.constant_class = desc.Class;
    if (desc.RegisterSet >= ARRAY_SIZE(shad_regset2table))
    {
        FIXME("Unknown register set %u.\n", desc.RegisterSet);
        return D3DERR_INVALIDCALL;
    }
    const_set.register_index = desc.RegisterIndex;
    const_set.table = const_tab->regset2table[desc.RegisterSet];
    if (const_set.table >= PRES_REGTAB_COUNT)
    {
        ERR("Unexpected register set %u.\n", desc.RegisterSet);
        return D3DERR_INVALIDCALL;
    }
    /* Both the table components and the parameter components are expected to
     * be 4 bytes wide. */
    assert(table_info[const_set.table].component_size == sizeof(unsigned int));
    assert(param->bytes / (param->rows * param->columns) == sizeof(unsigned int));
    const_set.register_count = desc.RegisterCount;
    table_type = table_info[const_set.table].type;
    get_const_upload_info(&const_set, &info);
    if (!info.count)
    {
        TRACE("%s has zero count, skipping.\n", debugstr_a(param->name));
        return D3D_OK;
    }

    if (table_type_from_param_type(param->type) == PRES_VT_COUNT)
        return D3DERR_INVALIDCALL;

    /* A direct copy is possible when no type conversion, transpose or padding
     * is involved and the parameter holds enough data for all registers. */
    const_set.direct_copy = table_type_from_param_type(param->type) == table_type
            && !info.transpose && info.minor == info.major_stride
            && info.count == get_offset_reg(const_set.table, const_set.register_count)
            && info.count * sizeof(unsigned int) <= param->bytes;
    if (info.minor_remainder && !const_set.direct_copy && !info.transpose)
        FIXME("Incomplete last row for not transposed matrix which cannot be directly copied, parameter %s.\n",
                debugstr_a(param->name));

    if (info.major_count > info.major
            || (info.major_count == info.major && info.minor_remainder))
    {
        WARN("Constant dimensions exceed parameter size.\n");
        return D3DERR_INVALIDCALL;
    }

    if (FAILED(hr = append_const_set(const_tab, &const_set)))
        return hr;

    return D3D_OK;
}

static HRESULT get_constants_desc(unsigned int *byte_code, struct d3dx_const_tab *out,
        struct d3dx9_base_effect *base, const char **skip_constants,
        unsigned int skip_constants_count, struct d3dx_preshader *pres)
{
    ID3DXConstantTable *ctab;
    D3DXCONSTANT_DESC *cdesc;
    struct d3dx_parameter **inputs_param;
    D3DXCONSTANTTABLE_DESC desc;
    HRESULT hr;
    D3DXHANDLE hc;
    unsigned int i, j;

    hr = D3DXGetShaderConstantTable(byte_code, &ctab);
    if (FAILED(hr) || !ctab)
    {
        TRACE("Could not get CTAB data, hr %#x.\n", hr);
        /* returning OK, shaders and preshaders without CTAB are valid */
        return D3D_OK;
    }
    if (FAILED(hr = ID3DXConstantTable_GetDesc(ctab, &desc)))
    {
        FIXME("Could not get CTAB desc, hr %#x.\n", hr);
        goto cleanup;
    }

    out->inputs = cdesc = HeapAlloc(GetProcessHeap(), 0, sizeof(*cdesc) * desc.Constants);
    out->inputs_param = inputs_param = HeapAlloc(GetProcessHeap(), 0, sizeof(*inputs_param) * desc.Constants);
    if (!cdesc || !inputs_param)
    {
        hr = E_OUTOFMEMORY;
        goto cleanup;
    }

    for (i = 0; i < desc.Constants; ++i)
    {
        unsigned int index = out->input_count;
        WORD constantinfo_reserved;

        hc = ID3DXConstantTable_GetConstant(ctab, NULL, i);
        if (!hc)
        {
            FIXME("Null constant handle.\n");
            goto cleanup;
        }
        if (FAILED(hr = get_ctab_constant_desc(ctab, hc, &cdesc[index], &constantinfo_reserved)))
            goto cleanup;
        inputs_param[index] = get_parameter_by_name(base, NULL, cdesc[index].Name);
        if (!inputs_param[index])
        {
            WARN("Could not find parameter %s in effect.\n", cdesc[index].Name);
            continue;
        }
        if (cdesc[index].Class == D3DXPC_OBJECT)
        {
            TRACE("Object %s, parameter %p.\n", cdesc[index].Name, inputs_param[index]);
            if (cdesc[index].RegisterSet != D3DXRS_SAMPLER || inputs_param[index]->class != D3DXPC_OBJECT
                    || !is_param_type_sampler(inputs_param[index]->type))
            {
                WARN("Unexpected object type, constant %s.\n", debugstr_a(cdesc[index].Name));
                hr = D3DERR_INVALIDCALL;
                goto cleanup;
            }
            if (max(inputs_param[index]->element_count, 1) < cdesc[index].RegisterCount)
            {
                WARN("Register count exceeds parameter size, constant %s.\n", debugstr_a(cdesc[index].Name));
                hr = D3DERR_INVALIDCALL;
                goto cleanup;
            }
        }
        if (!is_top_level_parameter(inputs_param[index]))
        {
            WARN("Expected top level parameter '%s'.\n", debugstr_a(cdesc[index].Name));
            hr = E_FAIL;
            goto cleanup;
        }

        for (j = 0; j < skip_constants_count; ++j)
        {
            if (!strcmp(cdesc[index].Name, skip_constants[j]))
            {
                if (!constantinfo_reserved)
                {
                    WARN("skip_constants parameter %s is not register bound.\n",
                            cdesc[index].Name);
                    hr = D3DERR_INVALIDCALL;
                    goto cleanup;
                }
                TRACE("Skipping constant %s.\n", cdesc[index].Name);
                break;
            }
        }
        if (j < skip_constants_count)
            continue;
        ++out->input_count;
        if (inputs_param[index]->class == D3DXPC_OBJECT)
            continue;
        if (FAILED(hr = init_set_constants_param(out, ctab, hc, inputs_param[index])))
            goto cleanup;
    }
    if (pres)
        append_pres_const_sets_for_shader_input(out, pres);
    if (out->const_set_count)
    {
        struct d3dx_const_param_eval_output *new_alloc;

        qsort(out->const_set, out->const_set_count, sizeof(*out->const_set), compare_const_set);

        i = 0;
        while (i < out->const_set_count - 1)
        {
            if (out->const_set[i].constant_class == D3DXPC_FORCE_DWORD
                    && out->const_set[i + 1].constant_class == D3DXPC_FORCE_DWORD
                    && out->const_set[i].table == out->const_set[i + 1].table
                    && out->const_set[i].register_index + out->const_set[i].register_count
                    >= out->const_set[i + 1].register_index)
            {
                assert(out->const_set[i].register_index + out->const_set[i].register_count
                        <= out->const_set[i + 1].register_index + 1);
                out->const_set[i].register_count = out->const_set[i + 1].register_index + 1
                        - out->const_set[i].register_index;
                memmove(&out->const_set[i + 1], &out->const_set[i + 2], sizeof(out->const_set[i])
                        * (out->const_set_count - i - 2));
                --out->const_set_count;
            }
            else
            {
                ++i;
            }
        }

        new_alloc = HeapReAlloc(GetProcessHeap(), 0, out->const_set,
                sizeof(*out->const_set) * out->const_set_count);
        if (new_alloc)
        {
            out->const_set = new_alloc;
            out->const_set_size = out->const_set_count;
        }
        else
        {
            WARN("Out of memory.\n");
        }
    }
cleanup:
    ID3DXConstantTable_Release(ctab);
    return hr;
}

/* Grow the recorded size of register table 'table' so that it covers register index
 * 'max_register'. Table ids at or above PRES_REGTAB_COUNT are silently ignored. */
static void update_table_size(unsigned int *table_sizes, unsigned int table, unsigned int max_register)
{
    unsigned int required;

    if (table >= PRES_REGTAB_COUNT)
        return;

    required = max_register + 1;
    if (table_sizes[table] < required)
        table_sizes[table] = required;
}

/* Make sure the per-table sizes cover every register range used by the inputs of 'ctab'.
 * The register set of each input is mapped to a register table through ctab->regset2table. */
static void update_table_sizes_consts(unsigned int *table_sizes, struct d3dx_const_tab *ctab)
{
    unsigned int idx;

    for (idx = 0; idx < ctab->input_count; ++idx)
    {
        unsigned int first_register = ctab->inputs[idx].RegisterIndex;
        unsigned int register_count = ctab->inputs[idx].RegisterCount;

        /* Inputs which occupy no registers do not affect the table sizes. */
        if (register_count)
            update_table_size(table_sizes, ctab->regset2table[ctab->inputs[idx].RegisterSet],
                    first_register + register_count - 1);
    }
}

static void dump_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *arg, int component_count)
{
    static const char *xyzw_str = "xyzw";
    unsigned int i, table;

    table = arg->reg.table;
    if (table == PRES_REGTAB_IMMED && arg->index_reg.table == PRES_REGTAB_COUNT)
    {
        TRACE("(");
        for (i = 0; i < component_count; ++i)
            TRACE(i < component_count - 1 ? "%.16e, " : "%.16e",
                    ((double *)rs->tables[PRES_REGTAB_IMMED])[arg->reg.offset + i]);
        TRACE(")");
    }
    else
    {
        if (arg->index_reg.table == PRES_REGTAB_COUNT)
        {
            TRACE("%s%u.", table_symbol[table], get_reg_offset(table, arg->reg.offset));
        }
        else
        {
            unsigned int index_reg;

            index_reg = get_reg_offset(arg->index_reg.table, arg->index_reg.offset);
            TRACE("%s[%u + %s%u.%c].", table_symbol[table], get_reg_offset(table, arg->reg.offset),
                    table_symbol[arg->index_reg.table], index_reg,
                    xyzw_str[arg->index_reg.offset - get_offset_reg(arg->index_reg.table, index_reg)]);
        }
        for (i = 0; i < component_count; ++i)
            TRACE("%c", xyzw_str[(arg->reg.offset + i) % 4]);
    }
}

/* Trace one line per constant table input: parameter name (or "(nil)" when no parameter is
 * attached), register table symbol, first register index and register count. */
static void dump_registers(struct d3dx_const_tab *ctab)
{
    unsigned int idx;

    for (idx = 0; idx < ctab->input_count; ++idx)
    {
        const char *name = ctab->inputs_param[idx] ? ctab->inputs_param[idx]->name : "(nil)";
        unsigned int table = ctab->regset2table[ctab->inputs[idx].RegisterSet];

        TRACE("//   %-12s %s%-4u %u\n", name, table_symbol[table],
                ctab->inputs[idx].RegisterIndex, ctab->inputs[idx].RegisterCount);
    }
}

static void dump_ins(struct d3dx_regstore *rs, const struct d3dx_pres_ins *ins)
{
    unsigned int i;

    TRACE("%s ", pres_op_info[ins->op].mnem);
    dump_arg(rs, &ins->output, pres_op_info[ins->op].func_all_comps ? 1 : ins->component_count);
    for (i = 0; i < pres_op_info[ins->op].input_count; ++i)
    {
        TRACE(", ");
        dump_arg(rs, &ins->inputs[i], ins->scalar_op && !i ? 1 : ins->component_count);
    }
    TRACE("\n");
}

/* Trace a whole preshader: the immediate values (four per line), the input registers and
 * finally the instruction listing. */
static void dump_preshader(struct d3dx_preshader *pres)
{
    const double *values = pres->regs.tables[PRES_REGTAB_IMMED];
    unsigned int total = pres->regs.table_sizes[PRES_REGTAB_IMMED] * 4;
    unsigned int idx;

    if (total)
        TRACE("// Immediates:\n");
    for (idx = 0; idx < total; ++idx)
    {
        unsigned int column = idx % 4;

        if (!column)
            TRACE("// ");
        TRACE("%.8e", values[idx]);
        if (column != 3)
            TRACE(", ");
        else
            TRACE("\n");
    }

    TRACE("// Preshader registers:\n");
    dump_registers(&pres->inputs);

    TRACE("preshader\n");
    for (idx = 0; idx < pres->ins_count; ++idx)
        dump_ins(&pres->regs, &pres->ins[idx]);
}

/* Parse preshader byte code into 'pres'.
 *
 * ptr points at the first (version) token of the preshader and count is the number of
 * 32 bit words available starting from ptr. The optional CLIT comment section supplies the
 * double precision immediate values, the FXLC comment section supplies the instructions.
 *
 * Returns D3D_OK on success and also when no FXLC section is present, D3DXERR_INVALIDDATA
 * for malformed byte code, E_OUTOFMEMORY on allocation failure, or the failure code of
 * get_constants_desc(). Memory already attached to 'pres' is not released here on failure;
 * d3dx_create_param_eval() releases it through d3dx_free_param_eval(). */
static HRESULT parse_preshader(struct d3dx_preshader *pres, unsigned int *ptr, unsigned int count, struct d3dx9_base_effect *base)
{
    unsigned int *p;
    unsigned int i, j, const_count;
    double *dconst;
    HRESULT hr;
    unsigned int saved_word;
    unsigned int section_size;

    /* Validate the buffer size before touching the version token. */
    if (!count)
    {
        WARN("Unexpected end of byte code buffer.\n");
        return D3DXERR_INVALIDDATA;
    }

    TRACE("Preshader version %#x.\n", *ptr & 0xffff);

    /* Immediate constants: a count followed by that many doubles. */
    p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_CLIT, &section_size);
    if (p)
    {
        const_count = *p++;
        if (const_count > (section_size - 1) / (sizeof(double) / sizeof(unsigned int)))
        {
            WARN("Byte code buffer ends unexpectedly.\n");
            return D3DXERR_INVALIDDATA;
        }
        dconst = (double *)p;
    }
    else
    {
        const_count = 0;
        dconst = NULL;
    }
    TRACE("%u double constants.\n", const_count);

    /* Instructions: an instruction count followed by the encoded instructions. */
    p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_FXLC, &section_size);
    if (!p)
    {
        WARN("Could not find preshader code.\n");
        return D3D_OK;
    }
    pres->ins_count = *p++;
    --section_size;
    /* Reject counts for which the allocation size below would overflow. */
    if (pres->ins_count > UINT_MAX / sizeof(*pres->ins))
    {
        WARN("Invalid instruction count %u.\n", pres->ins_count);
        return D3DXERR_INVALIDDATA;
    }
    TRACE("%u instructions.\n", pres->ins_count);
    pres->ins = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*pres->ins) * pres->ins_count);
    if (!pres->ins)
        return E_OUTOFMEMORY;
    for (i = 0; i < pres->ins_count; ++i)
    {
        unsigned int *ptr_next;

        ptr_next = parse_pres_ins(p, section_size, &pres->ins[i]);
        if (!ptr_next)
            return D3DXERR_INVALIDDATA;
        section_size -= ptr_next - p;
        p = ptr_next;
    }

    pres->inputs.regset2table = pres_regset2table;

    /* The first token is temporarily replaced with 0xfffe0000 while the constant table is
     * read and restored right after. NOTE(review): presumably this makes the blob look like
     * shader byte code to get_constants_desc() - confirm. */
    saved_word = *ptr;
    *ptr = 0xfffe0000;
    hr = get_constants_desc(ptr, &pres->inputs, base, NULL, 0, NULL);
    *ptr = saved_word;
    if (FAILED(hr))
        return hr;

    if (const_count % get_reg_components(PRES_REGTAB_IMMED))
    {
        FIXME("const_count %u is not a multiple of %u.\n", const_count,
                get_reg_components(PRES_REGTAB_IMMED));
        return D3DXERR_INVALIDDATA;
    }
    pres->regs.table_sizes[PRES_REGTAB_IMMED] = get_reg_offset(PRES_REGTAB_IMMED, const_count);

    update_table_sizes_consts(pres->regs.table_sizes, &pres->inputs);
    /* Walk the instructions in order: every input must refer to a register which is already
     * covered by the table sizes, while every output extends the size of its table. */
    for (i = 0; i < pres->ins_count; ++i)
    {
        for (j = 0; j < pres_op_info[pres->ins[i].op].input_count; ++j)
        {
            enum pres_reg_tables table;
            unsigned int reg_idx;

            if (pres->ins[i].inputs[j].index_reg.table == PRES_REGTAB_COUNT)
            {
                /* No index register: check the last component read from the operand. */
                unsigned int last_component_index = pres->ins[i].scalar_op && !j ? 0
                        : pres->ins[i].component_count - 1;

                table = pres->ins[i].inputs[j].reg.table;
                reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].reg.offset
                        + last_component_index);
            }
            else
            {
                /* Relative addressing: only the index register itself is checked here. */
                table = pres->ins[i].inputs[j].index_reg.table;
                reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].index_reg.offset);
            }
            if (reg_idx >= pres->regs.table_sizes[table])
            {
                /* Native accepts these broken preshaders. */
                FIXME("Out of bounds register index, i %u, j %u, table %u, reg_idx %u, preshader parsing failed.\n",
                        i, j, table, reg_idx);
                return D3DXERR_INVALIDDATA;
            }
        }
        update_table_size(pres->regs.table_sizes, pres->ins[i].output.reg.table,
                get_reg_offset(pres->ins[i].output.reg.table, pres->ins[i].output.reg.offset));
    }
    if (FAILED(regstore_alloc_table(&pres->regs, PRES_REGTAB_IMMED)))
        return E_OUTOFMEMORY;
    regstore_set_values(&pres->regs, PRES_REGTAB_IMMED, dconst, 0, const_count);

    return D3D_OK;
}

/* Create a parameter evaluator from shader or preshader byte code.
 *
 * For vertex and pixel shader parameters the byte code is a shader which may carry a
 * preshader in a PRES comment section; for every other parameter type the byte code is the
 * preshader itself. On success *peval_out receives the new evaluator. When byte_code is
 * NULL or shorter than one 32 bit word, *peval_out is set to NULL and D3D_OK is returned.
 * On failure *peval_out is set to NULL and the failure code is returned. */
HRESULT d3dx_create_param_eval(struct d3dx9_base_effect *base_effect, void *byte_code, unsigned int byte_code_size,
        D3DXPARAMETER_TYPE type, struct d3dx_param_eval **peval_out, ULONG64 *version_counter,
        const char **skip_constants, unsigned int skip_constants_count)
{
    unsigned int *ptr, *shader_ptr = NULL;
    unsigned int count, pres_size;
    struct d3dx_param_eval *peval;
    unsigned int table;
    BOOL shader;
    HRESULT ret;

    TRACE("base_effect %p, byte_code %p, byte_code_size %u, type %u, peval_out %p.\n",
            base_effect, byte_code, byte_code_size, type, peval_out);

    count = byte_code_size / sizeof(unsigned int);
    if (!byte_code || !count)
    {
        *peval_out = NULL;
        return D3D_OK;
    }

    if (!(peval = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*peval))))
    {
        ret = E_OUTOFMEMORY;
        goto err_out;
    }
    peval->version_counter = version_counter;
    peval->param_type = type;
    peval->shader_inputs.regset2table = shad_regset2table;

    shader = type == D3DXPT_VERTEXSHADER || type == D3DXPT_PIXELSHADER;

    ptr = (unsigned int *)byte_code;
    /* For non shader parameters the whole buffer is the preshader. */
    pres_size = count;
    if (shader)
    {
        if ((*ptr & 0xfffe0000) != 0xfffe0000)
        {
            FIXME("Invalid shader signature %#x.\n", *ptr);
            ret = D3DXERR_INVALIDDATA;
            goto err_out;
        }
        TRACE("Shader version %#x.\n", *ptr & 0xffff);
        shader_ptr = ptr;
        /* The preshader, if any, is stored in a comment section of the shader. */
        ptr = find_bytecode_comment(ptr + 1, count - 1, FOURCC_PRES, &pres_size);
        if (!ptr)
            TRACE("No preshader found.\n");
    }

    if (ptr)
    {
        ret = parse_preshader(&peval->pres, ptr, pres_size, base_effect);
        if (FAILED(ret))
        {
            FIXME("Failed parsing preshader, byte code for analysis follows.\n");
            dump_bytecode(byte_code, byte_code_size);
            goto err_out;
        }
    }

    if (shader)
    {
        ret = get_constants_desc(shader_ptr, &peval->shader_inputs, base_effect,
                skip_constants, skip_constants_count, &peval->pres);
        if (FAILED(ret))
        {
            TRACE("Could not get shader constant table, hr %#x.\n", ret);
            goto err_out;
        }
        update_table_sizes_consts(peval->pres.regs.table_sizes, &peval->shader_inputs);
    }

    /* Allocate the tables from PRES_REGTAB_FIRST_SHADER onwards now that their sizes are final. */
    for (table = PRES_REGTAB_FIRST_SHADER; table < PRES_REGTAB_COUNT; ++table)
    {
        ret = regstore_alloc_table(&peval->pres.regs, table);
        if (FAILED(ret))
            goto err_out;
    }

    if (TRACE_ON(d3dx))
    {
        dump_bytecode(byte_code, byte_code_size);
        dump_preshader(&peval->pres);
        if (shader)
        {
            TRACE("// Shader registers:\n");
            dump_registers(&peval->shader_inputs);
        }
    }
    *peval_out = peval;
    TRACE("Created parameter evaluator %p.\n", *peval_out);
    return D3D_OK;

err_out:
    WARN("Error creating parameter evaluator.\n");
    if (TRACE_ON(d3dx))
        dump_bytecode(byte_code, byte_code_size);

    /* peval may be NULL here; d3dx_free_param_eval() accepts that. */
    d3dx_free_param_eval(peval);
    *peval_out = NULL;
    return ret;
}

static void d3dx_free_const_tab(struct d3dx_const_tab *ctab)
{
    HeapFree(GetProcessHeap(), 0, ctab->inputs);
    HeapFree(GetProcessHeap(), 0, ctab->inputs_param);
    HeapFree(GetProcessHeap(), 0, ctab->const_set);
}

/* Release everything owned by a preshader: the instruction array, the register tables and
 * the input constant table. The structure itself is not freed. */
static void d3dx_free_preshader(struct d3dx_preshader *pres)
{
    struct d3dx_regstore *regs = &pres->regs;
    struct d3dx_const_tab *inputs = &pres->inputs;

    HeapFree(GetProcessHeap(), 0, pres->ins);
    regstore_free_tables(regs);
    d3dx_free_const_tab(inputs);
}

/* Destroy a parameter evaluator together with its preshader and shader constant table.
 * Passing NULL is allowed and only produces the trace message. */
void d3dx_free_param_eval(struct d3dx_param_eval *peval)
{
    TRACE("peval %p.\n", peval);

    if (peval)
    {
        d3dx_free_preshader(&peval->pres);
        d3dx_free_const_tab(&peval->shader_inputs);
        HeapFree(GetProcessHeap(), 0, peval);
    }
}

/* Convert 'count' float values from 'in' into int values stored to 'out'
 * using the regular C float to int conversion. */
static void pres_int_from_float(void *out, const void *in, unsigned int count)
{
    const float *src = in;
    int *dst = out;

    while (count--)
        *dst++ = *src++;
}

/* Convert 'count' DWORD sized values from 'in' into BOOL values stored to 'out':
 * any non-zero input value yields 1, zero yields 0. */
static void pres_bool_from_value(void *out, const void *in, unsigned int count)
{
    const DWORD *src = in;
    BOOL *dst = out;

    while (count--)
        *dst++ = !!*src++;
}

/* Convert 'count' int values from 'in' into float values stored to 'out'. */
static void pres_float_from_int(void *out, const void *in, unsigned int count)
{
    const int *src = in;
    float *dst = out;

    while (count--)
        *dst++ = *src++;
}

/* Convert 'count' BOOL values from 'in' into float values stored to 'out':
 * any non-zero input yields 1.0f, zero yields 0.0f. */
static void pres_float_from_bool(void *out, const void *in, unsigned int count)
{
    const BOOL *src = in;
    float *dst = out;

    while (count--)
        *dst++ = !!*src++;
}

/* Convert 'count' boolean values from 'in' into int values stored to 'out':
 * any non-zero input yields 1, zero yields 0.
 *
 * The input is read as integers, the same way pres_float_from_bool() interprets its
 * boolean input. Reading it as float would misclassify bit patterns which are non-zero as
 * integers but compare equal to zero as floats (e.g. 0x80000000, which is -0.0f). */
static void pres_int_from_bool(void *out, const void *in, unsigned int count)
{
    unsigned int i;
    const int *in_bool = in;
    int *out_int = out;

    for (i = 0; i < count; ++i)
        out_int[i] = !!in_bool[i];
}

/* Store 'count' values from 'in' into register table 'table' starting at component 'offset',
 * converting them from value type 'param_type' to the value type of the table when the two
 * differ. When the types match the data is stored through regstore_set_values().
 *
 * The conversion table is indexed as [source type][destination type]; combinations without
 * a conversion routine are NULL. Such a combination (or a type outside of the table) is
 * reported with a FIXME and the destination is left untouched instead of calling through a
 * NULL pointer. */
static void regstore_set_data(struct d3dx_regstore *rs, unsigned int table,
        unsigned int offset, const unsigned int *in, unsigned int count, enum pres_value_type param_type)
{
    typedef void (*conv_func)(void *out, const void *in, unsigned int count);
    static const conv_func set_const_funcs[PRES_VT_COUNT][PRES_VT_COUNT] =
    {
        {NULL,                 NULL, pres_int_from_float, pres_bool_from_value},
        {NULL,                 NULL, NULL,                NULL},
        {pres_float_from_int,  NULL, NULL,                pres_bool_from_value},
        {pres_float_from_bool, NULL, pres_int_from_bool,  NULL}
    };
    enum pres_value_type table_type = table_info[table].type;
    conv_func convert;

    if (param_type == table_type)
    {
        regstore_set_values(rs, table, in, offset, count);
        return;
    }

    if (param_type >= PRES_VT_COUNT || table_type >= PRES_VT_COUNT
            || !(convert = set_const_funcs[param_type][table_type]))
    {
        FIXME("Unsupported conversion from value type %u to value type %u.\n", param_type, table_type);
        return;
    }

    /* All the converted value types are handled as unsigned int sized components here. */
    convert((unsigned int *)rs->tables[table] + offset, in, count);
}

/* Upload 'count' registers starting at register 'start' from 'ptr' to the state manager or
 * the device through SET_D3D_STATE_(), picking the Set{Vertex,Pixel}ShaderConstant{F,I,B}
 * method from the shader type and the register table. Returns D3DERR_INVALIDCALL for
 * parameter types other than vertex / pixel shader and for unexpected register tables. */
static HRESULT set_constants_device(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device,
        D3DXPARAMETER_TYPE type, enum pres_reg_tables table, void *ptr,
        unsigned int start, unsigned int count)
{
    const BOOL vertex = type == D3DXPT_VERTEXSHADER;

    if (!vertex && type != D3DXPT_PIXELSHADER)
    {
        FIXME("Unexpected parameter type %u.\n", type);
        return D3DERR_INVALIDCALL;
    }

    switch (table)
    {
        case PRES_REGTAB_OCONST:
            if (vertex)
                return SET_D3D_STATE_(manager, device, SetVertexShaderConstantF, start, ptr, count);
            return SET_D3D_STATE_(manager, device, SetPixelShaderConstantF, start, ptr, count);

        case PRES_REGTAB_OICONST:
            if (vertex)
                return SET_D3D_STATE_(manager, device, SetVertexShaderConstantI, start, ptr, count);
            return SET_D3D_STATE_(manager, device, SetPixelShaderConstantI, start, ptr, count);

        case PRES_REGTAB_OBCONST:
            if (vertex)
                return SET_D3D_STATE_(manager, device, SetVertexShaderConstantB, start, ptr, count);
            return SET_D3D_STATE_(manager, device, SetPixelShaderConstantB, start, ptr, count);

        default:
            break;
    }

    FIXME("Unexpected register table %u.\n", table);
    return D3DERR_INVALIDCALL;
}

/* Copy the values of dirty parameters into the register store and optionally upload the
 * affected register ranges to the device.
 *
 * The first pass walks const_tab->const_set and, for every entry whose parameter is dirty
 * relative to the table's current update version, writes the parameter data into the
 * register table (direct copy, per element vector copy, or matrix reshape followed by an
 * in-place type conversion). The table's update version is then set to new_update_version.
 *
 * The second pass runs only when a state manager or a device is given. It gathers entries
 * that are dirty (or all entries when device_update_all is set; entries without a parameter
 * count as dirty when pres_dirty is set), merges adjacent register ranges of the same table
 * and uploads each merged range through set_constants_device().
 *
 * Returns D3D_OK, or the last failure code returned by set_constants_device(). */
static HRESULT set_constants(struct d3dx_regstore *rs, struct d3dx_const_tab *const_tab,
        ULONG64 new_update_version, ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device,
        D3DXPARAMETER_TYPE type, BOOL device_update_all, BOOL pres_dirty)
{
    unsigned int const_idx;
    unsigned int current_start = 0, current_count = 0;
    enum pres_reg_tables current_table = PRES_REGTAB_COUNT;
    BOOL update_device = manager || device;
    HRESULT hr, result = D3D_OK;
    /* Remember the version the dirty checks are made against; the table's own copy is
     * overwritten with new_update_version before the upload pass. */
    ULONG64 update_version = const_tab->update_version;

    for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx)
    {
        struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx];
        enum pres_reg_tables table = const_set->table;
        struct d3dx_parameter *param = const_set->param;
        unsigned int element, i, j, start_offset;
        struct const_upload_info info;
        unsigned int *data;
        enum pres_value_type param_type;

        /* Entries without a parameter and entries whose parameter is clean are not copied. */
        if (!(param && is_param_dirty(param, update_version)))
            continue;

        data = param->data;
        start_offset = get_offset_reg(table, const_set->register_index);
        if (const_set->direct_copy)
        {
            /* Parameter data can be stored as is, without reshaping or conversion. */
            regstore_set_values(rs, table, data, start_offset,
                    get_offset_reg(table, const_set->register_count));
            continue;
        }
        param_type = table_type_from_param_type(param->type);
        if (const_set->constant_class == D3DXPC_SCALAR || const_set->constant_class == D3DXPC_VECTOR)
        {
            unsigned int count = max(param->rows, param->columns);

            if (count >= get_reg_components(table))
            {
                /* Elements fill whole registers, so all of them are stored in one go. */
                regstore_set_data(rs, table, start_offset, data,
                        count * const_set->element_count, param_type);
            }
            else
            {
                /* Elements are narrower than a register: store each one at the start of
                 * its own register. */
                for (element = 0; element < const_set->element_count; ++element)
                    regstore_set_data(rs, table, start_offset + get_offset_reg(table, element),
                            &data[element * count], count, param_type);
            }
            continue;
        }
        /* Remaining constant classes are reshaped according to the upload info. */
        get_const_upload_info(const_set, &info);
        for (element = 0; element < const_set->element_count; ++element)
        {
            unsigned int *out = (unsigned int *)rs->tables[table] + start_offset;

            /* Store reshaped but (possibly) not converted yet data temporarily in the same constants buffer.
             * All the supported types of parameters and table values have the same size. */
            if (info.transpose)
            {
                for (i = 0; i < info.major_count; ++i)
                    for (j = 0; j < info.minor; ++j)
                        out[i * info.major_stride + j] = data[i + j * info.major];

                /* i equals info.major_count here: one more, partial, run of
                 * info.minor_remainder components is stored after the full ones. */
                for (j = 0; j < info.minor_remainder; ++j)
                    out[i * info.major_stride + j] = data[i + j * info.major];
            }
            else
            {
                for (i = 0; i < info.major_count; ++i)
                    for (j = 0; j < info.minor; ++j)
                        out[i * info.major_stride + j] = data[i * info.minor + j];
            }
            /* Advance to the registers and the source data of the next element. */
            start_offset += get_offset_reg(table, const_set->register_count);
            data += param->rows * param->columns;
        }
        /* Convert the reshaped data in place when the parameter and table types differ. */
        start_offset = get_offset_reg(table, const_set->register_index);
        if (table_info[table].type != param_type)
            regstore_set_data(rs, table, start_offset, (unsigned int *)rs->tables[table] + start_offset,
                    get_offset_reg(table, const_set->register_count) * const_set->element_count, param_type);
    }
    const_tab->update_version = new_update_version;
    if (!update_device)
        return D3D_OK;

    /* Upload pass: accumulate [current_start, current_start + current_count) within
     * current_table and flush it whenever the next dirty entry does not extend it. */
    for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx)
    {
        struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx];

        if (device_update_all || (const_set->param
                ? is_param_dirty(const_set->param, update_version) : pres_dirty))
        {
            enum pres_reg_tables table = const_set->table;

            if (table == current_table && current_start + current_count == const_set->register_index)
            {
                /* Directly follows the pending range: extend it. */
                current_count += const_set->register_count * const_set->element_count;
            }
            else
            {
                if (current_count)
                {
                    if (FAILED(hr = set_constants_device(manager, device, type, current_table,
                            (DWORD *)rs->tables[current_table]
                            + get_offset_reg(current_table, current_start), current_start, current_count)))
                        result = hr;
                }
                current_table = table;
                current_start = const_set->register_index;
                current_count = const_set->register_count * const_set->element_count;
            }
        }
    }
    /* Flush the last pending range, if any. */
    if (current_count)
    {
        if (FAILED(hr = set_constants_device(manager, device, type, current_table,
                (DWORD *)rs->tables[current_table]
                + get_offset_reg(current_table, current_start), current_start, current_count)))
            result = hr;
    }
    return result;
}

/* Read the register component at 'offset' in 'table' as a double. */
static double exec_get_reg_value(struct d3dx_regstore *rs, enum pres_reg_tables table, unsigned int offset)
{
    double value = regstore_get_double(rs, table, offset);

    return value;
}

/* Fetch component 'comp' of an instruction input operand as a double.
 *
 * When the operand has an index register, its value (rounded with lrint()) is added to the
 * register index. A resulting register index outside of the table is wrapped: modulo the
 * table size rounded up to a power of two for PRES_REGTAB_CONST, modulo the actual table
 * size for the other tables. If the wrapped index still lies outside of the table, or the
 * table is empty, 0.0 is returned. */
static double exec_get_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *opr, unsigned int comp)
{
    unsigned int offset, base_index, reg_index, table;

    table = opr->reg.table;

    if (opr->index_reg.table == PRES_REGTAB_COUNT)
        base_index = 0;
    else
        base_index = lrint(exec_get_reg_value(rs, opr->index_reg.table, opr->index_reg.offset));

    offset = get_offset_reg(table, base_index) + opr->reg.offset + comp;
    reg_index = get_reg_offset(table, offset);

    if (reg_index >= rs->table_sizes[table])
    {
        unsigned int wrap_size;

        if (table == PRES_REGTAB_CONST)
        {
            /* As it can be guessed from tests, offset into floating constant table is wrapped
             * to the nearest power of 2 and not to the actual table size. */
            for (wrap_size = 1; wrap_size < rs->table_sizes[table]; wrap_size <<= 1)
                ;
        }
        else
        {
            wrap_size = rs->table_sizes[table];
        }
        WARN("Wrapping register index %u, table %u, wrap_size %u, table size %u.\n",
                reg_index, table, wrap_size, rs->table_sizes[table]);

        /* An empty table has nothing to wrap into; avoid the modulo by zero below. */
        if (!wrap_size)
            return 0.0;

        reg_index %= wrap_size;

        if (reg_index >= rs->table_sizes[table])
            return 0.0;

        /* Keep the component within the register, replace the register index. */
        offset = get_offset_reg(table, reg_index) + offset % get_reg_components(table);
    }

    return exec_get_reg_value(rs, table, offset);
}

/* Store 'res' into component 'comp' of the output register 'reg'. */
static void exec_set_arg(struct d3dx_regstore *rs, const struct d3dx_pres_reg *reg,
        unsigned int comp, double res)
{
    unsigned int component_offset = reg->offset + comp;

    regstore_set_double(rs, reg->table, component_offset, res);
}

/* Maximum number of argument values gathered for a single operation call. */
#define ARGS_ARRAY_SIZE 8
/* Run all the preshader instructions in order against the preshader's register store.
 *
 * Operations flagged func_all_comps receive every component of every input in one call and
 * produce a single value written to the first output component. All the other operations
 * are called once per component. Returns E_FAIL if an all-components operation needs more
 * than ARGS_ARRAY_SIZE argument values, D3D_OK otherwise. */
static HRESULT execute_preshader(struct d3dx_preshader *pres)
{
    double args[ARGS_ARRAY_SIZE];
    unsigned int ins_idx;

    for (ins_idx = 0; ins_idx < pres->ins_count; ++ins_idx)
    {
        const struct d3dx_pres_ins *ins = &pres->ins[ins_idx];
        const struct op_info *oi = &pres_op_info[ins->op];
        unsigned int comp, input;
        double res;

        if (!oi->func_all_comps)
        {
            for (comp = 0; comp < ins->component_count; ++comp)
            {
                /* The first input of a scalar operation always supplies its first component. */
                for (input = 0; input < oi->input_count; ++input)
                    args[input] = exec_get_arg(&pres->regs, &ins->inputs[input],
                            ins->scalar_op && !input ? 0 : comp);
                res = oi->func(args, ins->component_count);
                exec_set_arg(&pres->regs, &ins->output.reg, comp, res);
            }
            continue;
        }

        if (oi->input_count * ins->component_count > ARGS_ARRAY_SIZE)
        {
            FIXME("Too many arguments (%u) for one instruction.\n", oi->input_count * ins->component_count);
            return E_FAIL;
        }

        /* Arguments are laid out input after input, component_count values each. */
        for (input = 0; input < oi->input_count; ++input)
        {
            for (comp = 0; comp < ins->component_count; ++comp)
                args[input * ins->component_count + comp] = exec_get_arg(&pres->regs, &ins->inputs[input],
                        ins->scalar_op && !input ? 0 : comp);
        }
        res = oi->func(args, ins->component_count);

        /* only 'dot' instruction currently falls here */
        exec_set_arg(&pres->regs, &ins->output.reg, 0, res);
    }

    return D3D_OK;
}

/* Tell whether any input parameter of 'ctab' is dirty relative to 'update_version'.
 * Passing ULONG64_MAX selects the update version stored in the table itself. */
static BOOL is_const_tab_input_dirty(struct d3dx_const_tab *ctab, ULONG64 update_version)
{
    const ULONG64 version = update_version == ULONG64_MAX ? ctab->update_version : update_version;
    unsigned int idx = 0;

    while (idx < ctab->input_count)
    {
        /* Dirtiness is checked on the top level parameter the input belongs to. */
        if (is_top_level_param_dirty(top_level_parameter_from_parameter(ctab->inputs_param[idx]),
                version))
            return TRUE;
        ++idx;
    }

    return FALSE;
}

/* Tell whether any preshader input or shader input of the evaluator is dirty relative to
 * 'update_version'. The shader inputs are only checked when the preshader inputs are clean. */
BOOL is_param_eval_input_dirty(struct d3dx_param_eval *peval, ULONG64 update_version)
{
    if (is_const_tab_input_dirty(&peval->pres.inputs, update_version))
        return TRUE;

    return is_const_tab_input_dirty(&peval->shader_inputs, update_version);
}

HRESULT d3dx_evaluate_parameter(struct d3dx_param_eval *peval, const struct d3dx_parameter *param,
        void *param_value)
{
    HRESULT hr;
    unsigned int i;
    unsigned int elements, elements_param, elements_table;
    float *oc;

    TRACE("peval %p, param %p, param_value %p.\n", peval, param, param_value);

    if (is_const_tab_input_dirty(&peval->pres.inputs, ULONG64_MAX))
    {
        set_constants(&peval->pres.regs, &peval->pres.inputs,
                next_update_version(peval->version_counter),
                NULL, NULL, peval->param_type, FALSE, FALSE);

        if (FAILED(hr = execute_preshader(&peval->pres)))
            return hr;
    }

    elements_table = get_offset_reg(PRES_REGTAB_OCONST, peval->pres.regs.table_sizes[PRES_REGTAB_OCONST]);
    elements_param = param->bytes / sizeof(unsigned int);
    elements = min(elements_table, elements_param);
    oc = (float *)peval->pres.regs.tables[PRES_REGTAB_OCONST];
    for (i = 0; i < elements; ++i)
        set_number((unsigned int *)param_value + i, param->type, oc + i, D3DXPT_FLOAT);
    return D3D_OK;
}

/* Update the shader constants of an evaluator and upload them through the state manager or
 * the device.
 *
 * When any preshader input is dirty the preshader inputs are refreshed and the preshader is
 * executed first; the constants without an attached parameter are then treated as dirty by
 * the upload. With update_all set every shader constant is uploaded. Returns the failure
 * code of execute_preshader(), otherwise the result of the final set_constants() call. */
HRESULT d3dx_param_eval_set_shader_constants(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device,
        struct d3dx_param_eval *peval, BOOL update_all)
{
    /* One version is taken up front and shared by both set_constants() calls below. */
    ULONG64 version = next_update_version(peval->version_counter);
    struct d3dx_preshader *preshader = &peval->pres;
    BOOL preshader_ran;
    HRESULT hr;

    TRACE("device %p, peval %p, param_type %u.\n", device, peval, peval->param_type);

    preshader_ran = is_const_tab_input_dirty(&preshader->inputs, ULONG64_MAX);
    if (preshader_ran)
    {
        set_constants(&preshader->regs, &preshader->inputs, version,
                NULL, NULL, peval->param_type, FALSE, FALSE);

        hr = execute_preshader(preshader);
        if (FAILED(hr))
            return hr;
    }

    return set_constants(&preshader->regs, &peval->shader_inputs, version,
            manager, device, peval->param_type, update_all, preshader_ran);
}