/*
 * Copyright 2016 Paul Gofman
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */

#include "config.h"
#include "wine/port.h"

#include "d3dx9_private.h"

/* NOTE(review): the operands of the next two includes were missing in the text this was
 * restored from. <assert.h> is required by the assert() in regstore_set_values();
 * <float.h> is a reconstruction - confirm against the upstream file. */
#include <float.h>
#include <assert.h>

WINE_DEFAULT_DEBUG_CHANNEL(d3dx);

/* Preshader operations. The values are used as indices into pres_op_info[] and must stay
 * in the same order as the entries of that table. */
enum pres_ops
{
    PRESHADER_OP_NOP,
    PRESHADER_OP_MOV,
    PRESHADER_OP_NEG,
    PRESHADER_OP_RCP,
    PRESHADER_OP_FRC,
    PRESHADER_OP_EXP,
    PRESHADER_OP_LOG,
    PRESHADER_OP_RSQ,
    PRESHADER_OP_SIN,
    PRESHADER_OP_COS,
    PRESHADER_OP_ASIN,
    PRESHADER_OP_ACOS,
    PRESHADER_OP_ATAN,
    PRESHADER_OP_MIN,
    PRESHADER_OP_MAX,
    PRESHADER_OP_LT,
    PRESHADER_OP_GE,
    PRESHADER_OP_ADD,
    PRESHADER_OP_MUL,
    PRESHADER_OP_ATAN2,
    PRESHADER_OP_DIV,
    PRESHADER_OP_CMP,
    PRESHADER_OP_DOT,
    PRESHADER_OP_DOTSWIZ6,
    PRESHADER_OP_DOTSWIZ8,
};

/* Operation callback: 'args' holds the input values, 'n' is a component count which is
 * only read by pres_dot(). */
typedef double (*pres_op_func)(double *args, int n);

/* Replaces any NaN with the NaN whose bit pattern is 0xfff8000000000000 (sign bit set);
 * other values are returned unchanged. */
static double to_signed_nan(double v)
{
    static const union
    {
        ULONG64 ulong64_value;
        double double_value;
    }
    signed_nan =
    {
        0xfff8000000000000
    };

    return isnan(v) ? signed_nan.double_value : v;
}

static double pres_mov(double *args, int n) {return args[0];}
static double pres_add(double *args, int n) {return args[0] + args[1];}
static double pres_mul(double *args, int n) {return args[0] * args[1];}

/* Dot product of two n-component vectors stored back to back in 'args'. */
static double pres_dot(double *args, int n)
{
    int i;
    double sum;

    sum = 0.0;
    for (i = 0; i < n; ++i)
        sum += args[i] * args[i + n];
    return sum;
}

/* 6 inputs: dot product of two 3-component vectors. */
static double pres_dotswiz6(double *args, int n)
{
    return pres_dot(args, 3);
}

/* 8 inputs: dot product of two 4-component vectors. */
static double pres_dotswiz8(double *args, int n)
{
    return pres_dot(args, 4);
}

static double pres_neg(double *args, int n) {return -args[0];}
static double pres_rcp(double *args, int n) {return 1.0 / args[0];}
static double pres_lt(double *args, int n)  {return args[0] < args[1] ? 1.0 : 0.0;}
static double pres_ge(double *args, int n)  {return args[0] >= args[1] ? 1.0 : 0.0;}
static double pres_frc(double *args, int n) {return args[0] - floor(args[0]);}
static double pres_min(double *args, int n) {return fmin(args[0], args[1]);}
static double pres_max(double *args, int n) {return fmax(args[0], args[1]);}
static double pres_cmp(double *args, int n) {return args[0] >= 0.0 ? args[1] : args[2];}
static double pres_sin(double *args, int n) {return sin(args[0]);}
static double pres_cos(double *args, int n) {return cos(args[0]);}

/* Reciprocal square root of |args[0]|; returns INFINITY for a zero argument. */
static double pres_rsq(double *args, int n)
{
    double v;

    v = fabs(args[0]);
    if (v == 0.0)
        return INFINITY;
    else
        return 1.0 / sqrt(v);
}

/* Base 2 exponent. */
static double pres_exp(double *args, int n) {return pow(2.0, args[0]);}

/* Base 2 logarithm of |args[0]|; returns 0.0 for a zero argument. */
static double pres_log(double *args, int n)
{
    double v;

    v = fabs(args[0]);
    if (v == 0.0)
        return 0.0;
    else
#ifdef HAVE_LOG2
        return log2(v);
#else
        return log(v) / log(2);
#endif
}

static double pres_asin(double *args, int n) {return to_signed_nan(asin(args[0]));}
static double pres_acos(double *args, int n) {return to_signed_nan(acos(args[0]));}
static double pres_atan(double *args, int n) {return atan(args[0]);}
static double pres_atan2(double *args, int n) {return atan2(args[0], args[1]);}

/* According to the test results 'div' operation always returns 0. Compiler does not seem to ever
 * generate it, using rcp + mul instead, so probably it is not implemented in native d3dx. */
static double pres_div(double *args, int n) {return 0.0;}

/* Fields of the first word of an instruction, see parse_pres_ins(). */
#define PRES_OPCODE_MASK 0x7ff00000
#define PRES_OPCODE_SHIFT 20
#define PRES_SCALAR_FLAG 0x80000000
#define PRES_NCOMP_MASK  0x0000ffff

/* Comment section identifiers matched by find_bytecode_comment(). */
#define FOURCC_PRES 0x53455250
#define FOURCC_CLIT 0x54494c43
#define FOURCC_FXLC 0x434c5846
#define FOURCC_PRSI 0x49535250
#define PRES_SIGN 0x46580000

struct op_info
{
    unsigned int opcode;
    char mnem[16];
    unsigned int input_count;
    BOOL func_all_comps;
    pres_op_func func;
};

/* Indexed by enum pres_ops. An instruction is matched on both opcode and input count. */
static const struct op_info pres_op_info[] =
{
    {0x000, "nop", 0, 0, NULL    }, /* PRESHADER_OP_NOP */
    {0x100, "mov", 1, 0, pres_mov}, /* PRESHADER_OP_MOV */
    {0x101, "neg", 1, 0, pres_neg}, /* PRESHADER_OP_NEG */
    {0x103, "rcp", 1, 0, pres_rcp}, /* PRESHADER_OP_RCP */
    {0x104, "frc", 1, 0, pres_frc}, /* PRESHADER_OP_FRC */
    {0x105, "exp", 1, 0, pres_exp}, /* PRESHADER_OP_EXP */
    {0x106, "log", 1, 0, pres_log}, /* PRESHADER_OP_LOG */
    {0x107, "rsq", 1, 0, pres_rsq}, /* PRESHADER_OP_RSQ */
    {0x108, "sin", 1, 0, pres_sin}, /* PRESHADER_OP_SIN */
    {0x109, "cos", 1, 0, pres_cos}, /* PRESHADER_OP_COS */
    {0x10a, "asin", 1, 0, pres_asin}, /* PRESHADER_OP_ASIN */
    {0x10b, "acos", 1, 0, pres_acos}, /* PRESHADER_OP_ACOS */
    {0x10c, "atan", 1, 0, pres_atan}, /* PRESHADER_OP_ATAN */
    {0x200, "min", 2, 0, pres_min}, /* PRESHADER_OP_MIN */
    {0x201, "max", 2, 0, pres_max}, /* PRESHADER_OP_MAX */
    {0x202, "lt",  2, 0, pres_lt }, /* PRESHADER_OP_LT  */
    {0x203, "ge",  2, 0, pres_ge }, /* PRESHADER_OP_GE  */
    {0x204, "add", 2, 0, pres_add}, /* PRESHADER_OP_ADD */
    {0x205, "mul", 2, 0, pres_mul}, /* PRESHADER_OP_MUL */
    {0x206, "atan2", 2, 0, pres_atan2}, /* PRESHADER_OP_ATAN2 */
    {0x208, "div", 2, 0, pres_div}, /* PRESHADER_OP_DIV */
    {0x300, "cmp", 3, 0, pres_cmp}, /* PRESHADER_OP_CMP */
    {0x500, "dot", 2, 1, pres_dot}, /* PRESHADER_OP_DOT */
    {0x70e, "d3ds_dotswiz", 6, 0, pres_dotswiz6}, /* PRESHADER_OP_DOTSWIZ6 */
    {0x70e, "d3ds_dotswiz", 8, 0, pres_dotswiz8}, /* PRESHADER_OP_DOTSWIZ8 */
};

enum pres_value_type
{
    PRES_VT_FLOAT,
    PRES_VT_DOUBLE,
    PRES_VT_INT,
    PRES_VT_BOOL,
    PRES_VT_COUNT
};

/* Component size and value type of each register table, indexed by enum pres_reg_tables. */
static const struct
{
    unsigned int component_size;
    enum pres_value_type type;
}
table_info[] =
{
    {sizeof(double), PRES_VT_DOUBLE}, /* PRES_REGTAB_IMMED */
    {sizeof(float),  PRES_VT_FLOAT }, /* PRES_REGTAB_CONST */
    {sizeof(float),  PRES_VT_FLOAT }, /* PRES_REGTAB_OCONST */
    {sizeof(BOOL),   PRES_VT_BOOL  }, /* PRES_REGTAB_OBCONST */
    {sizeof(int),    PRES_VT_INT,  }, /* PRES_REGTAB_OICONST */
    /* TODO: use double precision for 64 bit */
    {sizeof(float),  PRES_VT_FLOAT }  /* PRES_REGTAB_TEMP */
};

/* Register name prefixes used in trace output, indexed by enum pres_reg_tables. */
static const char *table_symbol[] =
{
    "imm", "c", "oc", "ob", "oi", "r", "(null)",
};

/* Register set to table mapping for preshader inputs. */
static const enum pres_reg_tables pres_regset2table[] =
{
    PRES_REGTAB_OBCONST,  /* D3DXRS_BOOL */
    PRES_REGTAB_OICONST,  /* D3DXRS_INT4 */
    PRES_REGTAB_CONST,    /* D3DXRS_FLOAT4 */
    PRES_REGTAB_COUNT,    /* D3DXRS_SAMPLER */
};

/* Register set to table mapping for shader inputs. */
static const enum pres_reg_tables shad_regset2table[] =
{
    PRES_REGTAB_OBCONST,  /* D3DXRS_BOOL */
    PRES_REGTAB_OICONST,  /* D3DXRS_INT4 */
    PRES_REGTAB_OCONST,   /* D3DXRS_FLOAT4 */
    PRES_REGTAB_COUNT,    /* D3DXRS_SAMPLER */
};

struct d3dx_pres_reg
{
    enum pres_reg_tables table;
    /* offset is component index, not register index, e. g.
       offset for component c3.y is 13 (3 * 4 + 1) */
    unsigned int offset;
};

struct d3dx_pres_operand
{
    struct d3dx_pres_reg reg;
    /* index_reg.table == PRES_REGTAB_COUNT means no relative addressing. */
    struct d3dx_pres_reg index_reg;
};

#define MAX_INPUTS_COUNT 8

struct d3dx_pres_ins
{
    enum pres_ops op;
    /* first input argument is scalar,
       scalar component is propagated */
    BOOL scalar_op;
    unsigned int component_count;
    struct d3dx_pres_operand inputs[MAX_INPUTS_COUNT];
    struct d3dx_pres_operand output;
};

/* Layout description computed by get_const_upload_info() and consumed by set_constants(). */
struct const_upload_info
{
    BOOL transpose;
    unsigned int major, minor;
    unsigned int major_stride;
    unsigned int major_count;
    unsigned int count;
    unsigned int minor_remainder;
};

/* Maps an effect parameter type to a table value type; returns PRES_VT_COUNT for
 * unsupported types. */
static enum pres_value_type table_type_from_param_type(D3DXPARAMETER_TYPE type)
{
    switch (type)
    {
        case D3DXPT_FLOAT:
            return PRES_VT_FLOAT;
        case D3DXPT_INT:
            return PRES_VT_INT;
        case D3DXPT_BOOL:
            return PRES_VT_BOOL;
        default:
            FIXME("Unsupported type %u.\n", type);
            return PRES_VT_COUNT;
    }
}

/* Converts a component offset to a register index (1 component per register in the
 * OBCONST table, 4 in all others). */
static unsigned int get_reg_offset(unsigned int table, unsigned int offset)
{
    return table == PRES_REGTAB_OBCONST ? offset : offset >> 2;
}

/* Converts a register index to the component offset of its first component. */
static unsigned int get_offset_reg(unsigned int table, unsigned int reg_idx)
{
    return table == PRES_REGTAB_OBCONST ? reg_idx : reg_idx << 2;
}

/* Number of components in one register of the given table. */
static unsigned int get_reg_components(unsigned int table)
{
    return get_offset_reg(table, 1);
}

/* Number of registers tracked by one word of the table_value_set bitmasks. */
#define PRES_BITMASK_BLOCK_SIZE (sizeof(unsigned int) * 8)

static HRESULT init_set_constants(struct d3dx_const_tab *const_tab, ID3DXConstantTable *ctab);

/* Allocates zeroed storage and the "value set" bitmask for a table according to
 * rs->table_sizes[table]. Nothing is allocated for an empty table. On E_OUTOFMEMORY any
 * pointer already stored in 'rs' is left for regstore_free_tables() to release. */
static HRESULT regstore_alloc_table(struct d3dx_regstore *rs, unsigned int table)
{
    unsigned int size;

    size = get_offset_reg(table, rs->table_sizes[table]) * table_info[table].component_size;
    if (size)
    {
        rs->tables[table] = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, size);
        rs->table_value_set[table] = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
                sizeof(*rs->table_value_set[table]) *
                ((rs->table_sizes[table] + PRES_BITMASK_BLOCK_SIZE - 1) / PRES_BITMASK_BLOCK_SIZE));
        if (!rs->tables[table] || !rs->table_value_set[table])
            return E_OUTOFMEMORY;
    }
    return D3D_OK;
}

/* Frees the storage and bitmasks of all tables. */
static void regstore_free_tables(struct d3dx_regstore *rs)
{
    unsigned int i;

    for (i = 0; i < PRES_REGTAB_COUNT; ++i)
    {
        HeapFree(GetProcessHeap(), 0, rs->tables[i]);
        HeapFree(GetProcessHeap(), 0, rs->table_value_set[i]);
    }
}

/* Sets the bits for registers [start, end] (inclusive) in the table's bitmask. */
static void regstore_set_modified_reg(struct d3dx_regstore *rs, unsigned int table,
        unsigned int start, unsigned int end)
{
    unsigned int block_idx, start_block, end_block;

    /* After this, 'start' is the bit position inside the first word and 'end' is the
     * number of unused high bits in the last word. */
    start_block = start / PRES_BITMASK_BLOCK_SIZE;
    start -= start_block * PRES_BITMASK_BLOCK_SIZE;
    end_block = end / PRES_BITMASK_BLOCK_SIZE;
    end = (end_block + 1) * PRES_BITMASK_BLOCK_SIZE - 1 - end;

    if (start_block == end_block)
    {
        rs->table_value_set[table][start_block] |= (~0u << start) & (~0u >> end);
    }
    else
    {
        rs->table_value_set[table][start_block] |= ~0u << start;

        for (block_idx = start_block + 1; block_idx < end_block; ++block_idx)
            rs->table_value_set[table][block_idx] = ~0u;

        rs->table_value_set[table][end_block] |= ~0u >> end;
    }
}

/* Marks the registers covering 'count' components starting at component 'start_offset'. */
static void regstore_set_modified(struct d3dx_regstore *rs, unsigned int table,
        unsigned int start_offset, unsigned int count)
{
    if (!count)
        return;

    regstore_set_modified_reg(rs, table, get_reg_offset(table, start_offset),
            get_reg_offset(table, start_offset + count - 1));
}

/* Copies 'count' components from 'data' into the table at component 'start_offset' and
 * marks the touched registers. The source must not overlap the destination (asserted). */
static void regstore_set_values(struct d3dx_regstore *rs, unsigned int table, const void *data,
        unsigned int start_offset, unsigned int count)
{
    BYTE *dst = rs->tables[table];
    const BYTE *src = data;
    unsigned int size;

    if (!count)
        return;

    dst += start_offset * table_info[table].component_size;
    size = count * table_info[table].component_size;
    assert((src < dst && size <= dst - src) || (src > dst && size <= src - dst));
    memcpy(dst, src, size);
    regstore_set_modified(rs, table, start_offset, count);
}

/* Returns nonzero if the bit for register 'reg_idx' is set in the table's bitmask. */
static unsigned int regstore_is_val_set_reg(struct d3dx_regstore *rs, unsigned int table, unsigned int reg_idx)
{
    return rs->table_value_set[table][reg_idx / PRES_BITMASK_BLOCK_SIZE]
            & (1u << (reg_idx % PRES_BITMASK_BLOCK_SIZE));
}

/* Reads a component as double. Only float and double tables are supported; NAN is
 * returned for the others. */
static double regstore_get_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset)
{
    BYTE *p;

    p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset;
    switch (table_info[table].type)
    {
        case PRES_VT_FLOAT:
            return *(float *)p;
        case PRES_VT_DOUBLE:
            return *(double *)p;
        default:
            FIXME("Unexpected preshader input from table %u.\n", table);
            return NAN;
    }
}

/* Stores 'v' converted to the table's value type and marks the register holding it. */
static void regstore_set_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset, double v)
{
    BYTE *p;
    unsigned int reg_idx;

    p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset;
    switch (table_info[table].type)
    {
        case PRES_VT_FLOAT : *(float *)p = v; break;
        case PRES_VT_DOUBLE: *(double *)p = v; break;
        case PRES_VT_INT   : *(int *)p = lrint(v); break;
        case PRES_VT_BOOL  : *(BOOL *)p = !!v; break;
        default:
            FIXME("Bad type %u.\n", table_info[table].type);
            break;
    }
    reg_idx = get_reg_offset(table, offset);
    rs->table_value_set[table][reg_idx / PRES_BITMASK_BLOCK_SIZE] |=
            1u << (reg_idx % PRES_BITMASK_BLOCK_SIZE);
}

/* Clears the whole bitmask of a table. */
static void regstore_reset_modified(struct d3dx_regstore *rs, unsigned int table)
{
    memset(rs->table_value_set[table], 0, sizeof(*rs->table_value_set[table]) *
            ((rs->table_sizes[table] + PRES_BITMASK_BLOCK_SIZE - 1) / PRES_BITMASK_BLOCK_SIZE));
}

/* Traces the byte code as 32 bit words, 8 per line. 'size' is in bytes. */
static void dump_bytecode(void *data, unsigned int size)
{
    unsigned int *bytecode = (unsigned int *)data;
    unsigned int i, j, n;

    size /= sizeof(*bytecode);
    i = 0;
    while (i < size)
    {
        n = min(size - i, 8);
        for (j = 0; j < n; ++j)
            TRACE("0x%08x,", bytecode[i + j]);
        i += n;
        TRACE("\n");
    }
}

/* Walks consecutive comment tokens (low word 0xfffe, section size in the high word) looking
 * for the section whose first word equals 'fourcc'. 'count' is the number of words
 * available at 'ptr'. On success returns a pointer to the word following the fourcc and
 * stores the section size (which counts the fourcc word) in '*size'; otherwise returns
 * NULL and leaves '*size' untouched. */
static unsigned int *find_bytecode_comment(unsigned int *ptr, unsigned int count,
        unsigned int fourcc, unsigned int *size)
{
    /* Provide at least one value in comment section on non-NULL return. */
    while (count > 2 && (*ptr & 0xffff) == 0xfffe)
    {
        unsigned int section_size;

        section_size = (*ptr >> 16);
        if (!section_size || section_size + 1 > count)
            break;
        if (*(ptr + 1) == fourcc)
        {
            *size = section_size;
            return ptr + 2;
        }
        count -= section_size + 1;
        ptr += section_size + 1;
    }
    return NULL;
}

/* Parses a (table, offset) word pair. Returns the pointer past the pair, or NULL for an
 * unsupported table id, in which case 'reg' is not written. */
static unsigned int *parse_pres_reg(unsigned int *ptr, struct d3dx_pres_reg *reg)
{
    static const enum pres_reg_tables reg_table[8] =
    {
        PRES_REGTAB_COUNT, PRES_REGTAB_IMMED, PRES_REGTAB_CONST, PRES_REGTAB_COUNT,
        PRES_REGTAB_OCONST, PRES_REGTAB_OBCONST, PRES_REGTAB_OICONST, PRES_REGTAB_TEMP
    };

    if (*ptr >= ARRAY_SIZE(reg_table) || reg_table[*ptr] == PRES_REGTAB_COUNT)
    {
        FIXME("Unsupported register table %#x.\n", *ptr);
        return NULL;
    }

    reg->table = reg_table[*ptr++];
    reg->offset = *ptr++;
    return ptr;
}

/* Parses one operand: a relative addressing flag word, an optional index register (when
 * the flag is 1) and the register itself. 'count' is the number of words left in the
 * buffer. Returns the pointer past the operand or NULL on error. */
static unsigned int *parse_pres_arg(unsigned int *ptr, unsigned int count, struct d3dx_pres_operand *opr)
{
    if (count < 3 || (*ptr && count < 5))
    {
        WARN("Byte code buffer ends unexpectedly, count %u.\n", count);
        return NULL;
    }

    if (*ptr)
    {
        if (*ptr != 1)
        {
            FIXME("Unknown relative addressing flag, word %#x.\n", *ptr);
            return NULL;
        }
        ptr = parse_pres_reg(ptr + 1, &opr->index_reg);
        if (!ptr)
            return NULL;
    }
    else
    {
        opr->index_reg.table = PRES_REGTAB_COUNT;
        ++ptr;
    }

    ptr = parse_pres_reg(ptr, &opr->reg);
    /* opr->reg is not written when parsing fails, do not touch it in that case. */
    if (!ptr)
        return NULL;

    if (opr->reg.table == PRES_REGTAB_OBCONST)
        opr->reg.offset /= 4;
    return ptr;
}

/* Parses one instruction: opcode word, input count word, the inputs and the output.
 * 'count' is the number of words left in the buffer. Returns the pointer past the
 * instruction or NULL on error. */
static unsigned int *parse_pres_ins(unsigned int *ptr, unsigned int count, struct d3dx_pres_ins *ins)
{
    unsigned int ins_code, ins_raw;
    unsigned int input_count;
    unsigned int i;

    if (count < 2)
    {
        WARN("Byte code buffer ends unexpectedly.\n");
        return NULL;
    }

    ins_raw = *ptr++;
    ins_code = (ins_raw & PRES_OPCODE_MASK) >> PRES_OPCODE_SHIFT;
    ins->component_count = ins_raw & PRES_NCOMP_MASK;
    ins->scalar_op = !!(ins_raw & PRES_SCALAR_FLAG);

    if (ins->component_count < 1 || ins->component_count > 4)
    {
        FIXME("Unsupported number of components %u.\n", ins->component_count);
        return NULL;
    }
    input_count = *ptr++;
    count -= 2;
    for (i = 0; i < ARRAY_SIZE(pres_op_info); ++i)
        if (ins_code == pres_op_info[i].opcode && input_count == pres_op_info[i].input_count)
            break;
    if (i == ARRAY_SIZE(pres_op_info))
    {
        FIXME("Unknown opcode %#x, input_count %u, raw %#x.\n", ins_code, input_count, ins_raw);
        return NULL;
    }
    ins->op = i;
    if (input_count > ARRAY_SIZE(ins->inputs))
    {
        FIXME("Actual input args count %u exceeds inputs array size, instruction %s.\n", input_count,
                pres_op_info[i].mnem);
        return NULL;
    }
    for (i = 0; i < input_count; ++i)
    {
        unsigned int *p;

        p = parse_pres_arg(ptr, count, &ins->inputs[i]);
        if (!p)
            return NULL;
        count -= p - ptr;
        ptr = p;
    }
    ptr = parse_pres_arg(ptr, count, &ins->output);
    /* The output operand may be only partially filled in when parsing fails. */
    if (!ptr)
        return NULL;

    if (ins->output.index_reg.table != PRES_REGTAB_COUNT)
    {
        FIXME("Relative addressing in output register not supported.\n");
        return NULL;
    }

    return ptr;
}

/* Fetches the description of constant 'hc'; exactly one descriptor is expected. */
static HRESULT get_ctab_constant_desc(ID3DXConstantTable *ctab, D3DXHANDLE hc, D3DXCONSTANT_DESC *desc)
{
    D3DXCONSTANT_DESC buffer[2];
    HRESULT hr;
    unsigned int count;

    count = ARRAY_SIZE(buffer);
    if (FAILED(hr = ID3DXConstantTable_GetConstantDesc(ctab, hc, buffer, &count)))
    {
        FIXME("Could not get constant desc, hr %#x.\n", hr);
        return hr;
    }
    else if (count != 1)
    {
        FIXME("Unexpected constant descriptors count %u.\n", count);
        return D3DERR_INVALIDCALL;
    }
    *desc = buffer[0];
    return D3D_OK;
}

/* Reads the constant table of 'byte_code' into 'out': one description and one effect
 * parameter pointer (NULL if the parameter is not found by name) per constant, then calls
 * init_set_constants(). Byte code without a constant table is valid and yields an empty
 * 'out'. On failure the arrays allocated here are freed and 'out' is left empty. */
static HRESULT get_constants_desc(unsigned int *byte_code, struct d3dx_const_tab *out,
        struct d3dx9_base_effect *base)
{
    ID3DXConstantTable *ctab;
    D3DXCONSTANT_DESC *cdesc;
    struct d3dx_parameter **inputs_param;
    D3DXCONSTANTTABLE_DESC desc;
    HRESULT hr;
    D3DXHANDLE hc;
    unsigned int i;

    out->inputs = cdesc = NULL;
    out->inputs_param = NULL;
    out->input_count = 0;
    inputs_param = NULL;
    hr = D3DXGetShaderConstantTable(byte_code, &ctab);
    if (FAILED(hr) || !ctab)
    {
        TRACE("Could not get CTAB data, hr %#x.\n", hr);
        /* returning OK, shaders and preshaders without CTAB are valid */
        return D3D_OK;
    }
    if (FAILED(hr = ID3DXConstantTable_GetDesc(ctab, &desc)))
    {
        FIXME("Could not get CTAB desc, hr %#x.\n", hr);
        goto err_out;
    }

    cdesc = HeapAlloc(GetProcessHeap(), 0, sizeof(*cdesc) * desc.Constants);
    inputs_param = HeapAlloc(GetProcessHeap(), 0, sizeof(*inputs_param) * desc.Constants);
    if (!cdesc || !inputs_param)
    {
        hr = E_OUTOFMEMORY;
        goto err_out;
    }

    for (i = 0; i < desc.Constants; ++i)
    {
        hc = ID3DXConstantTable_GetConstant(ctab, NULL, i);
        if (!hc)
        {
            FIXME("Null constant handle.\n");
            /* hr still holds a success code at this point, report the failure. */
            hr = D3DERR_INVALIDCALL;
            goto err_out;
        }
        if (FAILED(hr = get_ctab_constant_desc(ctab, hc, &cdesc[i])))
            goto err_out;
        inputs_param[i] = get_parameter_by_name(base, NULL, cdesc[i].Name);
        if (!inputs_param[i])
        {
            WARN("Could not find parameter %s in effect.\n", cdesc[i].Name);
            continue;
        }
        if (cdesc[i].Class == D3DXPC_OBJECT)
        {
            TRACE("Object %s, parameter %p.\n", cdesc[i].Name, inputs_param[i]);
            if (cdesc[i].RegisterSet != D3DXRS_SAMPLER || inputs_param[i]->class != D3DXPC_OBJECT
                    || !is_param_type_sampler(inputs_param[i]->type))
            {
                WARN("Unexpected object type, constant %s.\n", debugstr_a(cdesc[i].Name));
                hr = D3DERR_INVALIDCALL;
                goto err_out;
            }
            if (max(inputs_param[i]->element_count, 1) < cdesc[i].RegisterCount)
            {
                WARN("Register count exceeds parameter size, constant %s.\n", debugstr_a(cdesc[i].Name));
                hr = D3DERR_INVALIDCALL;
                goto err_out;
            }
        }
    }
    out->input_count = desc.Constants;
    out->inputs = cdesc;
    out->inputs_param = inputs_param;
    hr = init_set_constants(out, ctab);
    ID3DXConstantTable_Release(ctab);
    return hr;

err_out:
    HeapFree(GetProcessHeap(), 0, cdesc);
    HeapFree(GetProcessHeap(), 0, inputs_param);
    if (ctab)
        ID3DXConstantTable_Release(ctab);
    return hr;
}

/* Grows table_sizes[table] so that register 'max_register' fits. Ignored for table ids
 * which are not valid tables. */
static void update_table_size(unsigned int *table_sizes, unsigned int table, unsigned int max_register)
{
    if (table < PRES_REGTAB_COUNT)
        table_sizes[table] = max(table_sizes[table], max_register + 1);
}

/* Grows the table sizes to fit every constant of 'ctab' which occupies registers. */
static void update_table_sizes_consts(unsigned int *table_sizes, struct d3dx_const_tab *ctab)
{
    unsigned int i, table, max_register;

    for (i = 0; i < ctab->input_count; ++i)
    {
        if (!ctab->inputs[i].RegisterCount)
            continue;
        max_register = ctab->inputs[i].RegisterIndex + ctab->inputs[i].RegisterCount - 1;
        table = ctab->regset2table[ctab->inputs[i].RegisterSet];
        update_table_size(table_sizes, table, max_register);
    }
}

/* Traces an operand: immediates without relative addressing are printed by value, other
 * operands as register name plus component letters. */
static void dump_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *arg, int component_count)
{
    static const char *xyzw_str = "xyzw";
    unsigned int i, table;

    table = arg->reg.table;
    if (table == PRES_REGTAB_IMMED && arg->index_reg.table == PRES_REGTAB_COUNT)
    {
        TRACE("(");
        for (i = 0; i < component_count; ++i)
            TRACE(i < component_count - 1 ? "%.16e, " : "%.16e",
                    ((double *)rs->tables[PRES_REGTAB_IMMED])[arg->reg.offset + i]);
        TRACE(")");
    }
    else
    {
        if (arg->index_reg.table == PRES_REGTAB_COUNT)
        {
            TRACE("%s%u.", table_symbol[table], get_reg_offset(table, arg->reg.offset));
        }
        else
        {
            unsigned int index_reg;

            index_reg = get_reg_offset(arg->index_reg.table, arg->index_reg.offset);
            TRACE("%s[%u + %s%u.%c].", table_symbol[table], get_reg_offset(table, arg->reg.offset),
                    table_symbol[arg->index_reg.table], index_reg,
                    xyzw_str[arg->index_reg.offset - get_offset_reg(arg->index_reg.table, index_reg)]);
        }
        for (i = 0; i < component_count; ++i)
            TRACE("%c", xyzw_str[(arg->reg.offset + i) % 4]);
    }
}

/* Traces parameter name, register and register count for every constant of 'ctab'. */
static void dump_registers(struct d3dx_const_tab *ctab)
{
    unsigned int table, i;

    for (i = 0; i < ctab->input_count; ++i)
    {
        table = ctab->regset2table[ctab->inputs[i].RegisterSet];
        TRACE("//   %-12s %s%-4u %u\n", ctab->inputs_param[i] ? ctab->inputs_param[i]->name : "(nil)",
                table_symbol[table], ctab->inputs[i].RegisterIndex, ctab->inputs[i].RegisterCount);
    }
}

/* Traces one instruction as "mnemonic output, input, ...". */
static void dump_ins(struct d3dx_regstore *rs, const struct d3dx_pres_ins *ins)
{
    unsigned int i;

    TRACE("%s ", pres_op_info[ins->op].mnem);
    dump_arg(rs, &ins->output, pres_op_info[ins->op].func_all_comps ? 1 : ins->component_count);
    for (i = 0; i < pres_op_info[ins->op].input_count; ++i)
    {
        TRACE(", ");
        dump_arg(rs, &ins->inputs[i], ins->scalar_op && !i ? 1 : ins->component_count);
    }
    TRACE("\n");
}

/* Traces the immediates, the input registers and all instructions of a preshader. */
static void dump_preshader(struct d3dx_preshader *pres)
{
    unsigned int i, immediate_count = pres->regs.table_sizes[PRES_REGTAB_IMMED] * 4;
    const double *immediates = pres->regs.tables[PRES_REGTAB_IMMED];

    if (immediate_count)
        TRACE("// Immediates:\n");
    for (i = 0; i < immediate_count; ++i)
    {
        if (!(i % 4))
            TRACE("// ");
        TRACE("%.8e", immediates[i]);
        if (i % 4 == 3)
            TRACE("\n");
        else
            TRACE(", ");
    }
    TRACE("// Preshader registers:\n");
    dump_registers(&pres->inputs);
    TRACE("preshader\n");
    for (i = 0; i < pres->ins_count; ++i)
        dump_ins(&pres->regs, &pres->ins[i]);
}

/* Parses a preshader blob of 'count' words at 'ptr': the optional CLIT section (double
 * immediates), the FXLC section (instructions) and the constant table. Validates the
 * register indices used by the instructions, sizes the register tables and allocates and
 * fills the immediates table. A blob without an FXLC section is accepted with no
 * instructions. Memory stored in 'pres' is not released here on failure. */
static HRESULT parse_preshader(struct d3dx_preshader *pres, unsigned int *ptr, unsigned int count,
        struct d3dx9_base_effect *base)
{
    unsigned int *p;
    unsigned int i, j, const_count;
    double *dconst;
    HRESULT hr;
    unsigned int saved_word;
    unsigned int section_size;

    /* Validate the size before reading the version token. */
    if (!count)
    {
        WARN("Unexpected end of byte code buffer.\n");
        return D3DXERR_INVALIDDATA;
    }

    TRACE("Preshader version %#x.\n", *ptr & 0xffff);

    p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_CLIT, &section_size);
    if (p)
    {
        const_count = *p++;
        if (const_count > (section_size - 1) / (sizeof(double) / sizeof(unsigned int)))
        {
            WARN("Byte code buffer ends unexpectedly.\n");
            return D3DXERR_INVALIDDATA;
        }
        dconst = (double *)p;
    }
    else
    {
        const_count = 0;
        dconst = NULL;
    }
    TRACE("%u double constants.\n", const_count);

    p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_FXLC, &section_size);
    if (!p)
    {
        WARN("Could not find preshader code.\n");
        return D3D_OK;
    }
    pres->ins_count = *p++;
    --section_size;
    if (pres->ins_count > UINT_MAX / sizeof(*pres->ins))
    {
        WARN("Invalid instruction count %u.\n", pres->ins_count);
        return D3DXERR_INVALIDDATA;
    }
    TRACE("%u instructions.\n", pres->ins_count);
    pres->ins = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*pres->ins) * pres->ins_count);
    if (!pres->ins)
        return E_OUTOFMEMORY;
    for (i = 0; i < pres->ins_count; ++i)
    {
        unsigned int *ptr_next;

        ptr_next = parse_pres_ins(p, section_size, &pres->ins[i]);
        if (!ptr_next)
            return D3DXERR_INVALIDDATA;
        section_size -= ptr_next - p;
        p = ptr_next;
    }

    pres->inputs.regset2table = pres_regset2table;

    /* The first word is temporarily replaced with 0xfffe0000 for the constant table
     * lookup and restored right after. */
    saved_word = *ptr;
    *ptr = 0xfffe0000;
    hr = get_constants_desc(ptr, &pres->inputs, base);
    *ptr = saved_word;
    if (FAILED(hr))
        return hr;

    if (const_count % get_reg_components(PRES_REGTAB_IMMED))
    {
        FIXME("const_count %u is not a multiple of %u.\n", const_count,
                get_reg_components(PRES_REGTAB_IMMED));
        return D3DXERR_INVALIDDATA;
    }
    pres->regs.table_sizes[PRES_REGTAB_IMMED] = get_reg_offset(PRES_REGTAB_IMMED, const_count);

    update_table_sizes_consts(pres->regs.table_sizes, &pres->inputs);
    for (i = 0; i < pres->ins_count; ++i)
    {
        for (j = 0; j < pres_op_info[pres->ins[i].op].input_count; ++j)
        {
            enum pres_reg_tables table;
            unsigned int reg_idx;

            if (pres->ins[i].inputs[j].index_reg.table == PRES_REGTAB_COUNT)
            {
                unsigned int last_component_index = pres->ins[i].scalar_op && !j ? 0
                        : pres->ins[i].component_count - 1;

                table = pres->ins[i].inputs[j].reg.table;
                reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].reg.offset
                        + last_component_index);
            }
            else
            {
                table = pres->ins[i].inputs[j].index_reg.table;
                reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].index_reg.offset);
            }
            if (reg_idx >= pres->regs.table_sizes[table])
            {
                FIXME("Out of bounds register index, i %u, j %u, table %u, reg_idx %u.\n",
                        i, j, table, reg_idx);
                return D3DXERR_INVALIDDATA;
            }
        }
        update_table_size(pres->regs.table_sizes, pres->ins[i].output.reg.table,
                get_reg_offset(pres->ins[i].output.reg.table,
                pres->ins[i].output.reg.offset + pres->ins[i].component_count - 1));
    }
    if (FAILED(regstore_alloc_table(&pres->regs, PRES_REGTAB_IMMED)))
        return E_OUTOFMEMORY;
    regstore_set_values(&pres->regs, PRES_REGTAB_IMMED, dconst, 0, const_count);

    return D3D_OK;
}

/* Creates a parameter evaluator from shader or preshader byte code. For vertex and pixel
 * shader types the shader constant table is read and the preshader is looked up in the
 * PRES comment section; for other types the whole blob is parsed as a preshader.
 * '*peval_out' receives the evaluator, or NULL for empty input or on any failure. */
void d3dx_create_param_eval(struct d3dx9_base_effect *base_effect, void *byte_code, unsigned int byte_code_size,
        D3DXPARAMETER_TYPE type, struct d3dx_param_eval **peval_out, ULONG64 *version_counter)
{
    struct d3dx_param_eval *peval;
    unsigned int *ptr;
    HRESULT hr;
    unsigned int i;
    BOOL shader;
    unsigned int count, pres_size;

    TRACE("base_effect %p, byte_code %p, byte_code_size %u, type %u, peval_out %p.\n",
            base_effect, byte_code, byte_code_size, type, peval_out);

    count = byte_code_size / sizeof(unsigned int);
    if (!byte_code || !count)
    {
        *peval_out = NULL;
        return;
    }

    peval = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*peval));
    if (!peval)
        goto err_out;

    peval->version_counter = version_counter;

    peval->param_type = type;
    switch (type)
    {
        case D3DXPT_VERTEXSHADER:
        case D3DXPT_PIXELSHADER:
            shader = TRUE;
            break;
        default:
            shader = FALSE;
            break;
    }
    peval->shader_inputs.regset2table = shad_regset2table;

    ptr = (unsigned int *)byte_code;
    if (shader)
    {
        if ((*ptr & 0xfffe0000) != 0xfffe0000)
        {
            FIXME("Invalid shader signature %#x.\n", *ptr);
            goto err_out;
        }
        TRACE("Shader version %#x.\n", *ptr & 0xffff);

        if (FAILED(hr = get_constants_desc(ptr, &peval->shader_inputs, base_effect)))
        {
            FIXME("Could not get shader constant table, hr %#x.\n", hr);
            goto err_out;
        }
        update_table_sizes_consts(peval->pres.regs.table_sizes, &peval->shader_inputs);
        ptr = find_bytecode_comment(ptr + 1, count - 1, FOURCC_PRES, &pres_size);
        if (!ptr)
            TRACE("No preshader found.\n");
    }
    else
    {
        pres_size = count;
    }

    if (ptr && FAILED(parse_preshader(&peval->pres, ptr, pres_size, base_effect)))
    {
        FIXME("Failed parsing preshader, byte code for analysis follows.\n");
        dump_bytecode(byte_code, byte_code_size);
        goto err_out;
    }

    for (i = PRES_REGTAB_FIRST_SHADER; i < PRES_REGTAB_COUNT; ++i)
    {
        if (FAILED(regstore_alloc_table(&peval->pres.regs, i)))
            goto err_out;
    }

    if (TRACE_ON(d3dx))
    {
        dump_bytecode(byte_code, byte_code_size);
        dump_preshader(&peval->pres);
        if (shader)
        {
            TRACE("// Shader registers:\n");
            dump_registers(&peval->shader_inputs);
        }
    }
    *peval_out = peval;
    TRACE("Created parameter evaluator %p.\n", *peval_out);
    return;

err_out:
    FIXME("Error creating parameter evaluator.\n");
    d3dx_free_param_eval(peval);
    *peval_out = NULL;
}

/* Frees the arrays owned by a constant table description. */
static void d3dx_free_const_tab(struct d3dx_const_tab *ctab)
{
    HeapFree(GetProcessHeap(), 0, ctab->inputs);
    HeapFree(GetProcessHeap(), 0, ctab->inputs_param);
    HeapFree(GetProcessHeap(), 0, ctab->const_set);
}

/* Frees the instructions, register tables and inputs of a preshader. */
static void d3dx_free_preshader(struct d3dx_preshader *pres)
{
    HeapFree(GetProcessHeap(), 0, pres->ins);

    regstore_free_tables(&pres->regs);
    d3dx_free_const_tab(&pres->inputs);
}

/* Frees a parameter evaluator; NULL is accepted. */
void d3dx_free_param_eval(struct d3dx_param_eval *peval)
{
    TRACE("peval %p.\n", peval);

    if (!peval)
        return;

    d3dx_free_preshader(&peval->pres);
    d3dx_free_const_tab(&peval->shader_inputs);
    HeapFree(GetProcessHeap(), 0, peval);
}

/* Fills 'info' with the layout used to upload one element of a matrix parameter:
 * whether a transpose is needed (row major parameter to column major constant or the
 * reverse), the major / minor dimensions, the stride between major lines in the table
 * and the number of components written per element. */
static void get_const_upload_info(struct d3dx_const_param_eval_output *const_set,
        struct const_upload_info *info)
{
    struct d3dx_parameter *param = const_set->param;
    unsigned int table = const_set->table;

    info->transpose = (const_set->constant_class == D3DXPC_MATRIX_COLUMNS && param->class == D3DXPC_MATRIX_ROWS)
            || (param->class == D3DXPC_MATRIX_COLUMNS && const_set->constant_class == D3DXPC_MATRIX_ROWS);
    if (const_set->constant_class == D3DXPC_MATRIX_COLUMNS)
    {
        info->major = param->columns;
        info->minor = param->rows;
    }
    else
    {
        info->major = param->rows;
        info->minor = param->columns;
    }

    if (get_reg_components(table) == 1)
    {
        unsigned int const_length = get_offset_reg(table, const_set->register_count);

        info->major_stride = info->minor;
        info->major_count = const_length / info->major_stride;
        info->minor_remainder = const_length % info->major_stride;
    }
    else
    {
        info->major_stride = get_reg_components(table);
        info->major_count = const_set->register_count;
        info->minor_remainder = 0;
    }
    info->count = info->major_count * info->minor + info->minor_remainder;
}

/* Value conversion helpers used by regstore_set_data(). 'in' and 'out' may point to the
 * same buffer, each element is read before it is written. */
static void pres_int_from_float(void *out, const void *in, unsigned int count)
{
    unsigned int i;
    const float *in_float = in;
    int *out_int = out;

    for (i = 0; i < count; ++i)
        out_int[i] = in_float[i];
}

static void pres_bool_from_value(void *out, const void *in, unsigned int count)
{
    unsigned int i;
    const DWORD *in_dword = in;
    BOOL *out_bool = out;

    for (i = 0; i < count; ++i)
        out_bool[i] = !!in_dword[i];
}

static void pres_float_from_int(void *out, const void *in, unsigned int count)
{
    unsigned int i;
    const int *in_int = in;
    float *out_float = out;

    for (i = 0; i < count; ++i)
        out_float[i] = in_int[i];
}

static void pres_float_from_bool(void *out, const void *in, unsigned int count)
{
    unsigned int i;
    const BOOL *in_bool = in;
    float *out_float = out;

    for (i = 0; i < count; ++i)
        out_float[i] = !!in_bool[i];
}

static void pres_int_from_bool(void *out, const void *in, unsigned int count)
{
    unsigned int i;
    /* The source holds BOOL values; reading it as float would turn the bit pattern
     * 0x80000000 (negative zero) into FALSE. */
    const BOOL *in_bool = in;
    int *out_int = out;

    for (i = 0; i < count; ++i)
        out_int[i] = !!in_bool[i];
}

/* Stores 'count' components of type 'param_type' into the table at component 'offset',
 * converting to the table's value type when the types differ, and marks the registers.
 * The conversion table is indexed as [param_type][table_type]; callers must only request
 * combinations which have a conversion function. */
static void regstore_set_data(struct d3dx_regstore *rs, unsigned int table,
        unsigned int offset, const unsigned int *in, unsigned int count, enum pres_value_type param_type)
{
    typedef void (*conv_func)(void *out, const void *in, unsigned int count);
    static const conv_func set_const_funcs[PRES_VT_COUNT][PRES_VT_COUNT] =
    {
        {NULL,                 NULL, pres_int_from_float, pres_bool_from_value},
        {NULL,                 NULL, NULL,                NULL},
        {pres_float_from_int,  NULL, NULL,                pres_bool_from_value},
        {pres_float_from_bool, NULL, pres_int_from_bool,  NULL}
    };
    enum pres_value_type table_type = table_info[table].type;

    if (param_type == table_type)
    {
        regstore_set_values(rs, table, in, offset, count);
        return;
    }

    set_const_funcs[param_type][table_type]((unsigned int *)rs->tables[table] + offset, in, count);
    regstore_set_modified(rs, table, offset, count);
}

/* Uploads every dirty parameter referenced by const_tab->const_set into the register
 * store, then records 'new_update_version' in the constant table. */
static void set_constants(struct d3dx_regstore *rs, struct d3dx_const_tab *const_tab,
        ULONG64 new_update_version)
{
    unsigned int const_idx;

    for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx)
    {
        struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx];
        unsigned int table = const_set->table;
        struct d3dx_parameter *param = const_set->param;
        unsigned int element, i, j, start_offset;
        struct const_upload_info info;
        unsigned int *data = param->data;
        enum pres_value_type param_type;

        if (!is_param_dirty(param, const_tab->update_version))
            continue;

        start_offset = get_offset_reg(table, const_set->register_index);
        if (const_set->direct_copy)
        {
            regstore_set_values(rs, table, data, start_offset,
                    get_offset_reg(table, const_set->register_count));
            continue;
        }
        param_type = table_type_from_param_type(param->type);
        if (const_set->constant_class == D3DXPC_SCALAR || const_set->constant_class == D3DXPC_VECTOR)
        {
            unsigned int count = max(param->rows, param->columns);

            if (count >= get_reg_components(table))
            {
                regstore_set_data(rs, table, start_offset, data,
                        count * const_set->element_count, param_type);
            }
            else
            {
                for (element = 0; element < const_set->element_count; ++element)
                    regstore_set_data(rs, table, start_offset + get_offset_reg(table, element),
                            &data[element * count], count, param_type);
            }
            continue;
        }
        get_const_upload_info(const_set, &info);
        for (element = 0; element < const_set->element_count; ++element)
        {
            unsigned int *out = (unsigned int *)rs->tables[table] + start_offset;

            /* Store reshaped but (possibly) not converted yet data temporarily in the same constants buffer.
             * All the supported types of parameters and table values have the same size. */
            if (info.transpose)
            {
                for (i = 0; i < info.major_count; ++i)
                    for (j = 0; j < info.minor; ++j)
                        out[i * info.major_stride + j] = data[i + j * info.major];

                /* i == info.major_count here: the partial trailing line. */
                for (j = 0; j < info.minor_remainder; ++j)
                    out[i * info.major_stride + j] = data[i + j * info.major];
            }
            else
            {
                for (i = 0; i < info.major_count; ++i)
                    for (j = 0; j < info.minor; ++j)
                        out[i * info.major_stride + j] = data[i * info.minor + j];
            }
            start_offset += get_offset_reg(table, const_set->register_count);
            data += param->rows * param->columns;
        }
        start_offset = get_offset_reg(table, const_set->register_index);
        if (table_info[table].type == param_type)
            regstore_set_modified(rs, table, start_offset,
                    get_offset_reg(table, const_set->register_count) * const_set->element_count);
        else
            regstore_set_data(rs, table, start_offset, (unsigned int *)rs->tables[table] + start_offset,
                    get_offset_reg(table, const_set->register_count) * const_set->element_count, param_type);
    }
    const_tab->update_version = new_update_version;
}

#define INITIAL_CONST_SET_SIZE 16

/* Appends a copy of '*set' to const_tab->const_set. The array starts with
 * INITIAL_CONST_SET_SIZE entries and doubles when full; on allocation failure the
 * existing array is left untouched. */
static HRESULT append_const_set(struct d3dx_const_tab *const_tab, struct d3dx_const_param_eval_output *set)
{
    if (const_tab->const_set_count >= const_tab->const_set_size)
    {
        unsigned int new_size;
        struct d3dx_const_param_eval_output *new_alloc;

        if (!const_tab->const_set_size)
        {
            new_size = INITIAL_CONST_SET_SIZE;
            new_alloc = HeapAlloc(GetProcessHeap(), 0, sizeof(*const_tab->const_set) * new_size);
            if (!new_alloc)
            {
                ERR("Out of memory.\n");
                return E_OUTOFMEMORY;
            }
        }
        else
        {
            new_size = const_tab->const_set_size * 2;
            new_alloc = HeapReAlloc(GetProcessHeap(), 0, const_tab->const_set,
                    sizeof(*const_tab->const_set) * new_size);
            if (!new_alloc)
            {
                ERR("Out of memory.\n");
                return E_OUTOFMEMORY;
            }
        }
        const_tab->const_set = new_alloc;
        const_tab->const_set_size = new_size;
    }
    const_tab->const_set[const_tab->const_set_count++] = *set;
    return D3D_OK;
}

/* Merges runs of const_set entries, starting at 'index', which target consecutive table
 * offsets and consecutive parameter data and are otherwise compatible, into the first
 * entry of each run. Returns D3DERR_INVALIDCALL if a merged direct copy would have to use
 * 'param' and its type has no table value type. */
static HRESULT merge_const_set_entries(struct d3dx_const_tab *const_tab,
        struct d3dx_parameter *param, unsigned int index)
{
    unsigned int i, start_index = index;
    DWORD *current_data;
    enum pres_reg_tables current_table;
    unsigned int current_start_offset, element_count;
    struct d3dx_const_param_eval_output *first_const;

    if (!const_tab->const_set_count)
        return D3D_OK;

    while (index < const_tab->const_set_count - 1)
    {
        first_const = &const_tab->const_set[index];
        current_data = first_const->param->data;
        current_table = first_const->table;
        current_start_offset = get_offset_reg(current_table, first_const->register_index);
        element_count = 0;
        for (i = index; i < const_tab->const_set_count; ++i)
        {
            struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[i];
            unsigned int count = get_offset_reg(const_set->table,
                    const_set->register_count * const_set->element_count);
            unsigned int start_offset = get_offset_reg(const_set->table, const_set->register_index);

            if (!(const_set->table == current_table && current_start_offset == start_offset
                    && const_set->direct_copy == first_const->direct_copy
                    && current_data == const_set->param->data
                    && (const_set->direct_copy || (first_const->param->type == const_set->param->type
                    && first_const->param->class == const_set->param->class
                    && first_const->param->columns == const_set->param->columns
                    && first_const->param->rows == const_set->param->rows
                    && first_const->register_count == const_set->register_count
                    && (i == const_tab->const_set_count - 1
                    || first_const->param->element_count == const_set->param->element_count)))))
                break;

            current_start_offset += count;
            current_data += const_set->direct_copy ? count : const_set->param->rows
                    * const_set->param->columns * const_set->element_count;
            element_count += const_set->element_count;
        }

        if (i > index + 1)
        {
            TRACE("Merging %u child parameters for %s, not merging %u, direct_copy %#x.\n", i - index,
                    debugstr_a(param->name), const_tab->const_set_count - i, first_const->direct_copy);

            first_const->element_count = element_count;
            if (first_const->direct_copy)
            {
                first_const->element_count = 1;
                if (index == start_index
                        && !(param->type == D3DXPT_VOID && param->class == D3DXPC_STRUCT))
                {
                    if (table_type_from_param_type(param->type) == PRES_VT_COUNT)
                        return D3DERR_INVALIDCALL;
                    first_const->param = param;
                }
                first_const->register_count = get_reg_offset(current_table, current_start_offset)
                        - first_const->register_index;
            }
            /* Close the gap left by the merged entries. */
            memmove(&const_tab->const_set[index + 1], &const_tab->const_set[i],
                    sizeof(*const_tab->const_set) * (const_tab->const_set_count - i));
            const_tab->const_set_count -= i - index - 1;
        }
        else
        {
            TRACE("Not merging %u child parameters for %s, direct_copy %#x.\n",
                    const_tab->const_set_count - i, debugstr_a(param->name), first_const->direct_copy);
        }
        index = i;
    }
    return D3D_OK;
}

static HRESULT init_set_constants_param(struct d3dx_const_tab *const_tab, ID3DXConstantTable *ctab,
        D3DXHANDLE hc, struct d3dx_parameter *param)
{
    D3DXCONSTANT_DESC desc;
    unsigned int const_count, param_count, i;
    BOOL get_element;
    struct d3dx_const_param_eval_output const_set;
    struct const_upload_info info;
    enum pres_value_type table_type;
    HRESULT hr;

    if (FAILED(get_ctab_constant_desc(ctab, hc, &desc)))
        return D3DERR_INVALIDCALL;

    if (param->element_count)
    {
        param_count = param->element_count;
        const_count = desc.Elements;
        get_element = TRUE;
    }
    else
    {
        if (desc.Elements > 1)
        {
            FIXME("Unexpected number of constant elements %u.\n", desc.Elements);
            return D3DERR_INVALIDCALL;
        }
        param_count = param->member_count;
        const_count = desc.StructMembers;
        get_element = FALSE;
    }
    if (const_count != param_count)
    {
        FIXME("Number of elements or struct members differs 
between parameter (%u) and constant (%u).\n", param_count, const_count); return D3DERR_INVALIDCALL; } if (const_count) { HRESULT ret = D3D_OK; D3DXHANDLE hc_element; unsigned int index = const_tab->const_set_count; for (i = 0; i < const_count; ++i) { if (get_element) hc_element = ID3DXConstantTable_GetConstantElement(ctab, hc, i); else hc_element = ID3DXConstantTable_GetConstant(ctab, hc, i); if (!hc_element) { FIXME("Could not get constant.\n"); hr = D3DERR_INVALIDCALL; } else { hr = init_set_constants_param(const_tab, ctab, hc_element, ¶m->members[i]); } if (FAILED(hr)) ret = hr; } if (FAILED(ret)) return ret; return merge_const_set_entries(const_tab, param, index); } TRACE("Constant %s, rows %u, columns %u, class %u, bytes %u.\n", debugstr_a(desc.Name), desc.Rows, desc.Columns, desc.Class, desc.Bytes); TRACE("Parameter %s, rows %u, columns %u, class %u, flags %#x, bytes %u.\n", debugstr_a(param->name), param->rows, param->columns, param->class, param->flags, param->bytes); const_set.element_count = 1; const_set.param = param; const_set.constant_class = desc.Class; if (desc.RegisterSet >= ARRAY_SIZE(shad_regset2table)) { FIXME("Unknown register set %u.\n", desc.RegisterSet); return D3DERR_INVALIDCALL; } const_set.register_index = desc.RegisterIndex; const_set.table = const_tab->regset2table[desc.RegisterSet]; if (const_set.table >= PRES_REGTAB_COUNT) { ERR("Unexpected register set %u.\n", desc.RegisterSet); return D3DERR_INVALIDCALL; } assert(table_info[const_set.table].component_size == sizeof(unsigned int)); assert(param->bytes / (param->rows * param->columns) == sizeof(unsigned int)); const_set.register_count = desc.RegisterCount; table_type = table_info[const_set.table].type; get_const_upload_info(&const_set, &info); if (!info.count) { TRACE("%s has zero count, skipping.\n", debugstr_a(param->name)); return D3D_OK; } if (table_type_from_param_type(param->type) == PRES_VT_COUNT) return D3DERR_INVALIDCALL; const_set.direct_copy = 
table_type_from_param_type(param->type) == table_type && !info.transpose && info.minor == info.major_stride && info.count == get_offset_reg(const_set.table, const_set.register_count) && info.count * sizeof(unsigned int) <= param->bytes; if (info.minor_remainder && !const_set.direct_copy && !info.transpose) FIXME("Incomplete last row for not transposed matrix which cannot be directly copied, parameter %s.\n", debugstr_a(param->name)); if (info.major_count > info.major || (info.major_count == info.major && info.minor_remainder)) { WARN("Constant dimensions exceed parameter size.\n"); return D3DERR_INVALIDCALL; } if (FAILED(hr = append_const_set(const_tab, &const_set))) return hr; return D3D_OK; } static HRESULT init_set_constants(struct d3dx_const_tab *const_tab, ID3DXConstantTable *ctab) { unsigned int i; HRESULT hr, ret; D3DXHANDLE hc; ret = D3D_OK; for (i = 0; i < const_tab->input_count; ++i) { if (!const_tab->inputs_param[i] || const_tab->inputs_param[i]->class == D3DXPC_OBJECT) continue; hc = ID3DXConstantTable_GetConstant(ctab, NULL, i); if (hc) { hr = init_set_constants_param(const_tab, ctab, hc, const_tab->inputs_param[i]); } else { FIXME("Could not get constant, index %u.\n", i); hr = D3DERR_INVALIDCALL; } if (FAILED(hr)) ret = hr; } if (const_tab->const_set_count) { const_tab->const_set = HeapReAlloc(GetProcessHeap(), 0, const_tab->const_set, sizeof(*const_tab->const_set) * const_tab->const_set_count); if (!const_tab->const_set) { ERR("Out of memory.\n"); return E_OUTOFMEMORY; } const_tab->const_set_size = const_tab->const_set_count; } return ret; } static double exec_get_reg_value(struct d3dx_regstore *rs, enum pres_reg_tables table, unsigned int offset) { if (!regstore_is_val_set_reg(rs, table, get_reg_offset(table, offset))) WARN("Using uninitialized input, table %u, offset %u.\n", table, offset); return regstore_get_double(rs, table, offset); } static double exec_get_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *opr, unsigned int comp) { 
unsigned int offset, base_index, reg_index, table; table = opr->reg.table; if (opr->index_reg.table == PRES_REGTAB_COUNT) base_index = 0; else base_index = lrint(exec_get_reg_value(rs, opr->index_reg.table, opr->index_reg.offset)); offset = get_offset_reg(table, base_index) + opr->reg.offset + comp; reg_index = get_reg_offset(table, offset); if (reg_index >= rs->table_sizes[table]) { unsigned int wrap_size; if (table == PRES_REGTAB_CONST) { /* As it can be guessed from tests, offset into floating constant table is wrapped * to the nearest power of 2 and not to the actual table size. */ for (wrap_size = 1; wrap_size < rs->table_sizes[table]; wrap_size <<= 1) ; } else { wrap_size = rs->table_sizes[table]; } WARN("Wrapping register index %u, table %u, wrap_size %u, table size %u.\n", reg_index, table, wrap_size, rs->table_sizes[table]); reg_index %= wrap_size; if (reg_index >= rs->table_sizes[table]) return 0.0; offset = get_offset_reg(table, reg_index) + offset % get_reg_components(table); } return exec_get_reg_value(rs, table, offset); } static void exec_set_arg(struct d3dx_regstore *rs, const struct d3dx_pres_reg *reg, unsigned int comp, double res) { regstore_set_double(rs, reg->table, reg->offset + comp, res); } #define ARGS_ARRAY_SIZE 8 static HRESULT execute_preshader(struct d3dx_preshader *pres) { unsigned int i, j, k; double args[ARGS_ARRAY_SIZE]; double res; for (i = 0; i < pres->ins_count; ++i) { const struct d3dx_pres_ins *ins; const struct op_info *oi; ins = &pres->ins[i]; oi = &pres_op_info[ins->op]; if (oi->func_all_comps) { if (oi->input_count * ins->component_count > ARGS_ARRAY_SIZE) { FIXME("Too many arguments (%u) for one instruction.\n", oi->input_count * ins->component_count); return E_FAIL; } for (k = 0; k < oi->input_count; ++k) for (j = 0; j < ins->component_count; ++j) args[k * ins->component_count + j] = exec_get_arg(&pres->regs, &ins->inputs[k], ins->scalar_op && !k ? 
0 : j); res = oi->func(args, ins->component_count); /* only 'dot' instruction currently falls here */ exec_set_arg(&pres->regs, &ins->output.reg, 0, res); } else { for (j = 0; j < ins->component_count; ++j) { for (k = 0; k < oi->input_count; ++k) args[k] = exec_get_arg(&pres->regs, &ins->inputs[k], ins->scalar_op && !k ? 0 : j); res = oi->func(args, ins->component_count); exec_set_arg(&pres->regs, &ins->output.reg, j, res); } } } return D3D_OK; } static void set_preshader_modified(struct d3dx_preshader *pres) { unsigned int i; for (i = 0; i < pres->ins_count; ++i) { const struct d3dx_pres_ins *ins = &pres->ins[i]; const struct d3dx_pres_reg *reg = &ins->output.reg; if (reg->table == PRES_REGTAB_TEMP) continue; regstore_set_modified(&pres->regs, reg->table, reg->offset, pres_op_info[ins->op].func_all_comps ? 1 : ins->component_count); } } static BOOL is_const_tab_input_dirty(struct d3dx_const_tab *ctab, ULONG64 update_version) { unsigned int i; if (update_version == ULONG64_MAX) update_version = ctab->update_version; for (i = 0; i < ctab->input_count; ++i) { if (ctab->inputs_param[i] && is_param_dirty(ctab->inputs_param[i], update_version)) return TRUE; } return FALSE; } BOOL is_param_eval_input_dirty(struct d3dx_param_eval *peval, ULONG64 update_version) { return is_const_tab_input_dirty(&peval->pres.inputs, update_version) || is_const_tab_input_dirty(&peval->shader_inputs, update_version); } HRESULT d3dx_evaluate_parameter(struct d3dx_param_eval *peval, const struct d3dx_parameter *param, void *param_value) { HRESULT hr; unsigned int i; unsigned int elements, elements_param, elements_table; float *oc; TRACE("peval %p, param %p, param_value %p.\n", peval, param, param_value); if (is_const_tab_input_dirty(&peval->pres.inputs, ULONG64_MAX)) { set_constants(&peval->pres.regs, &peval->pres.inputs, next_update_version(peval->version_counter)); if (FAILED(hr = execute_preshader(&peval->pres))) return hr; } elements_table = get_offset_reg(PRES_REGTAB_OCONST, 
peval->pres.regs.table_sizes[PRES_REGTAB_OCONST]); elements_param = param->bytes / sizeof(unsigned int); elements = min(elements_table, elements_param); oc = (float *)peval->pres.regs.tables[PRES_REGTAB_OCONST]; for (i = 0; i < elements; ++i) set_number((unsigned int *)param_value + i, param->type, oc + i, D3DXPT_FLOAT); return D3D_OK; } static HRESULT set_shader_constants_device(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device, struct d3dx_regstore *rs, D3DXPARAMETER_TYPE type, enum pres_reg_tables table) { unsigned int start, count; void *ptr; HRESULT hr, result; result = D3D_OK; start = 0; while (start < rs->table_sizes[table]) { count = 0; while (start < rs->table_sizes[table] && !regstore_is_val_set_reg(rs, table, start)) ++start; while (start + count < rs->table_sizes[table] && regstore_is_val_set_reg(rs, table, start + count)) ++count; if (!count) break; TRACE("Setting %u constants at %u.\n", count, start); ptr = (BYTE *)rs->tables[table] + get_offset_reg(table, start) * table_info[table].component_size; if (type == D3DXPT_VERTEXSHADER) { switch(table) { case PRES_REGTAB_OCONST: hr = SET_D3D_STATE_(manager, device, SetVertexShaderConstantF, start, (const float *)ptr, count); break; case PRES_REGTAB_OICONST: hr = SET_D3D_STATE_(manager, device, SetVertexShaderConstantI, start, (const int *)ptr, count); break; case PRES_REGTAB_OBCONST: hr = SET_D3D_STATE_(manager, device, SetVertexShaderConstantB, start, (const BOOL *)ptr, count); break; default: FIXME("Unexpected register table %u.\n", table); return D3DERR_INVALIDCALL; } } else if (type == D3DXPT_PIXELSHADER) { switch(table) { case PRES_REGTAB_OCONST: hr = SET_D3D_STATE_(manager, device, SetPixelShaderConstantF, start, (const float *)ptr, count); break; case PRES_REGTAB_OICONST: hr = SET_D3D_STATE_(manager, device, SetPixelShaderConstantI, start, (const int *)ptr, count); break; case PRES_REGTAB_OBCONST: hr = SET_D3D_STATE_(manager, device, SetPixelShaderConstantB, start, (const BOOL *)ptr, 
count); break; default: FIXME("Unexpected register table %u.\n", table); return D3DERR_INVALIDCALL; } } else { FIXME("Unexpected parameter type %u.\n", type); return D3DERR_INVALIDCALL; } if (FAILED(hr)) { ERR("Setting constants failed, type %u, table %u, hr %#x.\n", type, table, hr); result = hr; } start += count; } regstore_reset_modified(rs, table); return result; } HRESULT d3dx_param_eval_set_shader_constants(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device, struct d3dx_param_eval *peval, BOOL update_all) { static const enum pres_reg_tables set_tables[] = {PRES_REGTAB_OCONST, PRES_REGTAB_OICONST, PRES_REGTAB_OBCONST}; HRESULT hr, result; struct d3dx_preshader *pres = &peval->pres; struct d3dx_regstore *rs = &pres->regs; unsigned int i; ULONG64 new_update_version = next_update_version(peval->version_counter); BOOL update_device = update_all; TRACE("device %p, peval %p, param_type %u.\n", device, peval, peval->param_type); if (is_const_tab_input_dirty(&pres->inputs, ULONG64_MAX)) { set_constants(rs, &pres->inputs, new_update_version); if (FAILED(hr = execute_preshader(pres))) return hr; update_device = TRUE; } if (is_const_tab_input_dirty(&peval->shader_inputs, ULONG64_MAX)) { set_constants(rs, &peval->shader_inputs, new_update_version); update_device = TRUE; } result = D3D_OK; if (update_device) { if (update_all) { for (i = 0; i < peval->shader_inputs.input_count; ++i) { unsigned int table; if (!peval->shader_inputs.inputs[i].RegisterCount) continue; table = peval->shader_inputs.regset2table[peval->shader_inputs.inputs[i].RegisterSet]; if (table < PRES_REGTAB_COUNT) regstore_set_modified_reg(rs, table, peval->shader_inputs.inputs[i].RegisterIndex, peval->shader_inputs.inputs[i].RegisterIndex + peval->shader_inputs.inputs[i].RegisterCount - 1); } set_preshader_modified(pres); } for (i = 0; i < ARRAY_SIZE(set_tables); ++i) { if (FAILED(hr = set_shader_constants_device(manager, device, rs, peval->param_type, set_tables[i]))) result = hr; } } return 
result; }